From 29c5c816cb695b6296f5cf595753ab7f09e7235b Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Sun, 27 Apr 2025 13:34:29 +0100 Subject: [PATCH] More libc. --- sqlite3/libc/libc.wasm | Bin 3534 -> 3527 bytes sqlite3/libc/libc.wat | 791 ++++++++++++++++++++--------------------- sqlite3/libc/math.h | 29 ++ sqlite3/libc/stdlib.h | 13 +- sqlite3/libc/strings.h | 47 +++ 5 files changed, 480 insertions(+), 400 deletions(-) create mode 100644 sqlite3/libc/math.h create mode 100644 sqlite3/libc/strings.h diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index a69c6c0b0e1f04bacb74ae3698dd55ff0d3aa3f0..f8b8f94ab197536f7e98dd6c2efae2fe23651f85 100755 GIT binary patch delta 33 pcmX>neO!9N0j8hg8xK9>-;c`uhBGXUD$3dH~b delta 40 wcmX>ueNKA90j36tjfb9biq2qR5~ydaW^zzqa^+=EV02{iV!Z diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 2152173..9302590 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1505,433 +1505,428 @@ (local $19 i32) (local $20 v128) (local $scratch i32) - (block $block - (br_if $block - (i32.eqz - (local.get $2) - ) + (if + (i32.ge_u + (local.get $1) + (i32.const 2) ) - (br_if $block - (i32.lt_u - (local.get $1) - (i32.const 2) - ) - ) - (local.set $14 - (i32.mul - (local.get $1) - (local.get $2) - ) - ) - (local.set $15 - (i32.and - (local.get $2) - (i32.const 15) - ) - ) - (local.set $9 - (i32.and - (local.get $2) - (i32.const -16) - ) - ) - (local.set $16 - (i32.add - (local.get $0) - (local.get $2) - ) - ) - (local.set $17 - (i32.lt_u - (local.get $2) - (i32.const 16) - ) - ) - (loop $label5 - (local.set $6 - (i32.eq + (then + (local.set $14 + (i32.mul (local.get $1) - (i32.const 2) + (local.get $2) ) ) - (local.set $18 - (i32.le_u - (i32.add - (local.get $0) - (i32.mul - (i32.add - (local.tee $13 - (select - (i32.const 1) - (local.tee $1 - (i32.wrap_i64 - (i64.div_u - (i64.sub - (i64.mul - (i64.extend_i32_u - (local.get $1) - ) - (i64.const 5) - ) - (i64.const 1) - ) - (i64.const 11) - ) - ) - ) - (local.get $6) - ) - ) - 
(i32.const 1) - ) - (local.get $2) - ) - ) + (local.set $15 + (i32.and + (local.get $2) + (i32.const 15) + ) + ) + (local.set $9 + (i32.and + (local.get $2) + (i32.const -16) + ) + ) + (local.set $16 + (i32.add (local.get $0) + (local.get $2) ) ) - (local.set $11 - (local.tee $10 - (i32.mul - (local.get $2) - (local.get $13) - ) + (local.set $17 + (i32.lt_u + (local.get $2) + (i32.const 16) ) ) - (loop $label4 - (block $block1 - (br_if $block1 - (i32.gt_u - (local.tee $5 - (i32.sub - (local.get $11) - (local.get $10) - ) - ) - (local.get $11) - ) + (loop $label5 + (local.set $6 + (i32.eq + (local.get $1) + (i32.const 2) ) - (loop $label3 - (br_if $block1 - (i32.le_s - (call_indirect $0 (type $0) - (local.tee $4 - (i32.add - (local.get $0) - (local.tee $12 - (local.get $5) - ) - ) - ) - (local.tee $5 - (i32.add - (local.get $4) - (local.get $10) - ) - ) - (local.get $3) - ) - (i32.const 0) - ) - ) - (block $block3 - (block $block4 - (block $block2 - (br_if $block2 - (local.get $17) - ) - (br_if $block2 - (i32.and - (i32.eqz - (local.get $18) - ) - (i32.lt_u - (local.get $5) - (i32.add - (local.get $12) - (local.get $16) + ) + (local.set $18 + (i32.le_u + (i32.add + (local.get $0) + (i32.mul + (i32.add + (local.tee $13 + (select + (i32.const 1) + (local.tee $1 + (i32.wrap_i64 + (i64.div_u + (i64.sub + (i64.mul + (i64.extend_i32_u + (local.get $1) + ) + (i64.const 5) + ) + (i64.const 1) + ) + (i64.const 11) + ) ) ) + (local.get $6) + ) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (local.get $0) + ) + ) + (local.set $11 + (local.tee $10 + (i32.mul + (local.get $2) + (local.get $13) + ) + ) + ) + (loop $label4 + (block $block + (br_if $block + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $11) + (local.get $10) + ) + ) + (local.get $11) + ) + ) + (loop $label3 + (br_if $block + (i32.le_s + (call_indirect $0 (type $0) + (local.tee $4 + (i32.add + (local.get $0) + (local.tee $12 + (local.get $5) + ) + ) + ) + (local.tee $5 + (i32.add + (local.get $4) + 
(local.get $10) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + (block $block2 + (block $block3 + (block $block1 + (br_if $block1 + (local.get $17) + ) + (br_if $block1 + (i32.and + (i32.eqz + (local.get $18) + ) + (i32.lt_u + (local.get $5) + (i32.add + (local.get $12) + (local.get $16) + ) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (local.get $9) + ) + ) + (local.set $7 + (i32.add + (local.get $4) + (local.get $9) + ) + ) + (local.set $6 + (local.get $9) + ) + (loop $label + (local.set $20 + (v128.load align=1 + (local.get $4) + ) + ) + (v128.store align=1 + (local.get $4) + (v128.load align=1 + (local.tee $8 + (i32.add + (local.get $4) + (local.get $10) + ) + ) + ) + ) + (v128.store align=1 + (local.get $8) + (local.get $20) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (br_if $label + (local.tee $6 + (i32.sub + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (local.set $6 + (local.get $15) + ) + (br_if $block2 + (i32.eq + (local.get $2) + (local.get $9) + ) + ) + (br $block3) + ) + (local.set $7 + (local.get $4) + ) + (local.set $6 + (local.get $2) + ) + ) + (br_if $block2 + (i32.lt_u + (block (result i32) + (local.set $scratch + (i32.sub + (local.get $6) + (i32.const 1) + ) + ) + (if + (local.tee $4 + (i32.and + (local.get $6) + (i32.const 3) + ) + ) + (then + (local.set $6 + (i32.and + (local.get $6) + (i32.const -4) + ) + ) + (loop $label1 + (local.set $19 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + (i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $19) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $4 + (i32.sub + (local.get $4) + (i32.const 1) + ) + ) + ) + ) + ) + ) + (local.get $scratch) + ) + (i32.const 3) + ) + ) + (loop $label2 + (local.set $4 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + 
(i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $4) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) ) ) (local.set $5 (i32.add (local.get $5) - (local.get $9) - ) - ) - (local.set $7 - (i32.add - (local.get $4) - (local.get $9) - ) - ) - (local.set $6 - (local.get $9) - ) - (loop $label - (local.set $20 - (v128.load align=1 - (local.get $4) - ) - ) - (v128.store align=1 - (local.get $4) - (v128.load align=1 - (local.tee $8 - (i32.add - (local.get $4) - (local.get $10) - ) - ) - ) - ) - (v128.store align=1 - (local.get $8) - (local.get $20) - ) - (local.set $4 - (i32.add - (local.get $4) - (i32.const 16) - ) - ) - (br_if $label - (local.tee $6 - (i32.sub - (local.get $6) - (i32.const 16) - ) - ) - ) - ) - (local.set $6 - (local.get $15) - ) - (br_if $block3 - (i32.eq - (local.get $2) - (local.get $9) - ) - ) - (br $block4) - ) - (local.set $7 - (local.get $4) - ) - (local.set $6 - (local.get $2) - ) - ) - (br_if $block3 - (i32.lt_u - (block (result i32) - (local.set $scratch - (i32.sub - (local.get $6) - (i32.const 1) - ) - ) - (if - (local.tee $4 - (i32.and - (local.get $6) - (i32.const 3) 
- ) - ) - (then - (local.set $6 - (i32.and - (local.get $6) - (i32.const -4) - ) - ) - (loop $label1 - (local.set $19 - (i32.load8_u - (local.get $7) - ) - ) - (i32.store8 - (local.get $7) - (i32.load8_u - (local.get $5) - ) - ) - (i32.store8 - (local.get $5) - (local.get $19) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - (local.set $7 - (i32.add - (local.get $7) - (i32.const 1) - ) - ) - (br_if $label1 - (local.tee $4 - (i32.sub - (local.get $4) - (i32.const 1) - ) - ) - ) - ) - ) - ) - (local.get $scratch) - ) - (i32.const 3) - ) - ) - (loop $label2 - (local.set $4 - (i32.load8_u - (local.get $7) - ) - ) - (i32.store8 - (local.get $7) - (i32.load8_u - (local.get $5) - ) - ) - (i32.store8 - (local.get $5) - (local.get $4) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 1) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 2) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $5) - (i32.const 2) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 3) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $5) - (i32.const 3) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $7 - (i32.add - (local.get $7) - (i32.const 4) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 4) - ) - ) - (br_if $label2 - (local.tee $6 - (i32.sub - (local.get $6) (i32.const 4) ) ) - ) - ) - ) - (br_if $label3 - (i32.le_u - (local.tee $5 - (i32.sub - (local.get $12) - (local.get $10) + (br_if $label2 + (local.tee $6 + (i32.sub + (local.get $6) + 
(i32.const 4) + ) + ) ) ) - (local.get $12) + ) + (br_if $label3 + (i32.le_u + (local.tee $5 + (i32.sub + (local.get $12) + (local.get $10) + ) + ) + (local.get $12) + ) ) ) ) - ) - (br_if $label4 - (i32.lt_u - (local.tee $11 - (i32.add - (local.get $2) - (local.get $11) + (br_if $label4 + (i32.lt_u + (local.tee $11 + (i32.add + (local.get $2) + (local.get $11) + ) ) + (local.get $14) ) - (local.get $14) ) ) - ) - (br_if $label5 - (i32.ge_u - (local.get $13) - (i32.const 2) + (br_if $label5 + (i32.ge_u + (local.get $13) + (i32.const 2) + ) ) ) ) diff --git a/sqlite3/libc/math.h b/sqlite3/libc/math.h new file mode 100644 index 0000000..76128b3 --- /dev/null +++ b/sqlite3/libc/math.h @@ -0,0 +1,29 @@ +#ifndef _WASM_SIMD128_MATH_H +#define _WASM_SIMD128_MATH_H + +#include <wasm_simd128.h> + +#include_next <math.h> // the system math.h + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __wasm_relaxed_simd__ + +__attribute__((weak)) +double fma(double x, double y, double z) { + const v128_t wx = wasm_f64x2_splat(x); + const v128_t wy = wasm_f64x2_splat(y); + const v128_t wz = wasm_f64x2_splat(z); + const v128_t wr = wasm_f64x2_relaxed_madd(wx, wy, wz); + return wasm_f64x2_extract_lane(wr, 0); +} + +#endif // __wasm_relaxed_simd__ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _WASM_SIMD128_MATH_H \ No newline at end of file diff --git a/sqlite3/libc/stdlib.h b/sqlite3/libc/stdlib.h index ba465ba..760af84 100644 --- a/sqlite3/libc/stdlib.h +++ b/sqlite3/libc/stdlib.h @@ -17,22 +17,31 @@ extern "C" { __attribute__((weak)) void qsort(void *base, size_t nel, size_t width, int (*comp)(const void *, const void *)) { - if (width == 0) return; - + // If nel is zero, we're required to do nothing. + // If it's one, the array is already sorted. size_t wnel = width * nel; size_t gap = nel; while (gap > 1) { + // Use 64-bit unsigned arithmetic to avoid intermediate overflow. + // Absent overflow, gap will be strictly less than its previous value.
+ // Once it is one or zero, set it to one: do a final pass, and stop. gap = (5ull * gap - 1) / 11; if (gap == 0) gap = 1; + // It'd be undefined behavior for wnel to overflow a size_t; + // or if width is zero: the base pointer would be invalid. + // Since gap is strictly less than nel, we can assume + // wgap is strictly less than wnel. size_t wgap = width * gap; __builtin_assume(wgap < wnel); for (size_t i = wgap; i < wnel; i += width) { + // Even without overflow flags, the overflow builtin helps the compiler. for (size_t j = i; !__builtin_sub_overflow(j, wgap, &j);) { char *a = j + (char *)base; char *b = a + wgap; if (comp(a, b) <= 0) break; + // This well known loop is automatically vectorized. size_t s = width; do { char tmp = *a; diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h new file mode 100644 index 0000000..cc3e8ec --- /dev/null +++ b/sqlite3/libc/strings.h @@ -0,0 +1,47 @@ +#ifndef _WASM_SIMD128_STRINGS_H +#define _WASM_SIMD128_STRINGS_H + +#include <stddef.h> +#include <wasm_simd128.h> + +#include_next <strings.h> // the system strings.h + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __wasm_simd128__ + +__attribute__((weak)) +int bcmp(const void *v1, const void *v2, size_t n) { + // bcmp is the same as memcmp but only compares for equality. + + const v128_t *w1 = v1; + const v128_t *w2 = v2; + for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + // Find any single bit difference. + if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { + return 1; + } + w1++; + w2++; + } + + // Continue byte-by-byte. + const unsigned char *u1 = (void *)w1; + const unsigned char *u2 = (void *)w2; + while (n--) { + if (*u1 != *u2) return 1; + u1++; + u2++; + } + return 0; +} + +#endif // __wasm_simd128__ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _WASM_SIMD128_STRINGS_H \ No newline at end of file