diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 855a393..cff8387 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -11,6 +11,7 @@ SRCS="${1:-libc.c}" trap 'rm -f libc.c libc.tmp' EXIT echo '#include ' > libc.c +echo '#include ' >> libc.c "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -o libc.wasm -I. "$SRCS" \ @@ -32,7 +33,8 @@ echo '#include ' > libc.c -Wl,--export=strcspn \ -Wl,--export=strlen \ -Wl,--export=strncmp \ - -Wl,--export=strspn + -Wl,--export=strspn \ + -Wl,--export=qsort "$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 995c28e..4358642 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 738ab4c..9373667 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1,9 +1,11 @@ (module $libc.wasm - (type $0 (func (param i32 i32 i32) (result i32))) - (type $1 (func (param i32 i32) (result i32))) + (type $0 (func (param i32 i32) (result i32))) + (type $1 (func (param i32 i32 i32) (result i32))) (type $2 (func (param i32) (result i32))) + (type $3 (func (param i32 i32 i32 i32))) (memory $0 256) (data $0 (i32.const 65536) "\01") + (table $0 1 1 funcref) (export "memory" (memory $0)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) @@ -16,6 +18,7 @@ (export "strchr" (func $strchr)) (export "strspn" (func $strspn)) (export "strcspn" (func $strcspn)) + (export "qsort" (func $qsort)) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -1315,6 +1318,457 @@ (local.get $0) ) ) + (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local 
$13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 v128) + (local $scratch i32) + (block $block + (br_if $block + (i32.eqz + (local.get $2) + ) + ) + (br_if $block + (i32.lt_u + (local.get $1) + (i32.const 2) + ) + ) + (local.set $14 + (i32.mul + (local.get $1) + (local.get $2) + ) + ) + (local.set $15 + (i32.and + (local.get $2) + (i32.const 15) + ) + ) + (local.set $9 + (i32.and + (local.get $2) + (i32.const -16) + ) + ) + (local.set $16 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (local.set $17 + (i32.lt_u + (local.get $2) + (i32.const 16) + ) + ) + (loop $label5 + (local.set $6 + (i32.eq + (local.get $1) + (i32.const 2) + ) + ) + (local.set $18 + (i32.le_u + (i32.add + (local.get $0) + (i32.mul + (i32.add + (local.tee $13 + (select + (i32.const 1) + (local.tee $1 + (i32.wrap_i64 + (i64.div_u + (i64.sub + (i64.mul + (i64.extend_i32_u + (local.get $1) + ) + (i64.const 5) + ) + (i64.const 1) + ) + (i64.const 11) + ) + ) + ) + (local.get $6) + ) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (local.get $0) + ) + ) + (local.set $11 + (local.tee $10 + (i32.mul + (local.get $2) + (local.get $13) + ) + ) + ) + (loop $label4 + (block $block1 + (br_if $block1 + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $11) + (local.get $10) + ) + ) + (local.get $11) + ) + ) + (loop $label3 + (br_if $block1 + (i32.le_s + (call_indirect $0 (type $0) + (local.tee $4 + (i32.add + (local.get $0) + (local.tee $12 + (local.get $5) + ) + ) + ) + (local.tee $5 + (i32.add + (local.get $4) + (local.get $10) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + (block $block3 + (block $block4 + (block $block2 + (br_if $block2 + (local.get $17) + ) + (br_if $block2 + (i32.and + (i32.eqz + (local.get $18) + ) + (i32.lt_u + (local.get $5) + (i32.add + (local.get $12) + (local.get $16) + ) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (local.get $9) + ) + ) + (local.set $7 + (i32.add + 
(local.get $4) + (local.get $9) + ) + ) + (local.set $6 + (local.get $9) + ) + (loop $label + (local.set $20 + (v128.load align=1 + (local.get $4) + ) + ) + (v128.store align=1 + (local.get $4) + (v128.load align=1 + (local.tee $8 + (i32.add + (local.get $4) + (local.get $10) + ) + ) + ) + ) + (v128.store align=1 + (local.get $8) + (local.get $20) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (br_if $label + (local.tee $6 + (i32.sub + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (local.set $6 + (local.get $15) + ) + (br_if $block3 + (i32.eq + (local.get $2) + (local.get $9) + ) + ) + (br $block4) + ) + (local.set $7 + (local.get $4) + ) + (local.set $6 + (local.get $2) + ) + ) + (br_if $block3 + (i32.lt_u + (block (result i32) + (local.set $scratch + (i32.sub + (local.get $6) + (i32.const 1) + ) + ) + (if + (local.tee $4 + (i32.and + (local.get $6) + (i32.const 3) + ) + ) + (then + (local.set $6 + (i32.and + (local.get $6) + (i32.const -4) + ) + ) + (loop $label1 + (local.set $19 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + (i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $19) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $4 + (i32.sub + (local.get $4) + (i32.const 1) + ) + ) + ) + ) + ) + ) + (local.get $scratch) + ) + (i32.const 3) + ) + ) + (loop $label2 + (local.set $4 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + (i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $4) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + 
(local.tee $4 + (i32.add + (local.get $7) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (br_if $label2 + (local.tee $6 + (i32.sub + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + ) + (br_if $label3 + (i32.le_u + (local.tee $5 + (i32.sub + (local.get $12) + (local.get $10) + ) + ) + (local.get $12) + ) + ) + ) + ) + (br_if $label4 + (i32.lt_u + (local.tee $11 + (i32.add + (local.get $2) + (local.get $11) + ) + ) + (local.get $14) + ) + ) + ) + (br_if $label5 + (i32.ge_u + (local.get $13) + (i32.const 2) + ) + ) + ) + ) + ) ;; features section: mutable-globals, nontrapping-float-to-int, simd, bulk-memory, sign-ext, reference-types, multivalue, bulk-memory-opt ) diff --git a/sqlite3/libc/stdlib.h b/sqlite3/libc/stdlib.h new file mode 100644 index 0000000..ba91da6 --- /dev/null +++ b/sqlite3/libc/stdlib.h @@ -0,0 +1,50 @@ +#ifndef _WASM_SIMD128_STDLIB_H +#define _WASM_SIMD128_STDLIB_H + +#include +#include + +#include_next // the system stdlib.h + +#ifdef __cplusplus +extern "C" { +#endif + +// Shellsort with Gonnet & Baeza-Yates gap sequence. +// Simple, no recursion, doesn't use the C stack. +// Clang auto-vectorizes the inner loop. 
// qsort - in-place Shellsort standing in for the C library qsort().
//
//   base   first element of the array to sort
//   nel    number of elements in the array
//   width  size of one element in bytes; a width of 0 returns immediately
//   comp   comparison callback; two elements are exchanged whenever
//          comp(lower, higher) returns a value greater than zero
//
// Gaps follow the Gonnet & Baeza-Yates recurrence gap = (5 * gap - 1) / 11,
// clamped so that the final pass always runs with gap 1.
// No recursion and no scratch buffer: elements are swapped byte by byte.
// Elements that compare equal may end up in a different relative order.
void qsort(void *base, size_t nel, size_t width,
           int (*comp)(const void *, const void *)) {
  if (width == 0) return;

  // All offsets below are byte offsets from base.
  size_t wnel = width * nel;
  size_t gap = nel;
  while (gap > 1) {
    gap = (5ull * gap - 1) / 11;
    if (gap == 0) gap = 1;  // gaps 2 and lower would otherwise shrink to 0

    size_t wgap = width * gap;
    // Optimizer hint only: gap < nel always holds at this point.
    // __builtin_assume is a Clang extension, so only use it where it exists;
    // the nested #if keeps compilers without __has_builtin from choking.
#if defined(__has_builtin)
#if __has_builtin(__builtin_assume)
    __builtin_assume(wgap < wnel);
#endif
#endif
    for (size_t i = wgap; i < wnel; i += width) {
      // Gapped insertion: walk j down in steps of wgap until the subtraction
      // would wrap below zero or the pair (j, j + wgap) is already in order.
      for (size_t j = i; !__builtin_sub_overflow(j, wgap, &j);) {
        char *a = j + (char *)base;
        char *b = a + wgap;
        if (comp(a, b) <= 0) break;

        // Exchange the two elements one byte at a time (width >= 1 here).
        size_t s = width;
        do {
          char tmp = *a;
          *a++ = *b;
          *b++ = tmp;
        } while (--s);
      }
    }
  }
}