diff --git a/embed/build.sh b/embed/build.sh index 117f8f1..88e12be 100755 --- a/embed/build.sh +++ b/embed/build.sh @@ -12,7 +12,7 @@ trap 'rm -f sqlite3.tmp' EXIT "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ -o sqlite3.wasm "$ROOT/sqlite3/main.c" \ - -I"$ROOT/sqlite3" \ + -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -mexec-model=reactor \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 3d9aa3d..855a393 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -6,13 +6,14 @@ cd -P -- "$(dirname -- "$0")" ROOT=../../ BINARYEN="$ROOT/tools/binaryen/bin" WASI_SDK="$ROOT/tools/wasi-sdk/bin" -SRCS="${1:-../strings.c}" +SRCS="${1:-libc.c}" "../tools.sh" -trap 'rm -f libc.tmp' EXIT +trap 'rm -f libc.c libc.tmp' EXIT +echo '#include ' > libc.c "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ - -o libc.wasm "$SRCS" \ + -o libc.wasm -I. "$SRCS" \ -mexec-model=reactor \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index f376156..995c28e 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 65e52cc..738ab4c 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -849,9 +849,11 @@ (func $strspn (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) + (local $4 v128) + (local $scratch i32) (if (i32.eqz - (local.tee $2 + (local.tee $3 (i32.load8_u (local.get $1) ) @@ -863,48 +865,108 @@ ) ) ) - (v128.store - (i32.const 65520) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 65504) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label - (i32.store - (local.tee $3 - (i32.add - (i32.and - (i32.shr_u - (local.get $2) - (i32.const 3) + (block $block1 + (if + (i32.eqz + (i32.load8_u offset=1 + (local.get $1) + ) + ) + (then + (block $block + (br_if $block + (i32.gt_u + (local.tee $1 + (local.get $0) + ) + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + ) + (local.set $4 + (i8x16.splat + (local.get $3) + ) + ) + (loop $label + (br_if $block + (i32.eqz + (i8x16.all_true + (i8x16.eq + (v128.load align=1 + (local.get $1) + ) + (local.get $4) + ) + ) + ) + ) + (br_if $label + (i32.le_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.get $2) + ) ) - (i32.const 28) ) - (i32.const 65504) ) - ) - (i32.or - (i32.load - (local.get $3) + (local.set $2 + (i32.add + (i32.xor + (local.get $0) + (i32.const -1) + ) + (local.get $1) + ) ) - (i32.shl - (i32.const 1) - (local.get $2) + (loop $label1 + (local.set $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label1 + (i32.eq + (block (result i32) + (local.set $scratch + (i32.load8_u + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $scratch) + ) + (local.get $3) + ) + ) ) + (br $block1) ) ) - (local.set $2 - (i32.load8_u - (local.get $1) - ) + (v128.store + (i32.const 65520) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (v128.store + (i32.const 65504) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (local.set $1 (i32.add @@ -912,20 +974,62 @@ (i32.const 1) ) ) - (br_if $label - (local.get $2) - ) - ) - (if - (local.tee $2 - (i32.load8_u - (local.tee $1 - (local.get $0) + (loop $label2 + (i32.store + (local.tee $2 + (i32.add + (i32.and + (i32.shr_u + (local.get $3) + (i32.const 3) + ) + (i32.const 28) + ) + (i32.const 65504) + ) + ) + (i32.or + (i32.load + (local.get $2) + ) + (i32.shl + (i32.const 1) + (local.get $3) + ) ) ) + (local.set $3 + (i32.load8_u + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label2 + (local.get $3) + ) ) - (then - (loop $label1 + (local.set $2 + (local.get $0) + ) + (block $block2 + (br_if $block2 + (i32.eqz + (local.tee $3 + (i32.load8_u + (local.get $0) + ) + ) + ) + ) + (local.set $1 + (local.get $0) + ) + (loop $label3 (if (i32.eqz (i32.and @@ -934,7 +1038,7 @@ (i32.add (i32.and (i32.shr_u - (local.get $2) + (local.get $3) (i32.const 3) ) (i32.const 28) @@ -942,41 +1046,44 @@ (i32.const 65504) ) ) - (local.get $2) + (local.get $3) ) (i32.const 1) ) ) (then - (return - (i32.sub - (local.get $1) - (local.get $0) - ) + (local.set $2 + (local.get $1) ) + (br $block2) ) ) - (local.set $2 + (local.set $3 (i32.load8_u offset=1 (local.get $1) ) ) (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 1) + ) ) ) - (br_if $label1 - (local.get $2) + (br_if $label3 + (local.get $3) ) ) ) + (local.set $2 + (i32.sub + (local.get $2) + (local.get $0) + ) + ) ) - (i32.sub - (local.get $1) - (local.get $0) - ) + (local.get $2) ) (func $strcspn (param $0 i32) (param $1 i32) (result i32) (local $2 i32) diff --git a/sqlite3/strings.c b/sqlite3/libc/string.h similarity index 79% rename from sqlite3/strings.c rename to sqlite3/libc/string.h index cdf5389..a785887 100644 --- a/sqlite3/strings.c +++ b/sqlite3/libc/string.h @@ -1,10 +1,23 @@ +#ifndef _WASM_SIMD128_STRING_H +#define _WASM_SIMD128_STRING_H + #include #include #include #include <__macro_PAGESIZE.h> +#include_next // the system string.h + +#ifdef __cplusplus +extern "C" { +#endif + #ifdef __wasm_bulk_memory__ +// Use the builtins if compiled with bulk memory operations. +// Clang will intrinsify using SIMD for small, constant N. +// For everything else, this helps inlining. + void *memset(void *dest, int c, size_t n) { return __builtin_memset(dest, c, n); } @@ -17,10 +30,19 @@ void *memmove(void *dest, const void *src, size_t n) { return __builtin_memmove(dest, src, n); } -#endif +#endif // __wasm_bulk_memory__ #ifdef __wasm_simd128__ +// SIMD versions of some string.h functions. +// +// These assume aligned v128_t reads can't fail, +// and so can't unaligned reads up to the last +// aligned address less than memory size. +// +// These also assume unaligned access is not painfully slow, +// but that bitmask extraction is slow on AArch64. + int memcmp(const void *v1, const void *v2, size_t n) { const v128_t *w1 = v1; const v128_t *w2 = v2; @@ -166,16 +188,34 @@ char *strchr(const char *s, int c) { return *(char *)r == (char)c ? r : NULL; } -#endif +#pragma push_macro("BITOP") #define BITOP(a, b, op) \ ((a)[(b) / (8 * sizeof(size_t))] op((size_t)1) \ << ((b) % (8 * sizeof(size_t)))) size_t strspn(const char *s, const char *c) { - if (!c[0]) return 0; - const char *const a = s; + + if (!c[0]) return 0; + if (!c[1]) { + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + + const v128_t *w = (void *)s; + const v128_t wc = wasm_i8x16_splat(*c); + while (w <= limit) { + if (!wasm_i8x16_all_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) { + break; + } + w++; + } + + s = (void *)w; + while (*s == *c) s++; + return s - a; + } + size_t byteset[32 / sizeof(size_t)] = {0}; for (; *c && BITOP(byteset, *(uint8_t *)c, |=); c++); @@ -194,4 +234,12 @@ size_t strcspn(const char *s, const char *c) { return s - a; } -#undef BITOP +#pragma pop_macro("BITOP") + +#endif // __wasm_simd128__ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _WASM_SIMD128_STRING_H \ No newline at end of file diff --git a/sqlite3/main.c b/sqlite3/main.c index 88da7e7..62eb1aa 100644 --- a/sqlite3/main.c +++ b/sqlite3/main.c @@ -1,4 +1,3 @@ -#include "strings.c" // Amalgamation #include "sqlite3.c" // Extensions diff --git a/util/sql3util/wasm/build.sh b/util/sql3util/wasm/build.sh index 53a00c5..e445a5b 100755 --- a/util/sql3util/wasm/build.sh +++ b/util/sql3util/wasm/build.sh @@ -12,7 +12,7 @@ trap 'rm -f sql3parse_table.tmp' EXIT "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -Oz \ -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ -o sql3parse_table.wasm main.c \ - -I"$ROOT/sqlite3" \ + -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -mexec-model=reactor \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ diff --git a/util/sql3util/wasm/main.c b/util/sql3util/wasm/main.c index 3f04173..ede7edb 100644 --- a/util/sql3util/wasm/main.c +++ b/util/sql3util/wasm/main.c @@ -1,6 +1,5 @@ #include -#include "strings.c" #include "sql3parse_table.c" static_assert(offsetof(sql3table, name) == 0, "Unexpected offset"); diff --git a/vfs/tests/mptest/wasm/build.sh b/vfs/tests/mptest/wasm/build.sh index 18c2fd5..1596c60 100755 --- a/vfs/tests/mptest/wasm/build.sh +++ b/vfs/tests/mptest/wasm/build.sh @@ -9,7 +9,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -o mptest.wasm main.c \ - -I"$ROOT/sqlite3" \ + -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ -mnontrapping-fptoint -msign-ext \ diff --git a/vfs/tests/mptest/wasm/main.c b/vfs/tests/mptest/wasm/main.c index ba70fc9..57d0ef7 100644 --- a/vfs/tests/mptest/wasm/main.c +++ b/vfs/tests/mptest/wasm/main.c @@ -3,7 +3,6 @@ // Use the default callback, not the Go one we patched in. #define sqliteBusyCallback sqliteDefaultBusyCallback -#include "strings.c" // Amalgamation #include "sqlite3.c" // VFS diff --git a/vfs/tests/speedtest1/wasm/build.sh b/vfs/tests/speedtest1/wasm/build.sh index ffcf157..0a22ee9 100755 --- a/vfs/tests/speedtest1/wasm/build.sh +++ b/vfs/tests/speedtest1/wasm/build.sh @@ -9,7 +9,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -o speedtest1.wasm main.c \ - -I"$ROOT/sqlite3" \ + -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ -mnontrapping-fptoint -msign-ext \ diff --git a/vfs/tests/speedtest1/wasm/main.c b/vfs/tests/speedtest1/wasm/main.c index d191e8e..cc08d56 100644 --- a/vfs/tests/speedtest1/wasm/main.c +++ b/vfs/tests/speedtest1/wasm/main.c @@ -1,7 +1,6 @@ // Use the default callback, not the Go one we patched in. #define sqliteBusyCallback sqliteDefaultBusyCallback -#include "strings.c" // Amalgamation #include "sqlite3.c" // VFS