diff --git a/embed/bcw2/build.sh b/embed/bcw2/build.sh index c2502db..0a1670a 100755 --- a/embed/bcw2/build.sh +++ b/embed/bcw2/build.sh @@ -46,9 +46,9 @@ cd ~- -o bcw2.wasm build/main.c \ -I"$ROOT/sqlite3/libc" -I"build" \ -mexec-model=reactor \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,--stack-first \ -Wl,--import-undefined \ @@ -61,6 +61,6 @@ cd ~- "$BINARYEN/wasm-ctor-eval" -g -c _initialize bcw2.wasm -o bcw2.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ bcw2.tmp -o bcw2.wasm --low-memory-unused \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue \ No newline at end of file diff --git a/embed/build.sh b/embed/build.sh index 88e12be..c96a186 100755 --- a/embed/build.sh +++ b/embed/build.sh @@ -14,9 +14,9 @@ trap 'rm -f sqlite3.tmp' EXIT -o sqlite3.wasm "$ROOT/sqlite3/main.c" \ -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -mexec-model=reactor \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,--stack-first \ -Wl,--import-undefined \ @@ -28,6 +28,6 @@ trap 'rm -f sqlite3.tmp' EXIT "$BINARYEN/wasm-ctor-eval" -g -c _initialize sqlite3.wasm -o sqlite3.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ sqlite3.tmp -o sqlite3.wasm --low-memory-unused \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue \ No newline at end of file diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 9893823..e28dd43 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -19,9 +19,9 @@ EOF -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ -o libc.wasm -I. "$SRCS" \ -mexec-model=reactor \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,-z,stack-size=4096 \ -Wl,--stack-first \ @@ -54,8 +54,8 @@ EOF "$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ libc.tmp -o libc.wasm \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue "$BINARYEN/wasm-dis" -o libc.wat libc.wasm \ No newline at end of file diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 1fa577a..3e5583b 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 9db0a23..7eed352 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -2164,13 +2164,13 @@ (local $18 v128) (local $19 v128) (local $20 v128) - (local.set $5 + (local.set $10 (i32.load8_u (local.get $2) ) ) (local.set $6 - (local.tee $10 + (local.tee $8 (i32.sub (local.get $3) (i32.const 1) @@ -2181,7 +2181,7 @@ (loop $label (br_if $block (i32.ne - (local.get $5) + (local.get $10) (local.tee $7 (i32.load8_u (i32.add @@ -2205,15 +2205,15 @@ (i32.load8_u (i32.add (local.get $2) - (local.get $10) + (local.get $8) ) ) ) (local.set $6 - (local.get $10) + (local.get $8) ) ) - (block $block6 + (block $block5 (block $block1 (br_if $block1 (i32.lt_u @@ -2239,7 +2239,7 @@ ) (local.set $19 (i8x16.splat - (local.get $5) + (local.get $10) ) ) (local.set $15 @@ -2248,12 +2248,18 @@ (i32.const 14) ) ) - (local.set $16 + (local.set $10 (i32.add (local.get $2) (i32.const 1) ) ) + (local.set $16 + (i32.lt_u + (local.get $3) + (i32.const 17) + ) + ) (loop $label4 (block $block2 (br_if $block2 @@ -2310,134 +2316,118 @@ ) ) (loop $label3 - (br_if $block6 - (i32.eqz - (block $block5 (result i32) - (local.set $8 - (i32.add - (local.tee $7 - (i32.add - (local.get $0) - (i32.ctz - (local.get $11) - ) - ) - ) - (i32.const 1) + (local.set $9 + (i32.add + (local.tee $12 + (i32.add + (local.get $0) + (i32.ctz + (local.get $11) ) ) + ) + (i32.const 1) + ) + ) + (block $block4 + (if + (i32.eqz + (local.get $16) + ) + (then + (local.set $7 + (local.get $10) + ) (local.set $5 - (local.get $16) + (local.get $8) ) - (local.set $12 - (i32.const 0) - ) - (block $block4 - (if - (i32.ge_u - (local.tee $9 - (local.get $10) - ) - (i32.const 16) - ) - (then - (local.set $12 - (i32.const 1) - ) - (loop $label1 - (br_if $block4 - (v128.any_true - (v128.xor - (v128.load align=1 - (local.get $5) - ) - (v128.load align=1 - (local.get $8) - ) - ) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (local.tee $13 - (i32.add - (i32.and - (i32.sub - (local.get $9) - (i32.const 1) - ) - (i32.const 15) - ) - (i32.const 1) - ) - ) - ) - ) - (local.set $8 - (i32.add - (local.get $8) - (local.get $13) - ) - ) - (br_if $label1 - (local.tee $9 - (i32.sub - (local.get $9) - (local.get $13) - ) - ) - ) - ) - (br $block5 - (i32.const 0) - ) - ) - ) + (loop $label1 (br_if $block4 - (i32.eqz - (local.get $9) - ) - ) - (loop $label2 - (drop - (br_if $block5 - (i32.const 1) - (i32.ne - (i32.load8_u - (local.get $8) - ) - (i32.load8_u - (local.get $5) - ) + (v128.any_true + (v128.xor + (v128.load align=1 + (local.get $7) + ) + (v128.load align=1 + (local.get $9) ) ) ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - (local.set $8 - (i32.add - (local.get $8) - (i32.const 1) - ) - ) - (br_if $label2 - (local.tee $9 - (i32.sub - (local.get $9) + ) + (local.set $7 + (i32.add + (local.get $7) + (local.tee $13 + (i32.add + (i32.and + (i32.sub + (local.get $5) + (i32.const 1) + ) + (i32.const 15) + ) (i32.const 1) ) ) ) ) + (local.set $9 + (i32.add + (local.get $9) + (local.get $13) + ) + ) + (br_if $label1 + (local.tee $5 + (i32.sub + (local.get $5) + (local.get $13) + ) + ) + ) ) - (local.get $12) + (br $block5) ) ) + (local.set $5 + (local.get $8) + ) + (local.set $7 + (local.get $10) + ) + (loop $label2 + (br_if $block4 + (i32.ne + (i32.load8_u + (local.get $9) + ) + (i32.load8_u + (local.get $7) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label2 + (local.tee $5 + (i32.sub + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (br $block5) ) (br_if $label3 (local.tee $11 @@ -2476,17 +2466,17 @@ ) ) ) - (block $block7 + (block $block6 (if (i32.ne (local.get $1) (i32.const -1) ) (then - (local.set $7 + (local.set $12 (i32.const 0) ) - (br_if $block6 + (br_if $block5 (i32.lt_u (local.get $1) (local.tee $1 @@ -2497,19 +2487,19 @@ ) ) ) - (br_if $block6 + (br_if $block5 (i32.lt_u (local.get $1) (local.get $3) ) ) - (br $block7) + (br $block6) ) ) (local.set $1 (i32.const -1) ) - (br_if $block7 + (br_if $block6 (i8x16.all_true (local.get $17) ) @@ -2531,16 +2521,16 @@ ) ) ) - (local.set $10 + (local.set $8 (i32.sub (local.get $1) (local.get $3) ) ) - (local.set $4 + (local.set $5 (i32.const 0) ) - (local.set $1 + (local.set $4 (i32.ne (local.get $1) (i32.const -1) @@ -2551,14 +2541,14 @@ (i32.const 0) ) (loop $label6 - (local.set $7 + (local.set $12 (i32.const 0) ) - (br_if $block6 + (br_if $block5 (i32.eqz (i32.or - (local.get $1) - (local.tee $5 + (local.get $4) + (local.tee $1 (i32.load8_u (i32.add (local.get $0) @@ -2571,7 +2561,7 @@ ) (if (i32.ne - (local.get $5) + (local.get $1) (i32.load8_u (i32.add (local.get $2) @@ -2588,16 +2578,16 @@ ) (br_if $label5 (i32.le_u - (local.tee $4 + (local.tee $5 (i32.add - (local.get $4) + (local.get $5) (i32.const 1) ) ) - (local.get $10) + (local.get $8) ) ) - (br $block6) + (br $block5) ) ) (br_if $label6 @@ -2613,11 +2603,11 @@ ) ) ) - (local.set $7 + (local.set $12 (local.get $0) ) ) - (local.get $7) + (local.get $12) ) (func $strstr (param $0 i32) (param $1 i32) (result i32) (local $2 i32) diff --git a/sqlite3/libc/stdlib.h b/sqlite3/libc/stdlib.h index 760af84..7ac8177 100644 --- a/sqlite3/libc/stdlib.h +++ b/sqlite3/libc/stdlib.h @@ -1,9 +1,6 @@ #ifndef _WASM_SIMD128_STDLIB_H #define _WASM_SIMD128_STDLIB_H -#include -#include - #include_next // the system stdlib.h #ifdef __cplusplus diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 04764f8..7a1e857 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -1,9 +1,7 @@ #ifndef _WASM_SIMD128_STRING_H #define _WASM_SIMD128_STRING_H -#include #include -#include #include #include <__macro_PAGESIZE.h> @@ -82,6 +80,49 @@ int memcmp(const void *v1, const void *v2, size_t n) { return 0; } +#ifdef __OPTIMIZE_SIZE__ + +// __memcmpeq is the same as memcmp but only compares for equality. + +#define __memcmpeq(v1, v2, n) memcmp(v1, v2, n) + +#else // __OPTIMIZE_SIZE__ + +static int __memcmpeq(const void *v1, const void *v2, size_t n) { + // Baseline algorithm. + if (n < sizeof(v128_t)) { + const unsigned char *u1 = (unsigned char *)v1; + const unsigned char *u2 = (unsigned char *)v2; + while (n--) { + if (*u1 != *u2) return 1; + u1++; + u2++; + } + return 0; + } + + // memcmpeq is allowed to read up to n bytes from each object. + // Unaligned loads handle the case where the objects + // have mismatching alignments. + const v128_t *w1 = (v128_t *)v1; + const v128_t *w2 = (v128_t *)v2; + while (n) { + // Find any single bit difference. + if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { + return 1; + } + // This makes n a multiple of sizeof(v128_t) + // for every iteration except the first. + size_t align = (n - 1) % sizeof(v128_t) + 1; + w1 = (v128_t *)((char *)w1 + align); + w2 = (v128_t *)((char *)w2 + align); + n -= align; + } + return 0; +} + +#endif // __OPTIMIZE_SIZE__ + __attribute__((weak)) void *memchr(const void *v, int c, size_t n) { // When n is zero, a function that locates a character finds no occurrence. @@ -510,7 +551,7 @@ static const char *__memmem_raita(const char *haystk, size_t sh, // Each iteration clears that bit, tries again. for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) { size_t ctz = __builtin_ctz(mask); - if (!bcmp(haystk + ctz + 1, needle + 1, sn - 1)) { + if (!__memcmpeq(haystk + ctz + 1, needle + 1, sn - 1)) { return haystk + ctz; } } diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h index 811400d..373a33a 100644 --- a/sqlite3/libc/strings.h +++ b/sqlite3/libc/strings.h @@ -1,8 +1,7 @@ #ifndef _WASM_SIMD128_STRINGS_H #define _WASM_SIMD128_STRINGS_H -#include -#include +#include #include_next // the system strings.h @@ -11,45 +10,12 @@ extern "C" { #endif #ifdef __wasm_simd128__ -#ifndef __OPTIMIZE_SIZE__ __attribute__((weak)) int bcmp(const void *v1, const void *v2, size_t n) { - // bcmp is the same as memcmp but only compares for equality. - - // Baseline algorithm. - if (n < sizeof(v128_t)) { - const unsigned char *u1 = (unsigned char *)v1; - const unsigned char *u2 = (unsigned char *)v2; - while (n--) { - if (*u1 != *u2) return 1; - u1++; - u2++; - } - return 0; - } - - // bcmp is allowed to read up to n bytes from each object. - // Unaligned loads handle the case where the objects - // have mismatching alignments. - const v128_t *w1 = (v128_t *)v1; - const v128_t *w2 = (v128_t *)v2; - while (n) { - // Find any single bit difference. - if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { - return 1; - } - // This makes n a multiple of sizeof(v128_t) - // for every iteration except the first. - size_t align = (n - 1) % sizeof(v128_t) + 1; - w1 = (v128_t *)((char *)w1 + align); - w2 = (v128_t *)((char *)w2 + align); - n -= align; - } - return 0; + return __memcmpeq(v1, v2, n); } -#endif // __OPTIMIZE_SIZE__ #endif // __wasm_simd128__ #ifdef __cplusplus diff --git a/util/sql3util/wasm/build.sh b/util/sql3util/wasm/build.sh index e445a5b..4146943 100755 --- a/util/sql3util/wasm/build.sh +++ b/util/sql3util/wasm/build.sh @@ -14,9 +14,9 @@ trap 'rm -f sql3parse_table.tmp' EXIT -o sql3parse_table.wasm main.c \ -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ -mexec-model=reactor \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,--stack-first \ -Wl,--import-undefined \ @@ -25,6 +25,6 @@ trap 'rm -f sql3parse_table.tmp' EXIT "$BINARYEN/wasm-ctor-eval" -c _initialize sql3parse_table.wasm -o sql3parse_table.tmp "$BINARYEN/wasm-opt" --strip --strip-debug --strip-producers -c -Oz \ sql3parse_table.tmp -o sql3parse_table.wasm --low-memory-unused \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext \ No newline at end of file + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue \ No newline at end of file diff --git a/vfs/tests/mptest/wasm/build.sh b/vfs/tests/mptest/wasm/build.sh index 1596c60..151c8cc 100755 --- a/vfs/tests/mptest/wasm/build.sh +++ b/vfs/tests/mptest/wasm/build.sh @@ -10,9 +10,9 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -o mptest.wasm main.c \ -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,--stack-first \ -Wl,--import-undefined \ @@ -27,7 +27,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ mptest.wasm -o mptest.tmp --low-memory-unused \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue mv mptest.tmp mptest.wasm \ No newline at end of file diff --git a/vfs/tests/speedtest1/wasm/build.sh b/vfs/tests/speedtest1/wasm/build.sh index 0a22ee9..1b2d548 100755 --- a/vfs/tests/speedtest1/wasm/build.sh +++ b/vfs/tests/speedtest1/wasm/build.sh @@ -10,9 +10,9 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -o speedtest1.wasm main.c \ -I"$ROOT/sqlite3/libc" -I"$ROOT/sqlite3" \ - -msimd128 -mmutable-globals -mmultivalue \ - -mbulk-memory -mreference-types \ - -mnontrapping-fptoint -msign-ext \ + -mmutable-globals -mnontrapping-fptoint \ + -msimd128 -mbulk-memory -msign-ext \ + -mreference-types -mmultivalue \ -fno-stack-protector -fno-stack-clash-protection \ -Wl,--stack-first \ -Wl,--import-undefined \ @@ -22,7 +22,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk/bin" "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ speedtest1.wasm -o speedtest1.tmp --low-memory-unused \ - --enable-simd --enable-mutable-globals --enable-multivalue \ - --enable-bulk-memory --enable-reference-types \ - --enable-nontrapping-float-to-int --enable-sign-ext + --enable-mutable-globals --enable-nontrapping-float-to-int \ + --enable-simd --enable-bulk-memory --enable-sign-ext \ + --enable-reference-types --enable-multivalue mv speedtest1.tmp speedtest1.wasm \ No newline at end of file