diff --git a/embed/bcw2/bcw2.wasm b/embed/bcw2/bcw2.wasm index 113be09..8cbc72c 100755 Binary files a/embed/bcw2/bcw2.wasm and b/embed/bcw2/bcw2.wasm differ diff --git a/embed/sqlite3.wasm b/embed/sqlite3.wasm index be08c9d..60e4387 100755 Binary files a/embed/sqlite3.wasm and b/embed/sqlite3.wasm differ diff --git a/sqlite3/libc/README.md b/sqlite3/libc/README.md deleted file mode 100644 index b30a8a5..0000000 --- a/sqlite3/libc/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Using SIMD for libc - -I found that implementing some libc functions with Wasm SIMD128 can make them significantly faster. - -Rough numbers for [wazero](https://wazero.io/): - - function | speedup ------------- | ----- -`strlen` | 4.1× -`memchr` | 4.1× -`strchr` | 4.0× -`strrchr` | 9.1× -`memcmp` | 13.0× -`strcmp` | 10.4× -`strncmp` | 15.7× -`strcasecmp` | 8.8× -`strncasecmp`| 8.6× -`strspn` | 9.9× -`strcspn` | 9.0× -`memmem` | 2.2× -`strstr` | 5.5× -`strcasestr` | 25.2× - -For functions where musl uses SWAR on a 4-byte `size_t`, -the improvement is around 4×. -This is very close to the expected theoretical improvement, -as we're processing 4× the bytes per cycle (16 _vs._ 4). - -For other functions where there's no algorithmic change, -the improvement is around 8×. -These functions are harder to optimize -(which is why musl doesn't bother with SWAR), -so getting an 8× improvement from processing 16× bytes seems decent. - -String search is harder to compare, since there are algorithmic changes, -and different needles produce very different numbers. -We use [Quick Search](https://igm.univ-mlv.fr/~lecroq/string/node19.html) for `memmem`, -and a [Rabin–Karp](https://igm.univ-mlv.fr/~lecroq/string/node5.html) for `strstr` and `strcasestr`; -musl uses [Two Way](https://igm.univ-mlv.fr/~lecroq/string/node26.html) for `memmem` and `strstr`, -and [brute force](https://igm.univ-mlv.fr/~lecroq/string/node3.html) for `strcasestr`. -Unlike Two-Way, both replacements can go quadratic for long, periodic needles. \ No newline at end of file diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 4d4047c..ea68bfa 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -28,31 +28,18 @@ EOF -Wl,--stack-first \ -Wl,--import-undefined \ -Wl,--initial-memory=16777216 \ - -Wl,--export=memccpy \ -Wl,--export=memchr \ -Wl,--export=memcmp \ -Wl,--export=memcpy \ - -Wl,--export=memmem \ -Wl,--export=memmove \ -Wl,--export=memrchr \ -Wl,--export=memset \ - -Wl,--export=stpcpy \ - -Wl,--export=stpncpy \ - -Wl,--export=strcasecmp \ - -Wl,--export=strcasestr \ -Wl,--export=strchr \ -Wl,--export=strchrnul \ - -Wl,--export=strcmp \ - -Wl,--export=strcpy \ -Wl,--export=strcspn \ -Wl,--export=strlen \ - -Wl,--export=strncasecmp \ - -Wl,--export=strncat \ - -Wl,--export=strncmp \ - -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ - -Wl,--export=strstr \ -Wl,--export=qsort "$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 0769f94..a55a4d2 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 1e9394f..c9576d1 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1,40 +1,25 @@ (module $libc.wasm (type $0 (func (param i32 i32) (result i32))) (type $1 (func (param i32 i32 i32) (result i32))) - (type $2 (func (param i32 i32 i32 i32) (result i32))) - (type $3 (func (param i32 i32 i32 i32))) - (type $4 (func (param i32) (result i32))) - (type $5 (func (param i32 i32 i32 i32 i32) (result i32))) + (type $2 (func (param i32 i32 i32 i32))) + (type $3 (func (param i32) (result i32))) (memory $0 256) (data $0 (i32.const 4096) "\01") (table $0 1 1 funcref) (export "memory" (memory $0)) (export "qsort" (func $qsort)) - (export "strcasecmp" (func $strcasecmp)) - (export "strlen" (func $strlen)) - (export "strncasecmp" (func $strncasecmp)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) (export "memmove" (func $memcpy)) (export "memcmp" (func $memcmp)) (export "memchr" (func $memchr)) (export "memrchr" (func $memrchr)) - (export "strcmp" (func $strcmp)) - (export "strncmp" (func $strncmp)) + (export "strlen" (func $strlen)) (export "strchrnul" (func $strchrnul)) (export "strchr" (func $strchr)) (export "strrchr" (func $strrchr)) (export "strspn" (func $strspn)) (export "strcspn" (func $strcspn)) - (export "memmem" (func $memmem)) - (export "strstr" (func $strstr)) - (export "strcasestr" (func $strcasestr)) - (export "memccpy" (func $memccpy)) - (export "strncat" (func $strncat)) - (export "stpcpy" (func $stpcpy)) - (export "strcpy" (func $strcpy)) - (export "stpncpy" (func $stpncpy)) - (export "strncpy" (func $strncpy)) (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (local $4 i32) (local $5 i32) @@ -481,542 +466,6 @@ ) ) ) - (func $strcasecmp (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (local $3 i32) - (local $4 v128) - (block $block - (br_if $block - (i32.lt_u - (local.tee $2 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) - ) - ) - ) - ) - (i32.const 16) - ) - ) - (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.or - (local.tee $4 - (v128.load align=1 - (local.get $1) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i8x16.add - (local.get $4) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - (local.tee $4 - (v128.or - (local.tee $4 - (v128.load align=1 - (local.get $0) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i8x16.add - (local.get $4) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - (local.get $4) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - (if - (i32.eq - (local.tee $2 - (select - (i32.or - (local.tee $2 - (i32.load8_u - (local.get $0) - ) - ) - (i32.const 32) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $3 - (select - (i32.or - (local.tee $3 - (i32.load8_u - (local.get $1) - ) - ) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - (then - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.eqz - (local.get $2) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $2 - (i32.load8_u - (local.get $0) - ) - ) - (local.set $3 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (i32.eq - (local.tee $2 - (select - (i32.or - (local.get $2) - (i32.const 32) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $3 - (select - (i32.or - (local.get $3) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - ) - ) - ) - ) - (i32.sub - (local.get $2) - (local.get $3) - ) - ) - (func $strlen (param $0 i32) (result i32) - (local $1 i32) - (local $2 i32) - (local $3 v128) - (block $block1 - (block $block - (br_if $block - (i8x16.all_true - (local.tee $3 - (v128.load - (local.tee $1 - (i32.and - (local.get $0) - (i32.const -16) - ) - ) - ) - ) - ) - ) - (br_if $block - (i32.eqz - (local.tee $2 - (i32.and - (i8x16.bitmask - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - ) - (i32.shl - (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) - ) - ) - ) - ) - ) - ) - (br $block1) - ) - (loop $label - (local.set $3 - (v128.load offset=16 - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (br_if $label - (i8x16.all_true - (local.get $3) - ) - ) - ) - (local.set $2 - (i8x16.bitmask - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - ) - ) - ) - (i32.add - (i32.ctz - (local.get $2) - ) - (i32.sub - (local.get $1) - (local.get $0) - ) - ) - ) - (func $strncasecmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 i32) - (local $4 i32) - (local $5 v128) - (block $block - (if - (i32.ge_u - (local.tee $2 - (select - (local.tee $3 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) - ) - ) - ) - ) - (local.get $2) - (i32.gt_u - (local.get $2) - (local.get $3) - ) - ) - ) - (i32.const 16) - ) - (then - (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.or - (local.tee $5 - (v128.load align=1 - (local.get $1) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i8x16.add - (local.get $5) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - (local.tee $5 - (v128.or - (local.tee $5 - (v128.load align=1 - (local.get $0) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i8x16.add - (local.get $5) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - (local.get $5) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - ) - (br_if $block - (local.get $2) - ) - (return - (i32.const 0) - ) - ) - (local.set $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.ne - (local.tee $3 - (select - (i32.or - (local.tee $3 - (i32.load8_u - (local.get $0) - ) - ) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $4 - (select - (i32.or - (local.tee $4 - (i32.load8_u - (local.get $1) - ) - ) - (i32.const 32) - ) - (local.get $4) - (i32.lt_u - (i32.sub - (local.get $4) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - (then - (return - (i32.sub - (local.get $3) - (local.get $4) - ) - ) - ) - ) - (if - (local.get $3) - (then - (local.set $2 - (i32.sub - (local.tee $3 - (local.get $2) - ) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (local.get $3) - ) - ) - ) - ) - (i32.const 0) - ) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -1206,6 +655,11 @@ (v128.any_true (local.tee $6 (i8x16.eq + (local.tee $7 + (i8x16.splat + (local.get $1) + ) + ) (v128.load (local.tee $2 (i32.and @@ -1214,11 +668,6 @@ ) ) ) - (local.tee $7 - (i8x16.splat - (local.get $1) - ) - ) ) ) ) @@ -1272,10 +721,10 @@ (v128.any_true (local.tee $6 (i8x16.eq + (local.get $7) (v128.load (local.get $2) ) - (local.get $7) ) ) ) @@ -1445,57 +894,53 @@ ) (local.get $0) ) - (func $strcmp (param $0 i32) (param $1 i32) (result i32) + (func $strlen (param $0 i32) (result i32) + (local $1 i32) (local $2 i32) - (local $3 i32) - (local $4 v128) - (block $block - (br_if $block - (i32.lt_u - (local.tee $2 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) + (local $3 v128) + (block $block1 + (block $block + (br_if $block + (i8x16.all_true + (local.tee $3 + (v128.load + (local.tee $1 + (i32.and + (local.get $0) + (i32.const -16) + ) ) ) ) ) - (i32.const 16) ) + (br_if $block + (i32.eqz + (local.tee $2 + (i32.and + (i8x16.bitmask + (i8x16.eq + (local.get $3) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + ) + ) + ) + ) + (br $block1) ) (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.load align=1 - (local.get $1) - ) - (local.tee $4 - (v128.load align=1 - (local.get $0) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - (local.get $4) - ) - ) - (then - (return - (i32.const 0) - ) + (local.set $3 + (v128.load offset=16 + (local.get $1) ) ) (local.set $1 @@ -1504,255 +949,30 @@ (i32.const 16) ) ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - (if - (i32.eq - (local.tee $2 - (i32.load8_u - (local.get $0) - ) - ) - (local.tee $3 - (i32.load8_u - (local.get $1) - ) - ) - ) - (then - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.eqz - (i32.and - (local.get $2) - (i32.const 255) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $3 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $2 - (i32.load8_u - (local.get $0) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (br_if $label1 - (i32.eq - (local.get $2) - (local.get $3) - ) - ) - ) - ) - ) - (i32.sub - (local.get $2) - (local.get $3) - ) - ) - (func $strncmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 i32) - (local $4 i32) - (local $5 v128) - (block $block - (if - (i32.ge_u - (local.tee $2 - (select - (local.tee $3 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) - ) - ) - ) - ) - (local.get $2) - (i32.gt_u - (local.get $2) - (local.get $3) - ) - ) - ) - (i32.const 16) - ) - (then - (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.load align=1 - (local.get $1) - ) - (local.tee $5 - (v128.load align=1 - (local.get $0) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - (local.get $5) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - ) - (br_if $block - (local.get $2) - ) - (return - (i32.const 0) - ) - ) - (local.set $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.ne - (local.tee $3 - (i32.load8_u - (local.get $0) - ) - ) - (local.tee $4 - (i32.load8_u - (local.get $1) - ) - ) - ) - (then - (return - (i32.sub - (local.get $3) - (local.get $4) - ) - ) - ) - ) - (if - (local.get $3) - (then - (local.set $2 - (i32.sub - (local.tee $3 - (local.get $2) - ) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 + (i8x16.all_true (local.get $3) ) ) ) + (local.set $2 + (i8x16.bitmask + (i8x16.eq + (local.get $3) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + ) + ) + ) + (i32.add + (i32.ctz + (local.get $2) + ) + (i32.sub + (local.get $1) + (local.get $0) + ) ) - (i32.const 0) ) (func $strchrnul (param $0 i32) (param $1 i32) (result i32) (local $2 v128) @@ -1777,12 +997,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) (local.tee $3 (i8x16.splat (local.get $1) ) ) + (local.get $2) ) ) ) @@ -1828,8 +1048,8 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) (local.get $3) + (local.get $2) ) ) ) @@ -1873,12 +1093,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) (local.tee $3 (i8x16.splat (local.get $1) ) ) + (local.get $2) ) ) ) @@ -1924,8 +1144,8 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) (local.get $3) + (local.get $2) ) ) ) @@ -1981,7 +1201,6 @@ (local $7 i32) (local $8 i32) (local $9 i32) - (local $scratch v128) (if (i32.eqz (local.tee $5 @@ -2241,14 +1460,14 @@ (i8x16.all_true (local.tee $3 (i8x16.eq - (v128.load - (local.get $6) - ) (local.tee $4 (i8x16.splat (local.get $5) ) ) + (v128.load + (local.get $6) + ) ) ) ) @@ -2277,27 +1496,25 @@ (br $block2) ) (loop $label2 + (local.set $3 + (v128.load offset=16 + (local.get $6) + ) + ) + (local.set $6 + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) (br_if $label2 (i8x16.all_true (local.tee $3 (i8x16.eq - (block (result v128) - (local.set $scratch - (v128.load offset=16 - (local.get $6) - ) - ) - (local.set $6 - (local.tee $1 - (i32.add - (local.get $6) - (i32.const 16) - ) - ) - ) - (local.get $scratch) - ) (local.get $4) + (local.get $3) ) ) ) @@ -2365,12 +1582,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $3) (local.tee $4 (i8x16.splat (local.get $6) ) ) + (local.get $3) ) ) ) @@ -2416,8 +1633,8 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $3) (local.get $4) + (local.get $3) ) ) ) @@ -2663,1525 +1880,6 @@ ) ) ) - (func $memmem (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) - (local $4 i32) - (local $5 i32) - (local $6 i32) - (local $7 i32) - (local $8 i32) - (local $9 i32) - (if - (i32.eqz - (local.get $3) - ) - (then - (return - (local.get $0) - ) - ) - ) - (block $block - (br_if $block - (i32.lt_u - (local.get $1) - (local.get $3) - ) - ) - (local.set $5 - (call $memchr - (local.get $0) - (i32.load8_s - (local.get $2) - ) - (local.get $1) - ) - ) - (if - (i32.eq - (local.get $3) - (i32.const 1) - ) - (then - (return - (local.get $5) - ) - ) - ) - (if - (i32.eqz - (local.get $5) - ) - (then - (return - (local.get $5) - ) - ) - ) - (br_if $block - (i32.lt_u - (local.tee $7 - (i32.add - (i32.sub - (local.get $0) - (local.get $5) - ) - (local.get $1) - ) - ) - (local.get $3) - ) - ) - (if - (i32.eqz - (i32.and - (i32.gt_u - (i32.sub - (local.get $7) - (local.get $3) - ) - (i32.const 15) - ) - (i32.ge_u - (local.get $3) - (i32.const 16) - ) - ) - ) - (then - (return - (call $__memmem - (local.get $5) - (local.get $7) - (local.get $2) - (local.get $3) - (i32.const 0) - ) - ) - ) - ) - (memory.fill - (i32.const 4112) - (select - (i32.const -1) - (local.get $3) - (local.tee $0 - (i32.gt_u - (local.get $3) - (i32.const 254) - ) - ) - ) - (i32.const 256) - ) - (block $block1 - (br_if $block1 - (i32.ge_u - (local.tee $6 - (select - (i32.sub - (local.get $3) - (i32.const 255) - ) - (i32.const 0) - (local.get $0) - ) - ) - (local.get $3) - ) - ) - (if - (local.tee $4 - (i32.and - (i32.sub - (local.get $3) - (local.tee $1 - (local.get $6) - ) - ) - (i32.const 3) - ) - ) - (then - (local.set $0 - (i32.add - (i32.xor - (local.get $1) - (i32.const -1) - ) - (local.get $3) - ) - ) - (loop $label - (i32.store8 - (i32.add - (i32.load8_u - (i32.add - (local.get $1) - (local.get $2) - ) - ) - (i32.const 4112) - ) - (local.get $0) - ) - (local.set $0 - (i32.sub - (local.get $0) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (br_if $label - (local.tee $4 - (i32.sub - (local.get $4) - (i32.const 1) - ) - ) - ) - ) - ) - ) - (br_if $block1 - (i32.gt_u - (i32.sub - (local.get $6) - (local.get $3) - ) - (i32.const -4) - ) - ) - (local.set $9 - (i32.sub - (i32.const 0) - (local.get $1) - ) - ) - (local.set $0 - (local.get $3) - ) - (local.set $6 - (local.get $2) - ) - (loop $label1 - (i32.store8 - (i32.add - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $1) - (local.get $6) - ) - ) - ) - (i32.const 4112) - ) - (i32.sub - (local.tee $8 - (i32.add - (local.get $0) - (local.get $9) - ) - ) - (i32.const 1) - ) - ) - (i32.store8 - (i32.add - (i32.load8_u - (i32.add - (local.get $4) - (i32.const 1) - ) - ) - (i32.const 4112) - ) - (i32.sub - (local.get $8) - (i32.const 2) - ) - ) - (i32.store8 - (i32.add - (i32.load8_u - (i32.add - (local.get $4) - (i32.const 2) - ) - ) - (i32.const 4112) - ) - (i32.sub - (local.get $8) - (i32.const 3) - ) - ) - (i32.store8 - (i32.add - (i32.load8_u - (i32.add - (local.get $4) - (i32.const 3) - ) - ) - (i32.const 4112) - ) - (i32.sub - (local.get $8) - (i32.const 4) - ) - ) - (local.set $6 - (i32.add - (local.get $6) - (i32.const 4) - ) - ) - (br_if $label1 - (i32.ne - (local.get $1) - (local.tee $0 - (i32.sub - (local.get $0) - (i32.const 4) - ) - ) - ) - ) - ) - ) - (local.set $4 - (call $__memmem - (local.get $5) - (local.get $7) - (local.get $2) - (local.get $3) - (i32.const 4112) - ) - ) - ) - (local.get $4) - ) - (func $__memmem (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) - (local $5 i32) - (local $6 i32) - (local $7 i32) - (local $8 i32) - (local $9 i32) - (local $10 i32) - (local $11 i32) - (local $12 i32) - (local $13 i32) - (local $14 i32) - (local $15 i32) - (local $16 i32) - (local $17 v128) - (local $18 v128) - (local $19 v128) - (local $20 v128) - (local.set $5 - (i32.load8_u - (local.get $2) - ) - ) - (local.set $6 - (local.tee $11 - (i32.sub - (local.get $3) - (i32.const 1) - ) - ) - ) - (block $block - (loop $label - (br_if $block - (i32.ne - (local.get $5) - (local.tee $8 - (i32.load8_u - (i32.add - (local.get $2) - (local.get $6) - ) - ) - ) - ) - ) - (br_if $label - (local.tee $6 - (i32.sub - (local.get $6) - (i32.const 1) - ) - ) - ) - ) - (local.set $8 - (i32.load8_u - (i32.add - (local.get $2) - (local.get $11) - ) - ) - ) - (local.set $6 - (local.get $11) - ) - ) - (local.set $1 - (select - (i32.const -1) - (i32.sub - (local.get $1) - (local.get $3) - ) - (i32.eq - (local.get $1) - (i32.const -1) - ) - ) - ) - (block $block5 - (block $block1 - (if - (i32.gt_u - (local.get $0) - (local.tee $14 - (i32.sub - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (local.get $6) - ) - (i32.const 16) - ) - ) - ) - (then - (local.set $5 - (local.get $1) - ) - (br $block1) - ) - ) - (local.set $18 - (i8x16.splat - (local.get $8) - ) - ) - (local.set $19 - (i8x16.splat - (local.get $5) - ) - ) - (local.set $15 - (i32.add - (local.get $3) - (i32.const 15) - ) - ) - (local.set $16 - (i32.add - (local.get $2) - (i32.const 1) - ) - ) - (loop $label4 - (block $block2 - (br_if $block2 - (i32.eqz - (v128.any_true - (local.tee $20 - (v128.and - (i8x16.eq - (local.get $18) - (v128.load align=1 - (i32.add - (local.get $0) - (local.get $6) - ) - ) - ) - (i8x16.eq - (local.get $19) - (local.tee $17 - (v128.load align=1 - (local.get $0) - ) - ) - ) - ) - ) - ) - ) - ) - (local.set $5 - (i32.const -1) - ) - (br_if $block1 - (i32.and - (i32.eqz - (i8x16.all_true - (local.get $17) - ) - ) - (i32.eq - (local.get $1) - (i32.const -1) - ) - ) - ) - (br_if $block2 - (i32.eqz - (local.tee $7 - (i8x16.bitmask - (local.get $20) - ) - ) - ) - ) - (loop $label3 - (if - (i32.lt_u - (local.get $1) - (local.tee $5 - (i32.ctz - (local.get $7) - ) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (br_if $block5 - (i32.eqz - (block $block4 (result i32) - (local.set $9 - (i32.add - (local.tee $8 - (i32.add - (local.get $0) - (local.get $5) - ) - ) - (i32.const 1) - ) - ) - (local.set $5 - (local.get $16) - ) - (local.set $12 - (i32.const 0) - ) - (block $block3 - (if - (i32.ge_u - (local.tee $10 - (local.get $11) - ) - (i32.const 16) - ) - (then - (local.set $12 - (i32.const 1) - ) - (loop $label1 - (br_if $block3 - (v128.any_true - (v128.xor - (v128.load align=1 - (local.get $5) - ) - (v128.load align=1 - (local.get $9) - ) - ) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (local.tee $13 - (i32.add - (i32.and - (i32.sub - (local.get $10) - (i32.const 1) - ) - (i32.const 15) - ) - (i32.const 1) - ) - ) - ) - ) - (local.set $9 - (i32.add - (local.get $9) - (local.get $13) - ) - ) - (br_if $label1 - (local.tee $10 - (i32.sub - (local.get $10) - (local.get $13) - ) - ) - ) - ) - (br $block4 - (i32.const 0) - ) - ) - ) - (br_if $block3 - (i32.eqz - (local.get $10) - ) - ) - (loop $label2 - (drop - (br_if $block4 - (i32.const 1) - (i32.ne - (i32.load8_u - (local.get $9) - ) - (i32.load8_u - (local.get $5) - ) - ) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - (local.set $9 - (i32.add - (local.get $9) - (i32.const 1) - ) - ) - (br_if $label2 - (local.tee $10 - (i32.sub - (local.get $10) - (i32.const 1) - ) - ) - ) - ) - ) - (local.get $12) - ) - ) - ) - (br_if $label3 - (local.tee $7 - (i32.and - (i32.sub - (local.get $7) - (i32.const 1) - ) - (local.get $7) - ) - ) - ) - ) - ) - (block $block6 - (if - (i32.eq - (local.get $1) - (i32.const -1) - ) - (then - (if - (i8x16.all_true - (local.get $17) - ) - (then - (local.set $7 - (i32.const 16) - ) - (local.set $5 - (i32.const -1) - ) - (br $block6) - ) - ) - (return - (i32.const 0) - ) - ) - ) - (br_if $block6 - (i32.le_u - (local.tee $5 - (i32.sub - (local.get $1) - (local.tee $7 - (if (result i32) - (local.get $4) - (then - (i32.add - (i32.load8_u - (i32.add - (local.get $4) - (i32.load8_u - (i32.add - (local.get $0) - (local.get $15) - ) - ) - ) - ) - (i32.const 16) - ) - ) - (else - (i32.const 16) - ) - ) - ) - ) - ) - (local.get $1) - ) - ) - (return - (i32.const 0) - ) - ) - (local.set $1 - (local.get $5) - ) - (br_if $label4 - (i32.le_u - (local.tee $0 - (i32.add - (local.get $0) - (local.get $7) - ) - ) - (local.get $14) - ) - ) - ) - ) - (local.set $1 - (i32.const 0) - ) - (local.set $4 - (i32.ne - (local.get $5) - (i32.const -1) - ) - ) - (loop $label5 - (local.set $6 - (i32.const 0) - ) - (loop $label6 - (local.set $8 - (i32.const 0) - ) - (br_if $block5 - (i32.eqz - (i32.or - (local.get $4) - (local.tee $11 - (i32.load8_u - (i32.add - (local.get $0) - (local.get $6) - ) - ) - ) - ) - ) - ) - (if - (i32.ne - (local.get $11) - (i32.load8_u - (i32.add - (local.get $2) - (local.get $6) - ) - ) - ) - (then - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label5 - (i32.le_u - (local.tee $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.get $5) - ) - ) - (br $block5) - ) - ) - (br_if $label6 - (i32.ne - (local.get $3) - (local.tee $6 - (i32.add - (local.get $6) - (i32.const 1) - ) - ) - ) - ) - ) - ) - (local.set $8 - (local.get $0) - ) - ) - (local.get $8) - ) - (func $strstr (param $0 i32) (param $1 i32) (result i32) - (local $2 v128) - (local $3 v128) - (local $4 i32) - (local $5 i32) - (local $6 i32) - (block $block - (br_if $block - (i32.eqz - (local.tee $5 - (i32.load8_u - (local.get $1) - ) - ) - ) - ) - (block $block1 - (if - (v128.any_true - (local.tee $2 - (v128.or - (i8x16.eq - (local.tee $2 - (v128.load - (local.tee $4 - (i32.and - (local.get $0) - (i32.const -16) - ) - ) - ) - ) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $2) - (local.tee $3 - (i8x16.splat - (local.get $5) - ) - ) - ) - ) - ) - ) - (then - (br_if $block1 - (local.tee $6 - (i32.and - (i8x16.bitmask - (local.get $2) - ) - (i32.shl - (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) - ) - ) - ) - ) - ) - ) - ) - (loop $label - (local.set $2 - (v128.load offset=16 - (local.get $4) - ) - ) - (local.set $4 - (i32.add - (local.get $4) - (i32.const 16) - ) - ) - (br_if $label - (i32.eqz - (v128.any_true - (local.tee $2 - (v128.or - (i8x16.eq - (local.get $2) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $2) - (local.get $3) - ) - ) - ) - ) - ) - ) - ) - (local.set $6 - (i8x16.bitmask - (local.get $2) - ) - ) - ) - (local.set $0 - (i32.const 0) - ) - (br_if $block - (i32.ne - (local.get $5) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $4) - (i32.ctz - (local.get $6) - ) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i32.load8_u offset=1 - (local.get $1) - ) - ) - (then - (return - (local.get $4) - ) - ) - ) - (local.set $0 - (call $__memmem - (local.get $4) - (i32.const -1) - (local.get $1) - (call $strlen - (local.get $1) - ) - (i32.const 0) - ) - ) - ) - (local.get $0) - ) - (func $strcasestr (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (local $3 i32) - (local $4 i32) - (local $5 i32) - (local $6 i32) - (local $7 i32) - (local $8 i32) - (local $9 i32) - (local $10 v128) - (local $11 v128) - (local $12 v128) - (local $13 v128) - (local $14 v128) - (local $15 v128) - (if - (i32.eqz - (local.tee $2 - (i32.load8_u - (local.get $1) - ) - ) - ) - (then - (return - (local.get $0) - ) - ) - ) - (local.set $3 - (i32.extend8_s - (local.get $2) - ) - ) - (block $block - (br_if $block - (i32.eqz - (local.tee $6 - (i32.sub - (local.tee $7 - (call $strlen - (local.get $1) - ) - ) - (i32.const 1) - ) - ) - ) - ) - (local.set $5 - (local.get $6) - ) - (loop $label - (if - (i32.ne - (local.tee $4 - (i32.load8_u - (i32.add - (local.get $1) - (local.get $5) - ) - ) - ) - (local.get $2) - ) - (then - (local.set $2 - (local.get $4) - ) - (br $block) - ) - ) - (br_if $label - (local.tee $5 - (i32.sub - (local.get $5) - (i32.const 1) - ) - ) - ) - ) - (local.set $2 - (i32.load8_u - (i32.add - (local.get $1) - (local.get $6) - ) - ) - ) - (local.set $5 - (local.get $6) - ) - ) - (local.set $4 - (select - (i32.or - (local.get $3) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.set $3 - (select - (i32.and - (local.get $3) - (i32.const 95) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 97) - ) - (i32.const 26) - ) - ) - ) - (local.set $8 - (select - (i32.or - (local.tee $2 - (i32.extend8_s - (local.get $2) - ) - ) - (i32.const 32) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.set $2 - (select - (i32.and - (local.get $2) - (i32.const 95) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 97) - ) - (i32.const 26) - ) - ) - ) - (block $block3 - (block $block4 - (block $block1 - (br_if $block1 - (i32.lt_u - (local.tee $9 - (i32.sub - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (local.get $5) - ) - (i32.const 16) - ) - ) - (local.get $0) - ) - ) - (local.set $11 - (i8x16.splat - (local.get $4) - ) - ) - (local.set $12 - (i8x16.splat - (local.get $3) - ) - ) - (local.set $13 - (i8x16.splat - (local.get $8) - ) - ) - (local.set $14 - (i8x16.splat - (local.get $2) - ) - ) - (local.set $3 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label2 - (local.set $2 - (i8x16.all_true - (local.tee $10 - (v128.load align=1 - (local.get $0) - ) - ) - ) - ) - (block $block2 - (if - (v128.any_true - (local.tee $10 - (v128.and - (v128.or - (i8x16.eq - (local.get $13) - (local.tee $15 - (v128.load align=1 - (i32.add - (local.get $0) - (local.get $5) - ) - ) - ) - ) - (i8x16.eq - (local.get $14) - (local.get $15) - ) - ) - (v128.or - (i8x16.eq - (local.get $11) - (local.get $10) - ) - (i8x16.eq - (local.get $12) - (local.get $10) - ) - ) - ) - ) - ) - (then - (br_if $block1 - (i32.eqz - (local.get $2) - ) - ) - (br_if $block2 - (i32.eqz - (local.tee $2 - (i8x16.bitmask - (local.get $10) - ) - ) - ) - ) - (loop $label1 - (br_if $block3 - (i32.eqz - (call $strncasecmp - (i32.add - (local.tee $4 - (i32.add - (local.get $0) - (i32.ctz - (local.get $2) - ) - ) - ) - (i32.const 1) - ) - (local.get $3) - (local.get $6) - ) - ) - ) - (br_if $label1 - (local.tee $2 - (i32.and - (i32.sub - (local.get $2) - (i32.const 1) - ) - (local.get $2) - ) - ) - ) - ) - (br $block2) - ) - ) - (br_if $block4 - (i32.eqz - (local.get $2) - ) - ) - ) - (br_if $label2 - (i32.le_u - (local.tee $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (local.get $9) - ) - ) - ) - ) - (local.set $4 - (local.get $0) - ) - (local.set $2 - (i32.const 0) - ) - (loop $label3 - (br_if $block4 - (i32.eqz - (local.tee $0 - (i32.load8_s - (i32.add - (local.get $2) - (local.get $4) - ) - ) - ) - ) - ) - (if - (i32.eq - (select - (i32.or - (local.tee $5 - (i32.load8_s - (i32.add - (local.get $1) - (local.get $2) - ) - ) - ) - (i32.const 32) - ) - (local.get $5) - (i32.lt_u - (i32.sub - (local.get $5) - (i32.const 65) - ) - (i32.const 26) - ) - ) - (select - (i32.or - (local.get $0) - (i32.const 32) - ) - (local.get $0) - (i32.lt_u - (i32.sub - (local.get $0) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (then - (br_if $block3 - (i32.eq - (local.get $7) - (local.tee $2 - (i32.add - (local.get $2) - (i32.const 1) - ) - ) - ) - ) - ) - (else - (local.set $4 - (i32.add - (local.get $4) - (i32.const 1) - ) - ) - (local.set $2 - (i32.const 0) - ) - ) - ) - (br $label3) - ) - (unreachable) - ) - (local.set $4 - (i32.const 0) - ) - ) - (local.get $4) - ) - (func $memccpy (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) - (memory.copy - (local.get $0) - (local.get $1) - (select - (local.tee $1 - (i32.add - (i32.sub - (local.tee $2 - (call $memchr - (local.get $1) - (local.get $2) - (local.get $3) - ) - ) - (local.get $1) - ) - (i32.const 1) - ) - ) - (local.get $3) - (local.get $2) - ) - ) - (select - (i32.add - (local.get $0) - (local.get $1) - ) - (i32.const 0) - (local.get $2) - ) - ) - (func $strncat (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 i32) - (memory.copy - (local.tee $3 - (i32.add - (call $strlen - (local.get $0) - ) - (local.get $0) - ) - ) - (local.get $1) - (local.tee $1 - (call $strnlen - (local.get $1) - (local.get $2) - ) - ) - ) - (i32.store8 - (i32.add - (local.get $1) - (local.get $3) - ) - (i32.const 0) - ) - (local.get $0) - ) - (func $stpcpy (param $0 i32) (param $1 i32) (result i32) - (memory.copy - (local.get $0) - (local.get $1) - (i32.add - (local.tee $1 - (call $strlen - (local.get $1) - ) - ) - (i32.const 1) - ) - ) - (i32.add - (local.get $0) - (local.get $1) - ) - ) - (func $strcpy (param $0 i32) (param $1 i32) (result i32) - (memory.copy - (local.get $0) - (local.get $1) - (i32.add - (call $strlen - (local.get $1) - ) - (i32.const 1) - ) - ) - (local.get $0) - ) - (func $stpncpy (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (memory.copy - (local.get $0) - (local.get $1) - (local.tee $1 - (call $strnlen - (local.get $1) - (local.get $2) - ) - ) - ) - (memory.fill - (local.tee $0 - (i32.add - (local.get $0) - (local.get $1) - ) - ) - (i32.const 0) - (i32.sub - (local.get $2) - (local.get $1) - ) - ) - (local.get $0) - ) - (func $strncpy (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (memory.copy - (local.get $0) - (local.get $1) - (local.tee $1 - (call $strnlen - (local.get $1) - (local.get $2) - ) - ) - ) - (memory.fill - (i32.add - (local.get $0) - (local.get $1) - ) - (i32.const 0) - (i32.sub - (local.get $2) - (local.get $1) - ) - ) - (local.get $0) - ) - (func $strnlen (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (select - (i32.sub - (local.tee $2 - (call $memchr - (local.get $0) - (i32.const 0) - (local.get $1) - ) - ) - (local.get $0) - ) - (local.get $1) - (local.get $2) - ) - ) ;; features section: mutable-globals, nontrapping-float-to-int, simd, bulk-memory, sign-ext, reference-types, multivalue, bulk-memory-opt ) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 89bc538..c44190c 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -24,25 +24,18 @@ const ( ) var ( - memory []byte - module api.Module - memset api.Function - memcpy api.Function - memchr api.Function - memcmp api.Function - memmem api.Function - strlen api.Function - strchr api.Function - strcmp api.Function - strstr api.Function - strspn api.Function - strrchr api.Function - strncmp api.Function - strcspn api.Function - strcasecmp api.Function - strcasestr api.Function - strncasecmp api.Function - stack [8]uint64 + memory []byte + module api.Module + memset api.Function + memcpy api.Function + memchr api.Function + memcmp api.Function + strlen api.Function + strchr api.Function + strspn api.Function + strrchr api.Function + strcspn api.Function + stack [8]uint64 ) func call(fn api.Function, arg ...uint64) uint64 { @@ -68,18 +61,11 @@ func TestMain(m *testing.M) { memcpy = mod.ExportedFunction("memcpy") memchr = mod.ExportedFunction("memchr") memcmp = mod.ExportedFunction("memcmp") - memmem = mod.ExportedFunction("memmem") strlen = mod.ExportedFunction("strlen") strchr = mod.ExportedFunction("strchr") - strcmp = mod.ExportedFunction("strcmp") - strstr = mod.ExportedFunction("strstr") strspn = mod.ExportedFunction("strspn") strrchr = mod.ExportedFunction("strrchr") - strncmp = mod.ExportedFunction("strncmp") strcspn = mod.ExportedFunction("strcspn") - strcasecmp = mod.ExportedFunction("strcasecmp") - strcasestr = mod.ExportedFunction("strcasestr") - strncasecmp = mod.ExportedFunction("strncasecmp") memory, _ = mod.Memory().Read(0, mod.Memory().Size()) os.Exit(m.Run()) @@ -166,58 +152,6 @@ func Benchmark_memcmp(b *testing.B) { } } -func Benchmark_strcmp(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size-1], 7) - fill(memory[ptr2:ptr2+size/2], 7) - fill(memory[ptr2+size/2:ptr2+size-1], 5) - - b.SetBytes(size/2 + 1) - b.ResetTimer() - for range b.N { - call(strcmp, ptr1, ptr2, size) - } -} - -func Benchmark_strncmp(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size-1], 7) - fill(memory[ptr2:ptr2+size/2], 7) - fill(memory[ptr2+size/2:ptr2+size-1], 5) - - b.SetBytes(size/2 + 1) - b.ResetTimer() - for range b.N { - call(strncmp, ptr1, ptr2, size-1) - } -} - -func Benchmark_strcasecmp(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size-1], 7) - fill(memory[ptr2:ptr2+size/2], 7) - fill(memory[ptr2+size/2:ptr2+size-1], 5) - - b.SetBytes(size/2 + 1) - b.ResetTimer() - for range b.N { - call(strcasecmp, ptr1, ptr2, size) - } -} - -func Benchmark_strncasecmp(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size-1], 7) - fill(memory[ptr2:ptr2+size/2], 7) - fill(memory[ptr2+size/2:ptr2+size-1], 5) - - b.SetBytes(size/2 + 1) - b.ResetTimer() - for range b.N { - call(strncasecmp, ptr1, ptr2, size-1) - } -} - func Benchmark_strspn(b *testing.B) { clear(memory) fill(memory[ptr1:ptr1+size/2], 7) @@ -248,51 +182,6 @@ func Benchmark_strcspn(b *testing.B) { } } -//go:embed string.h -var source string - -func Benchmark_memmem(b *testing.B) { - needle := "memcpy(dest, src, slen)" - - clear(memory) - copy(memory[ptr1:], source) - copy(memory[ptr2:], needle) - - b.SetBytes(int64(len(source))) - b.ResetTimer() - for range b.N { - call(memmem, ptr1, uint64(len(source)), ptr2, uint64(len(needle))) - } -} - -func Benchmark_strstr(b *testing.B) { - needle := "memcpy(dest, src, slen)" - - clear(memory) - copy(memory[ptr1:], source) - copy(memory[ptr2:], needle) - - b.SetBytes(int64(len(source))) - b.ResetTimer() - for range b.N { - call(strstr, ptr1, ptr2) - } -} - -func Benchmark_strcasestr(b *testing.B) { - needle := "MEMCPY(dest, src, slen)" - - clear(memory) - copy(memory[ptr1:], source) - copy(memory[ptr2:], needle) - - b.SetBytes(int64(len(source))) - b.ResetTimer() - for range b.N { - call(strcasestr, ptr1, ptr2) - } -} - func Test_strlen(t *testing.T) { for length := range 64 { for alignment := range 24 { @@ -498,48 +387,6 @@ func Test_memcmp(t *testing.T) { } } -func Test_strcmp(t *testing.T) { - const s1 = compareTest1 - const s2 = compareTest2 - - ptr2 := len(memory) - len(s2) - 1 - - clear(memory) - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - - for i := range len(s1) + 1 { - want := strings.Compare(term(s1[i:]), term(s2[i:])) - got := call(strcmp, uint64(ptr1+i), uint64(ptr2+i)) - if sign(int32(got)) != want { - t.Errorf("strcmp(%d, %d) = %d, want %d", - ptr1+i, ptr2+i, int32(got), want) - } - } -} - -func Test_strncmp(t *testing.T) { - const s1 = compareTest1 - const s2 = compareTest2 - - ptr2 := len(memory) - len(s2) - 1 - - clear(memory) - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - - for i := range len(s1) + 1 { - for j := range len(s1) - i + 1 { - want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j])) - got := call(strncmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) - if sign(int32(got)) != want { - t.Errorf("strncmp(%d, %d, %d) = %d, want %d", - ptr1+i, ptr2+i, j, int32(got), want) - } - } - } -} - func Test_strspn(t *testing.T) { for length := range 64 { for pos := range length + 2 { @@ -782,102 +629,6 @@ var searchTests = []searchTest{ {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5}, } -func Test_memmem(t *testing.T) { - tt := append(searchTests, - searchTest{"abcABCabc", "A", 3}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17}, - ) - - for i := range tt { - ptr1 := uint64(len(memory) - len(tt[i].haystk)) - - clear(memory) - copy(memory[ptr1:], tt[i].haystk) - copy(memory[ptr2:], tt[i].needle) - - var want uint64 - if tt[i].out >= 0 { - want = ptr1 + uint64(tt[i].out) - } - - got := call(memmem, - uint64(ptr1), uint64(len(tt[i].haystk)), - uint64(ptr2), uint64(len(tt[i].needle))) - if got != want { - t.Errorf("memmem(%q, %q) = %d, want %d", - tt[i].haystk, tt[i].needle, - uint32(got), uint32(want)) - } - } -} - -func Test_strstr(t *testing.T) { - tt := append(searchTests, - searchTest{"abcABCabc", "A", 3}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, - ) - - for i := range tt { - ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1) - - clear(memory) - copy(memory[ptr1:], tt[i].haystk) - copy(memory[ptr2:], tt[i].needle) - - var want uint64 - if tt[i].out >= 0 { - want = ptr1 + uint64(tt[i].out) - } - - got := call(strstr, uint64(ptr1), uint64(ptr2)) - if got != want { - t.Errorf("strstr(%q, %q) = %d, want %d", - tt[i].haystk, tt[i].needle, - uint32(got), uint32(want)) - } - } -} - -func Test_strcasestr(t *testing.T) { - tt := append(searchTests[1:], - searchTest{"A", "a", 0}, - searchTest{"a", "A", 0}, - searchTest{"Z", "z", 0}, - searchTest{"z", "Z", 0}, - searchTest{"@", "`", -1}, - searchTest{"`", "@", -1}, - searchTest{"[", "{", -1}, - searchTest{"{", "[", -1}, - searchTest{"abcABCabc", "A", 0}, - searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12}, - searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, - ) - - for i := range tt { - ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1) - - clear(memory) - copy(memory[ptr1:], tt[i].haystk) - copy(memory[ptr2:], tt[i].needle) - - var want uint64 - if tt[i].out >= 0 { - want = ptr1 + uint64(tt[i].out) - } - - got := call(strcasestr, uint64(ptr1), uint64(ptr2)) - if got != want { - t.Errorf("strcasestr(%q, %q) = %d, want %d", - tt[i].haystk, tt[i].needle, - uint32(got), uint32(want)) - } - } -} - func Fuzz_memchr(f *testing.F) { f.Fuzz(func(t *testing.T, s string, c, i byte) { if len(s) > 128 || int(i) > len(s) { @@ -971,120 +722,6 @@ func Fuzz_memcmp(f *testing.F) { }) } -func Fuzz_strcmp(f *testing.F) { - const s1 = compareTest1 - const s2 = compareTest2 - - for i := range len(compareTest1) + 1 { - f.Add(term(s1[i:]), term(s2[i:])) - } - - f.Fuzz(func(t *testing.T, s1, s2 string) { - if len(s1) > 128 || len(s2) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - memory[ptr1+len(s1)] = 0 - memory[ptr2+len(s2)] = 0 - - got := call(strcmp, uint64(ptr1), uint64(ptr2)) - want := strings.Compare(term(s1), term(s2)) - - if sign(int32(got)) != want { - t.Errorf("strcmp(%q, %q) = %d, want %d", - s1, s2, uint32(got), uint32(want)) - } - }) -} - -func Fuzz_strncmp(f *testing.F) { - const s1 = compareTest1 - const s2 = compareTest2 - - for i := range len(compareTest1) + 1 { - f.Add(term(s1[i:]), term(s2[i:]), byte(len(s1))) - } - - f.Fuzz(func(t *testing.T, s1, s2 string, n byte) { - if len(s1) > 128 || len(s2) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - memory[ptr1+len(s1)] = 0 - memory[ptr2+len(s2)] = 0 - - got := call(strncmp, uint64(ptr1), uint64(ptr2), uint64(n)) - want := bytes.Compare( - term(memory[ptr1:][:n]), - term(memory[ptr2:][:n])) - - if sign(int32(got)) != want { - t.Errorf("strncmp(%q, %q, %d) = %d, want %d", - s1, s2, n, uint32(got), uint32(want)) - } - }) -} - -func Fuzz_strcasecmp(f *testing.F) { - const s1 = compareTest1 - const s2 = compareTest2 - - for i := range len(compareTest1) + 1 { - f.Add(term(s1[i:]), term(s2[i:])) - } - - f.Fuzz(func(t *testing.T, s1, s2 string) { - if len(s1) > 128 || len(s2) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - memory[ptr1+len(s1)] = 0 - memory[ptr2+len(s2)] = 0 - - got := call(strcasecmp, uint64(ptr1), uint64(ptr2)) - want := bytes.Compare( - lower(term(memory[ptr1:])), - lower(term(memory[ptr2:]))) - - if sign(int32(got)) != want { - t.Errorf("strcasecmp(%q, %q) = %d, want %d", - s1, s2, uint32(got), uint32(want)) - } - }) -} - -func Fuzz_strncasecmp(f *testing.F) { - const s1 = compareTest1 - const s2 = compareTest2 - - for i := range len(compareTest1) + 1 { - f.Add(term(s1[i:]), term(s2[i:]), byte(len(s1))) - } - - f.Fuzz(func(t *testing.T, s1, s2 string, n byte) { - if len(s1) > 128 || len(s2) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - memory[ptr1+len(s1)] = 0 - memory[ptr2+len(s2)] = 0 - - got := call(strncasecmp, uint64(ptr1), uint64(ptr2), uint64(n)) - want := bytes.Compare( - lower(term(memory[ptr1:][:n])), - lower(term(memory[ptr2:][:n]))) - - if sign(int32(got)) != want { - t.Errorf("strncasecmp(%q, %q, %d) = %d, want %d", - s1, s2, n, uint32(got), uint32(want)) - } - }) -} - func Fuzz_strspn(f *testing.F) { for _, t := range searchTests { f.Add(t.haystk, t.needle) @@ -1155,129 +792,6 @@ func Fuzz_strcspn(f *testing.F) { }) } -func Fuzz_memmem(f *testing.F) { - tt := append(searchTests, - searchTest{"abcABCabc", "A", 3}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17}, - ) - - for _, t := range tt { - f.Add(t.haystk, t.needle) - } - - f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], haystk) - copy(memory[ptr2:], needle) - - got := call(memmem, - uint64(ptr1), uint64(len(haystk)), - uint64(ptr2), uint64(len(needle))) - - want := strings.Index(haystk, needle) - if want >= 0 { - want = ptr1 + want - } else { - want = 0 - } - - if uint32(got) != uint32(want) { - t.Errorf("memmem(%q, %q) = %d, want %d", - haystk, needle, uint32(got), uint32(want)) - } - }) -} - -func Fuzz_strstr(f *testing.F) { - tt := append(searchTests, - searchTest{"abcABCabc", "A", 3}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, - ) - - for _, t := range tt { - f.Add(t.haystk, t.needle) - } - - f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 128 { - t.SkipNow() - } - copy(memory[ptr1:], haystk) - copy(memory[ptr2:], needle) - memory[ptr1+len(haystk)] = 0 - memory[ptr2+len(needle)] = 0 - - got := call(strstr, uint64(ptr1), uint64(ptr2)) - - want := strings.Index(term(haystk), term(needle)) - if want >= 0 { - want = ptr1 + want - } else { - want = 0 - } - - if uint32(got) != uint32(want) { - t.Errorf("strstr(%q, %q) = %d, want %d", - haystk, needle, uint32(got), uint32(want)) - } - }) -} - -func Fuzz_strcasestr(f *testing.F) { - tt := append(searchTests, - searchTest{"A", "a", 0}, - searchTest{"a", "A", 0}, - searchTest{"Z", "z", 0}, - searchTest{"z", "Z", 0}, - searchTest{"@", "`", -1}, - searchTest{"`", "@", -1}, - searchTest{"[", "{", -1}, - searchTest{"{", "[", -1}, - searchTest{"abcABCabc", "A", 0}, - searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12}, - searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12}, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, - ) - - for _, t := range tt { - f.Add(t.haystk, t.needle) - } - - f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 128 { - t.SkipNow() - } - if len(needle) == 0 { - t.Skip("musl bug") - } - copy(memory[ptr1:], haystk) - copy(memory[ptr2:], needle) - memory[ptr1+len(haystk)] = 0 - memory[ptr2+len(needle)] = 0 - - got := call(strcasestr, uint64(ptr1), uint64(ptr2)) - - want := bytes.Index( - lower(term(memory[ptr1:])), - lower(term(memory[ptr2:]))) - if want >= 0 { - want = ptr1 + want - } else { - want = 0 - } - - if uint32(got) != uint32(want) { - t.Errorf("strcasestr(%q, %q) = %d, want %d", - haystk, needle, uint32(got), uint32(want)) - } - }) -} - func sign(x int32) int { switch { case x > 0: @@ -1295,15 +809,6 @@ func fill(s []byte, v byte) { } } -func lower(s []byte) []byte { - for i, c := range s { - if 'A' <= c && c <= 'Z' { - s[i] = c - 'A' + 'a' - } - } - return s -} - func term[T interface{ []byte | string }](s T) T { for i, c := range []byte(s) { if c == 0 { diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 8758393..2d0981c 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -19,17 +19,17 @@ extern "C" { // Clang will intrinsify using SIMD for small, constant N. // For everything else, this helps inlining. -__attribute__((weak)) +__attribute__((weak, always_inline)) void *memset(void *dest, int c, size_t n) { return __builtin_memset(dest, c, n); } -__attribute__((weak)) +__attribute__((weak, always_inline)) void *memcpy(void *__restrict dest, const void *__restrict src, size_t n) { return __builtin_memcpy(dest, src, n); } -__attribute__((weak)) +__attribute__((weak, always_inline)) void *memmove(void *dest, const void *src, size_t n) { return __builtin_memmove(dest, src, n); } @@ -80,7 +80,7 @@ int memcmp(const void *vl, const void *vr, size_t n) { return 0; } -__attribute__((weak)) +__attribute__((weak, noinline)) void *memchr(const void *s, int c, size_t n) { // When n is zero, a function that locates a character finds no occurrence. // Otherwise, decrement n to ensure sub_overflow overflows @@ -126,7 +126,7 @@ void *memchr(const void *s, int c, size_t n) { } } -__attribute__((weak)) +__attribute__((weak, noinline)) void *memrchr(const void *s, int c, size_t n) { // memrchr is allowed to read up to n bytes from the object. // Search backward for the last matching character. @@ -150,7 +150,7 @@ void *memrchr(const void *s, int c, size_t n) { return NULL; } -__attribute__((weak)) +__attribute__((weak, noinline)) size_t strlen(const char *s) { // strlen must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. @@ -180,93 +180,6 @@ size_t strlen(const char *s) { } } -static int __strcmp_s(const char *s1, const char *s2) { - // Scalar algorithm. - const unsigned char *u1 = (unsigned char *)s1; - const unsigned char *u2 = (unsigned char *)s2; - for (;;) { - if (*u1 != *u2) return *u1 - *u2; - if (*u1 == 0) break; - u1++; - u2++; - } - return 0; -} - -static int __strcmp(const char *s1, const char *s2) { - // How many bytes can be read before pointers go out of bounds. - size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // - (size_t)(s1 > s2 ? s1 : s2); - - // Unaligned loads handle the case where the strings - // have mismatching alignments. - const v128_t *w1 = (v128_t *)s1; - const v128_t *w2 = (v128_t *)s2; - for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) { - // Find any single bit difference. - if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { - // The terminator may come before the difference. - break; - } - // We know all characters are equal. - // If any is a terminator the strings are equal. - if (!wasm_i8x16_all_true(wasm_v128_load(w1))) { - return 0; - } - w1++; - w2++; - } - - return __strcmp_s((char *)w1, (char *)w2); -} - -__attribute__((weak, always_inline)) -int strcmp(const char *s1, const char *s2) { - // Skip the vector search when comparing against small literal strings. - if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) { - return __strcmp_s(s1, s2); - } - return __strcmp(s1, s2); -} - -__attribute__((weak)) -int strncmp(const char *s1, const char *s2, size_t n) { - // How many bytes can be read before pointers go out of bounds. - size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // - (size_t)(s1 > s2 ? s1 : s2); - if (n > N) n = N; - - // Unaligned loads handle the case where the strings - // have mismatching alignments. - const v128_t *w1 = (v128_t *)s1; - const v128_t *w2 = (v128_t *)s2; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { - // Find any single bit difference. - if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { - // The terminator may come before the difference. - break; - } - // We know all characters are equal. - // If any is a terminator the strings are equal. - if (!wasm_i8x16_all_true(wasm_v128_load(w1))) { - return 0; - } - w1++; - w2++; - } - - // Scalar algorithm. - const unsigned char *u1 = (unsigned char *)w1; - const unsigned char *u2 = (unsigned char *)w2; - while (n--) { - if (*u1 != *u2) return *u1 - *u2; - if (*u1 == 0) break; - u1++; - u2++; - } - return 0; -} - static char *__strchrnul(const char *s, int c) { // strchrnul must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. @@ -371,7 +284,7 @@ static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) { #undef wasm_i8x16_relaxed_swizzle -__attribute__((weak)) +__attribute__((weak, noinline)) size_t strspn(const char *s, const char *c) { // strspn must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. @@ -433,7 +346,7 @@ size_t strspn(const char *s, const char *c) { } } -__attribute__((weak)) +__attribute__((weak, noinline)) size_t strcspn(const char *s, const char *c) { if (!c[0] || !c[1]) return __strchrnul(s, *c) - s; @@ -472,215 +385,6 @@ size_t strcspn(const char *s, const char *c) { } } -// SIMD-friendly algorithms for substring searching -// http://0x80.pl/notesen/2016-11-28-simd-strfind.html - -// For haystacks of known length and large enough needles, -// Boyer-Moore's bad-character rule may be useful, -// as proposed by Horspool, Sunday and Raita. -// -// We augment the SIMD algorithm with Quick Search's -// bad-character shift. -// -// https://igm.univ-mlv.fr/~lecroq/string/node14.html -// https://igm.univ-mlv.fr/~lecroq/string/node18.html -// https://igm.univ-mlv.fr/~lecroq/string/node19.html -// https://igm.univ-mlv.fr/~lecroq/string/node22.html - -static const char *__memmem(const char *haystk, size_t sh, // - const char *needle, size_t sn, // - uint8_t bmbc[256]) { - // We've handled empty and single character needles. - // The needle is not longer than the haystack. - __builtin_assume(2 <= sn && sn <= sh); - - // Find the farthest character not equal to the first one. - size_t i = sn - 1; - while (i > 0 && needle[0] == needle[i]) i--; - if (i == 0) i = sn - 1; - - // Subtracting ensures sub_overflow overflows - // when we reach the end of the haystack. - if (sh != SIZE_MAX) sh -= sn; - - const v128_t fst = wasm_i8x16_splat(needle[0]); - const v128_t lst = wasm_i8x16_splat(needle[i]); - - // The last haystack offset for which loading blk_lst is safe. - const char *H = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - // - (sizeof(v128_t) + i)); - - while (haystk <= H) { - const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk)); - const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + i)); - const v128_t eq_fst = wasm_i8x16_eq(fst, blk_fst); - const v128_t eq_lst = wasm_i8x16_eq(lst, blk_lst); - - const v128_t cmp = eq_fst & eq_lst; - if (wasm_v128_any_true(cmp)) { - // The terminator may come before the match. - if (sh == SIZE_MAX && !wasm_i8x16_all_true(blk_fst)) break; - // Find the offset of the first one bit (little-endian). - // Each iteration clears that bit, tries again. - for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) { - size_t ctz = __builtin_ctz(mask); - // The match may be after the end of the haystack. - if (ctz > sh) return NULL; - // We know the first character matches. - if (!bcmp(haystk + ctz + 1, needle + 1, sn - 1)) { - return haystk + ctz; - } - } - } - - size_t skip = sizeof(v128_t); - if (sh == SIZE_MAX) { - // Have we reached the end of the haystack? - if (!wasm_i8x16_all_true(blk_fst)) return NULL; - } else { - // Apply the bad-character rule to the character to the right - // of the righmost character of the search window. - if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + sizeof(v128_t)]]; - // Have we reached the end of the haystack? - if (__builtin_sub_overflow(sh, skip, &sh)) return NULL; - } - haystk += skip; - } - - // Scalar algorithm. - for (size_t j = 0; j <= sh; j++) { - for (size_t i = 0;; i++) { - if (sn == i) return haystk; - if (sh == SIZE_MAX && !haystk[i]) return NULL; - if (needle[i] != haystk[i]) break; - } - haystk++; - } - return NULL; -} - -__attribute__((weak)) -void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { - // Return immediately on empty needle. - if (sn == 0) return (void *)vh; - - // Return immediately when needle is longer than haystack. - if (sn > sh) return NULL; - - // Skip to the first matching character using memchr, - // thereby handling single character needles. - const char *needle = (char *)vn; - const char *haystk = (char *)memchr(vh, *needle, sh); - if (!haystk || sn == 1) return (void *)haystk; - - // The haystack got shorter, is the needle now longer than it? - sh -= haystk - (char *)vh; - if (sn > sh) return NULL; - - // Is Boyer-Moore's bad-character rule useful? - if (sn < sizeof(v128_t) || sh - sn < sizeof(v128_t)) { - return (void *)__memmem(haystk, sh, needle, sn, NULL); - } - - // Compute Boyer-Moore's bad-character shift function. - // Only the last 255 characters of the needle matter for shifts up to 255, - // which is good enough for most needles. - size_t c = sn; - size_t i = 0; - if (c >= 255) { - i = sn - 255; - c = 255; - } - -#ifndef _REENTRANT - static -#endif - uint8_t bmbc[256]; - memset(bmbc, c, sizeof(bmbc)); - for (; i < sn; i++) { - // One less than the usual offset because - // we advance at least one vector at a time. - bmbc[(unsigned char)needle[i]] = sn - i - 1; - } - - return (void *)__memmem(haystk, sh, needle, sn, bmbc); -} - -__attribute__((weak)) -char *strstr(const char *haystk, const char *needle) { - // Return immediately on empty needle. - if (!needle[0]) return (char *)haystk; - - // Skip to the first matching character using strchr, - // thereby handling single character needles. - haystk = strchr(haystk, *needle); - if (!haystk || !needle[1]) return (char *)haystk; - - return (char *)__memmem(haystk, SIZE_MAX, needle, strlen(needle), NULL); -} - -__attribute__((weak)) -char *strcasestr(const char *haystk, const char *needle) { - // Return immediately on empty needle. - if (!needle[0]) return (char *)haystk; - - // We've handled empty needles. - size_t sn = strlen(needle); - __builtin_assume(sn >= 1); - - // Find the farthest character not equal to the first one. - size_t i = sn - 1; - while (i > 0 && needle[0] == needle[i]) i--; - if (i == 0) i = sn - 1; - - const v128_t fstl = wasm_i8x16_splat(tolower(needle[0])); - const v128_t fstu = wasm_i8x16_splat(toupper(needle[0])); - const v128_t lstl = wasm_i8x16_splat(tolower(needle[i])); - const v128_t lstu = wasm_i8x16_splat(toupper(needle[i])); - - // The last haystk offset for which loading blk_lst is safe. - const char *H = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - // - (sizeof(v128_t) + i)); - - while (haystk <= H) { - const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk)); - const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + i)); - const v128_t eq_fst = - wasm_i8x16_eq(fstl, blk_fst) | wasm_i8x16_eq(fstu, blk_fst); - const v128_t eq_lst = - wasm_i8x16_eq(lstl, blk_lst) | wasm_i8x16_eq(lstu, blk_lst); - - const v128_t cmp = eq_fst & eq_lst; - if (wasm_v128_any_true(cmp)) { - // The terminator may come before the match. - if (!wasm_i8x16_all_true(blk_fst)) break; - // Find the offset of the first one bit (little-endian). - // Each iteration clears that bit, tries again. - for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) { - size_t ctz = __builtin_ctz(mask); - if (!strncasecmp(haystk + ctz + 1, needle + 1, sn - 1)) { - return (char *)haystk + ctz; - } - } - } - - // Have we reached the end of the haystack? - if (!wasm_i8x16_all_true(blk_fst)) return NULL; - haystk += sizeof(v128_t); - } - - // Scalar algorithm. - for (;;) { - for (size_t i = 0;; i++) { - if (sn == i) return (char *)haystk; - if (!haystk[i]) return NULL; - if (tolower(needle[i]) != tolower(haystk[i])) break; - } - haystk++; - } - return NULL; -} - // Given the above SIMD implementations, // these are best implemented as // small wrappers over those functions. diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h deleted file mode 100644 index 4f44eda..0000000 --- a/sqlite3/libc/strings.h +++ /dev/null @@ -1,172 +0,0 @@ -#include_next // the system strings.h - -#ifndef _WASM_SIMD128_STRINGS_H -#define _WASM_SIMD128_STRINGS_H - -#include -#include -#include -#include <__macro_PAGESIZE.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __wasm_simd128__ - -#ifdef __OPTIMIZE_SIZE__ - -// bcmp is the same as memcmp but only compares for equality. -int bcmp(const void *v1, const void *v2, size_t n); - -#else // __OPTIMIZE_SIZE__ - -__attribute__((weak)) -int bcmp(const void *v1, const void *v2, size_t n) { - // Scalar algorithm. - if (n < sizeof(v128_t)) { - const unsigned char *u1 = (unsigned char *)v1; - const unsigned char *u2 = (unsigned char *)v2; - while (n--) { - if (*u1 != *u2) return 1; - u1++; - u2++; - } - return 0; - } - - // bcmp is allowed to read up to n bytes from each object. - // Unaligned loads handle the case where the objects - // have mismatching alignments. - const v128_t *w1 = (v128_t *)v1; - const v128_t *w2 = (v128_t *)v2; - while (n) { - // Find any single bit difference. - if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { - return 1; - } - // This makes n a multiple of sizeof(v128_t) - // for every iteration except the first. - size_t align = (n - 1) % sizeof(v128_t) + 1; - w1 = (v128_t *)((char *)w1 + align); - w2 = (v128_t *)((char *)w2 + align); - n -= align; - } - return 0; -} - -#endif // __OPTIMIZE_SIZE__ - -__attribute__((always_inline)) -static v128_t __tolower8x16(v128_t v) { - __i8x16 i = v; - i = i + wasm_i8x16_splat(INT8_MAX - ('Z')); - i = i > wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1)); - i = i & wasm_i8x16_splat('a' - 'A'); - return v | i; -} - -static int __strcasecmp_s(const char *s1, const char *s2) { - // Scalar algorithm. - const unsigned char *u1 = (unsigned char *)s1; - const unsigned char *u2 = (unsigned char *)s2; - for (;;) { - int c1 = tolower(*u1); - int c2 = tolower(*u2); - if (c1 != c2) return c1 - c2; - if (c1 == 0) break; - u1++; - u2++; - } - return 0; -} - -static int __strcasecmp(const char *s1, const char *s2) { - // How many bytes can be read before pointers go out of bounds. - size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // - (size_t)(s1 > s2 ? s1 : s2); - - // Unaligned loads handle the case where the strings - // have mismatching alignments. - const v128_t *w1 = (v128_t *)s1; - const v128_t *w2 = (v128_t *)s2; - for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) { - v128_t v1 = __tolower8x16(wasm_v128_load(w1)); - v128_t v2 = __tolower8x16(wasm_v128_load(w2)); - - // Find any single bit difference. - if (wasm_v128_any_true(v1 ^ v2)) { - // The terminator may come before the difference. - break; - } - // We know all characters are equal. - // If any is a terminator the strings are equal. - if (!wasm_i8x16_all_true(v1)) { - return 0; - } - w1++; - w2++; - } - - return __strcasecmp_s((char *)w1, (char *)w2); -} - -__attribute__((weak)) -int strcasecmp(const char *s1, const char *s2) { - // Skip the vector search when comparing against small literal strings. - if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) { - return __strcasecmp_s(s1, s2); - } - return __strcasecmp(s1, s2); -} - -__attribute__((weak)) -int strncasecmp(const char *s1, const char *s2, size_t n) { - // How many bytes can be read before pointers go out of bounds. - size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // - (size_t)(s1 > s2 ? s1 : s2); - if (n > N) n = N; - - // Unaligned loads handle the case where the strings - // have mismatching alignments. - const v128_t *w1 = (v128_t *)s1; - const v128_t *w2 = (v128_t *)s2; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { - v128_t v1 = __tolower8x16(wasm_v128_load(w1)); - v128_t v2 = __tolower8x16(wasm_v128_load(w2)); - - // Find any single bit difference. - if (wasm_v128_any_true(v1 ^ v2)) { - // The terminator may come before the difference. - break; - } - // We know all characters are equal. - // If any is a terminator the strings are equal. - if (!wasm_i8x16_all_true(v1)) { - return 0; - } - w1++; - w2++; - } - - // Scalar algorithm. - const unsigned char *u1 = (unsigned char *)w1; - const unsigned char *u2 = (unsigned char *)w2; - while (n--) { - int c1 = tolower(*u1); - int c2 = tolower(*u2); - if (c1 != c2) return c1 - c2; - if (c1 == 0) break; - u1++; - u2++; - } - return 0; -} - -#endif // __wasm_simd128__ - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // _WASM_SIMD128_STRINGS_H \ No newline at end of file diff --git a/sqlite3/strcasecmp.patch b/sqlite3/strcasecmp.patch deleted file mode 100644 index d24d078..0000000 --- a/sqlite3/strcasecmp.patch +++ /dev/null @@ -1,41 +0,0 @@ -# Use strcasecmp and strncasecmp. ---- sqlite3.c.orig -+++ sqlite3.c -@@ -35685,35 +35685,15 @@ - return sqlite3StrICmp(zLeft, zRight); - } - SQLITE_PRIVATE int sqlite3StrICmp(const char *zLeft, const char *zRight){ -- unsigned char *a, *b; -- int c, x; -- a = (unsigned char *)zLeft; -- b = (unsigned char *)zRight; -- for(;;){ -- c = *a; -- x = *b; -- if( c==x ){ -- if( c==0 ) break; -- }else{ -- c = (int)UpperToLower[c] - (int)UpperToLower[x]; -- if( c ) break; -- } -- a++; -- b++; -- } -- return c; -+ return strcasecmp(zLeft, zRight); - } - SQLITE_API int sqlite3_strnicmp(const char *zLeft, const char *zRight, int N){ -- register unsigned char *a, *b; - if( zLeft==0 ){ - return zRight ? -1 : 0; - }else if( zRight==0 ){ - return 1; - } -- a = (unsigned char *)zLeft; -- b = (unsigned char *)zRight; -- while( N-- > 0 && *a!=0 && UpperToLower[*a]==UpperToLower[*b]){ a++; b++; } -- return N<0 ? 0 : UpperToLower[*a] - UpperToLower[*b]; -+ return strncasecmp(zLeft, zRight, N); - } - - /* diff --git a/vfs/tests/mptest/wasm/mptest.wasm b/vfs/tests/mptest/wasm/mptest.wasm index d6689a9..8918876 100644 Binary files a/vfs/tests/mptest/wasm/mptest.wasm and b/vfs/tests/mptest/wasm/mptest.wasm differ diff --git a/vfs/tests/speedtest1/wasm/speedtest1.wasm b/vfs/tests/speedtest1/wasm/speedtest1.wasm index aa98811..ff840a4 100644 Binary files a/vfs/tests/speedtest1/wasm/speedtest1.wasm and b/vfs/tests/speedtest1/wasm/speedtest1.wasm differ