diff --git a/embed/bcw2/bcw2.wasm b/embed/bcw2/bcw2.wasm index 837fa85..33157b2 100755 Binary files a/embed/bcw2/bcw2.wasm and b/embed/bcw2/bcw2.wasm differ diff --git a/embed/bcw2/build.sh b/embed/bcw2/build.sh index 16b7808..b96ac1a 100755 --- a/embed/bcw2/build.sh +++ b/embed/bcw2/build.sh @@ -43,8 +43,8 @@ cd ~- "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ - -o bcw2.wasm "build/main.c" \ - -I"build" \ + -o bcw2.wasm build/main.c \ + -I"$ROOT/sqlite3/libc" -I"build" \ -mexec-model=reactor \ -msimd128 -mmutable-globals -mmultivalue \ -mbulk-memory -mreference-types \ diff --git a/embed/sqlite3.wasm b/embed/sqlite3.wasm index 88ebe99..b46a351 100755 Binary files a/embed/sqlite3.wasm and b/embed/sqlite3.wasm differ diff --git a/go.mod b/go.mod index 3417b16..24a494e 100644 --- a/go.mod +++ b/go.mod @@ -8,16 +8,16 @@ require ( github.com/ncruces/julianday v1.0.0 github.com/ncruces/sort v0.1.5 github.com/tetratelabs/wazero v1.9.0 - golang.org/x/crypto v0.37.0 - golang.org/x/sys v0.32.0 + golang.org/x/crypto v0.38.0 + golang.org/x/sys v0.33.0 ) require ( github.com/dchest/siphash v1.2.3 // ext/bloom github.com/google/uuid v1.6.0 // ext/uuid github.com/psanford/httpreadat v0.1.0 // example - golang.org/x/sync v0.13.0 // test - golang.org/x/text v0.24.0 // ext/unicode + golang.org/x/sync v0.14.0 // test + golang.org/x/text v0.25.0 // ext/unicode lukechampine.com/adiantum v1.1.1 // vfs/adiantum ) diff --git a/go.sum b/go.sum index 55dc895..d901f5d 100644 --- a/go.sum +++ b/go.sum @@ -10,13 +10,13 @@ github.com/psanford/httpreadat v0.1.0 h1:VleW1HS2zO7/4c7c7zNl33fO6oYACSagjJIyMIw github.com/psanford/httpreadat v0.1.0/go.mod h1:Zg7P+TlBm3bYbyHTKv/EdtSJZn3qwbPwpfZ/I9GKCRE= github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I= github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM= -golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= -golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= -golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= -golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= -golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= +golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= +golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= lukechampine.com/adiantum v1.1.1 h1:4fp6gTxWCqpEbLy40ExiYDDED3oUNWx5cTqBCtPdZqA= lukechampine.com/adiantum v1.1.1/go.mod h1:LrAYVnTYLnUtE/yMp5bQr0HstAf060YUF8nM0B6+rUw= diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index fd4661b..cfb717c 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -27,16 +27,24 @@ EOF -Wl,--stack-first \ -Wl,--import-undefined \ -Wl,--initial-memory=16777216 \ + -Wl,--export=memccpy \ -Wl,--export=memchr \ -Wl,--export=memcmp \ -Wl,--export=memcpy \ + -Wl,--export=memmove \ + -Wl,--export=memrchr \ -Wl,--export=memset \ + -Wl,--export=stpcpy \ + -Wl,--export=stpncpy \ -Wl,--export=strchr \ -Wl,--export=strchrnul \ -Wl,--export=strcmp \ + -Wl,--export=strcpy \ -Wl,--export=strcspn \ -Wl,--export=strlen \ + -Wl,--export=strncat \ -Wl,--export=strncmp \ + -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ -Wl,--export=qsort diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 1efb229..be51968 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index e8e5d8f..d988fcd 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -2,15 +2,18 @@ (type $0 (func (param i32 i32) (result i32))) (type $1 (func (param i32 i32 i32) (result i32))) (type $2 (func (param i32) (result i32))) - (type $3 (func (param i32 i32 i32 i32))) + (type $3 (func (param i32 i32 i32 i32) (result i32))) + (type $4 (func (param i32 i32 i32 i32))) (memory $0 256) (data $0 (i32.const 1024) "\01") (table $0 1 1 funcref) (export "memory" (memory $0)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) + (export "memmove" (func $memcpy)) (export "memcmp" (func $memcmp)) (export "memchr" (func $memchr)) + (export "memrchr" (func $memrchr)) (export "strlen" (func $strlen)) (export "strcmp" (func $strcmp)) (export "strncmp" (func $strncmp)) @@ -19,6 +22,12 @@ (export "strrchr" (func $strrchr)) (export "strspn" (func $strspn)) (export "strcspn" (func $strcspn)) + (export "memccpy" (func $memccpy)) + (export "strncat" (func $strncat)) + (export "stpcpy" (func $stpcpy)) + (export "strcpy" (func $strcpy)) + (export "stpncpy" (func $stpncpy)) + (export "strncpy" (func $strncpy)) (export "qsort" (func $qsort)) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill @@ -37,130 +46,142 @@ (local.get $0) ) (func $memcmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 v128) + (local $3 i32) (local $4 i32) - (local $5 i32) + (local $5 v128) (block $block - (br_if $block - (i32.lt_u + (if + (i32.ge_u (local.get $2) (i32.const 16) ) - ) - (loop $label - (if - (i8x16.all_true - (local.tee $3 - (i8x16.eq - (v128.load align=1 - (local.get $0) + (then + (loop $label + (if + (i32.eqz + (i8x16.all_true + (local.tee $5 + (i8x16.eq + (v128.load align=1 + (local.get $0) + ) + (v128.load align=1 + (local.get $1) + ) + ) + ) ) - (v128.load align=1 - (local.get $1) + ) + (then + (return + (i32.sub + (i32.load8_u + (i32.add + (local.get $0) + (local.tee $2 + (i32.ctz + (i32.xor + (i8x16.bitmask + (local.get $5) + ) + (i32.const -1) + ) + ) + ) + ) + ) + (i32.load8_u + (i32.add + (local.get $1) + (local.get $2) + ) + ) + ) ) ) ) - ) - (then (local.set $1 (i32.add (local.get $1) - (i32.const 16) + (local.tee $3 + (i32.add + (i32.and + (i32.sub + (local.get $2) + (i32.const 1) + ) + (i32.const 15) + ) + (i32.const 1) + ) + ) ) ) (local.set $0 (i32.add (local.get $0) - (i32.const 16) + (local.get $3) ) ) (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - (br $block) - ) - ) - ) - (return - (i32.sub - (i32.load8_u - (i32.add - (local.get $0) (local.tee $2 - (i32.ctz - (i32.xor - (i8x16.bitmask - (local.get $3) - ) - (i32.const -1) - ) + (i32.sub + (local.get $2) + (local.get $3) ) ) ) ) - (i32.load8_u - (i32.add - (local.get $1) - (local.get $2) + (br $block) + ) + ) + (br_if $block + (i32.eqz + (local.get $2) + ) + ) + (loop $label1 + (if + (i32.ne + (local.tee $3 + (i32.load8_u + (local.get $0) + ) + ) + (local.tee $4 + (i32.load8_u + (local.get $1) + ) + ) + ) + (then + (return + (i32.sub + (local.get $3) + (local.get $4) + ) ) ) ) - ) - ) - (if - (local.get $2) - (then - (loop $label1 - (if - (i32.ne - (local.tee $4 - (i32.load8_u - (local.get $0) - ) - ) - (local.tee $5 - (i32.load8_u - (local.get $1) - ) - ) - ) - (then - (return - (i32.sub - (local.get $4) - (local.get $5) - ) - ) - ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) ) - (local.set $1 - (i32.add - (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $2 + (i32.sub + (local.get $2) (i32.const 1) ) ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) - ) ) ) ) @@ -324,6 +345,118 @@ ) ) ) + (func $memrchr (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 v128) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.get $2) + (i32.const 16) + ) + ) + (local.set $3 + (i8x16.splat + (local.get $1) + ) + ) + (loop $label + (if + (i32.eqz + (v128.any_true + (local.tee $4 + (i8x16.eq + (v128.load align=1 + (i32.sub + (i32.add + (local.get $0) + (local.get $2) + ) + (i32.const 16) + ) + ) + (local.get $3) + ) + ) + ) + ) + (then + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + (br $block) + ) + ) + ) + (return + (i32.add + (i32.add + (i32.sub + (local.get $0) + (i32.clz + (i8x16.bitmask + (local.get $4) + ) + ) + ) + (local.get $2) + ) + (i32.const 15) + ) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (local.set $1 + (i32.extend8_s + (local.get $1) + ) + ) + (loop $label1 + (if + (i32.eqz + (local.get $2) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label1 + (i32.ne + (local.get $1) + (i32.load8_s + (local.tee $0 + (i32.sub + (local.get $0) + (i32.const 1) + ) + ) + ) + ) + ) + ) + (local.get $0) + ) (func $strlen (param $0 i32) (result i32) (local $1 i32) (local $2 i32) @@ -912,123 +1045,15 @@ ) ) (func $strrchr (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (local $3 v128) - (local $4 v128) - (block $block1 (result i32) - (block $block - (br_if $block - (i32.lt_u - (local.tee $2 - (i32.add - (call $strlen - (local.get $0) - ) - (i32.const 1) - ) - ) - (i32.const 16) - ) - ) - (local.set $3 - (i8x16.splat - (local.get $1) - ) - ) - (loop $label - (if - (i32.eqz - (v128.any_true - (local.tee $4 - (i8x16.eq - (v128.load align=1 - (i32.sub - (i32.add - (local.get $0) - (local.get $2) - ) - (i32.const 16) - ) - ) - (local.get $3) - ) - ) - ) - ) - (then - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - (br $block) - ) - ) - ) - (br $block1 - (i32.add - (i32.add - (i32.sub - (local.get $0) - (i32.clz - (i8x16.bitmask - (local.get $4) - ) - ) - ) - (local.get $2) - ) - (i32.const 15) - ) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (local.get $2) - ) - ) - (local.set $1 - (i32.extend8_s - (local.get $1) - ) - ) - (loop $label1 - (drop - (br_if $block1 - (i32.const 0) - (i32.eqz - (local.get $2) - ) - ) - ) - (local.set $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) - (br_if $label1 - (i32.ne - (local.get $1) - (i32.load8_s - (local.tee $0 - (i32.sub - (local.get $0) - (i32.const 1) - ) - ) - ) - ) - ) - ) + (call $memrchr (local.get $0) + (local.get $1) + (i32.add + (call $strlen + (local.get $0) + ) + (i32.const 1) + ) ) ) (func $strspn (param $0 i32) (param $1 i32) (result i32) @@ -1170,69 +1195,10 @@ (br $block) ) ) - (v128.store - (i32.const 1280) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1264) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1248) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1232) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1216) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1200) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1184) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1168) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1152) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1136) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1120) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1104) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1088) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1072) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1056) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store + (memory.fill (i32.const 1040) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i32.const 0) + (i32.const 256) ) (local.set $1 (i32.add @@ -1368,272 +1334,211 @@ (local $3 v128) (local $4 v128) (local $scratch i32) - (block $block1 - (block $block2 - (block $block3 - (block $block - (br_if $block - (i32.eqz - (local.tee $2 - (i32.load8_u - (local.get $1) - ) - ) - ) + (block $block + (if + (local.tee $2 + (i32.load8_u + (local.get $1) + ) + ) + (then + (br_if $block + (i32.load8_u offset=1 + (local.get $1) ) - (br_if $block - (i32.eqz - (i32.load8_u offset=1 - (local.get $1) - ) - ) - ) - (v128.store - (i32.const 1536) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1520) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1504) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1488) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1472) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1456) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1440) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1424) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1408) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1392) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1376) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1360) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1344) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1328) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1312) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (v128.store - (i32.const 1296) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (loop $label - (i32.store8 - (i32.add - (local.tee $2 - (i32.load8_u - (local.get $1) - ) - ) - (i32.const 1296) - ) - (i32.const 1) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (br_if $label - (local.get $2) - ) - ) - (local.set $1 - (local.get $0) - ) - (loop $label1 - (br_if $block1 - (i32.load8_u - (i32.add - (i32.load8_u - (local.get $1) - ) - (i32.const 1296) - ) - ) - ) - (br_if $block2 - (i32.load8_u - (i32.add - (i32.load8_u offset=1 - (local.get $1) - ) - (i32.const 1296) - ) - ) - ) - (br_if $block3 - (i32.load8_u - (i32.add - (i32.load8_u offset=2 - (local.get $1) - ) - (i32.const 1296) - ) - ) - ) - (br_if $label1 - (i32.eqz - (i32.load8_u - (i32.add - (block (result i32) - (local.set $scratch - (i32.load8_u offset=3 - (local.get $1) - ) + ) + ) + ) + (block $block1 + (if + (v128.any_true + (local.tee $3 + (v128.or + (i8x16.eq + (local.tee $3 + (v128.load + (local.tee $1 + (i32.and + (local.get $0) + (i32.const -16) ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 4) - ) - ) - (local.get $scratch) ) - (i32.const 1296) + ) + ) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $3) + (local.tee $4 + (i8x16.splat + (local.get $2) ) ) ) ) ) - (return - (i32.sub - (i32.sub - (local.get $1) - (i32.const 1) - ) - (local.get $0) - ) - ) ) - (block $block4 - (if + (then + (br_if $block1 + (local.tee $2 + (i32.and + (i8x16.bitmask + (local.get $3) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + ) + ) + ) + ) + ) + (loop $label + (local.set $3 + (v128.load offset=16 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (br_if $label + (i32.eqz (v128.any_true (local.tee $3 (v128.or (i8x16.eq - (local.tee $3 - (v128.load - (local.tee $1 - (i32.and - (local.get $0) - (i32.const -16) - ) - ) - ) - ) + (local.get $3) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq (local.get $3) - (local.tee $4 - (i8x16.splat - (local.get $2) - ) - ) - ) - ) - ) - ) - (then - (br_if $block4 - (local.tee $2 - (i32.and - (i8x16.bitmask - (local.get $3) - ) - (i32.shl - (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) - ) - ) + (local.get $4) ) ) ) ) ) - (loop $label2 - (local.set $3 - (v128.load offset=16 - (local.get $1) - ) - ) - (local.set $1 + ) + ) + (local.set $2 + (i8x16.bitmask + (local.get $3) + ) + ) + ) + (return + (i32.sub + (i32.add + (local.get $1) + (i32.ctz + (local.get $2) + ) + ) + (local.get $0) + ) + ) + ) + (memory.fill + (i32.const 1296) + (i32.const 0) + (i32.const 256) + ) + (loop $label1 + (i32.store8 + (i32.add + (local.tee $2 + (i32.load8_u + (local.get $1) + ) + ) + (i32.const 1296) + ) + (i32.const 1) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label1 + (local.get $2) + ) + ) + (local.set $1 + (local.get $0) + ) + (block $block2 + (block $block3 + (block $block4 + (loop $label2 + (br_if $block2 + (i32.load8_u (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (br_if $label2 - (i32.eqz - (v128.any_true - (local.tee $3 - (v128.or - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - (i8x16.eq - (local.get $3) - (local.get $4) - ) - ) - ) + (i32.load8_u + (local.get $1) ) + (i32.const 1296) ) ) ) - (local.set $2 - (i8x16.bitmask - (local.get $3) + (br_if $block3 + (i32.load8_u + (i32.add + (i32.load8_u offset=1 + (local.get $1) + ) + (i32.const 1296) + ) + ) + ) + (br_if $block4 + (i32.load8_u + (i32.add + (i32.load8_u offset=2 + (local.get $1) + ) + (i32.const 1296) + ) + ) + ) + (br_if $label2 + (i32.eqz + (i32.load8_u + (i32.add + (block (result i32) + (local.set $scratch + (i32.load8_u offset=3 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.get $scratch) + ) + (i32.const 1296) + ) + ) ) ) ) (return (i32.sub - (i32.add + (i32.sub (local.get $1) - (i32.ctz - (local.get $2) - ) + (i32.const 1) ) (local.get $0) ) @@ -1661,6 +1566,148 @@ (local.get $0) ) ) + (func $memccpy (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (memory.copy + (local.get $0) + (local.get $1) + (select + (local.tee $1 + (i32.add + (i32.sub + (local.tee $2 + (call $memchr + (local.get $1) + (local.get $2) + (local.get $3) + ) + ) + (local.get $1) + ) + (i32.const 1) + ) + ) + (local.get $3) + (local.get $2) + ) + ) + (select + (i32.add + (local.get $0) + (local.get $1) + ) + (i32.const 0) + (local.get $2) + ) + ) + (func $strncat (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (memory.copy + (local.tee $3 + (i32.add + (call $strlen + (local.get $0) + ) + (local.get $0) + ) + ) + (local.get $1) + (local.tee $1 + (call $strnlen + (local.get $1) + (local.get $2) + ) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (local.get $3) + ) + (i32.const 0) + ) + (local.get $0) + ) + (func $stpcpy (param $0 i32) (param $1 i32) (result i32) + (memory.copy + (local.get $0) + (local.get $1) + (i32.add + (local.tee $1 + (call $strlen + (local.get $1) + ) + ) + (i32.const 1) + ) + ) + (i32.add + (local.get $0) + (local.get $1) + ) + ) + (func $strcpy (param $0 i32) (param $1 i32) (result i32) + (memory.copy + (local.get $0) + (local.get $1) + (i32.add + (call $strlen + (local.get $1) + ) + (i32.const 1) + ) + ) + (local.get $0) + ) + (func $stpncpy (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (memory.copy + (local.get $0) + (local.get $1) + (local.tee $1 + (call $strnlen + (local.get $1) + (local.get $2) + ) + ) + ) + (memory.fill + (local.tee $0 + (i32.add + (local.get $0) + (local.get $1) + ) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (local.get $1) + ) + ) + (local.get $0) + ) + (func $strncpy (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (memory.copy + (local.get $0) + (local.get $1) + (local.tee $1 + (call $strnlen + (local.get $1) + (local.get $2) + ) + ) + ) + (memory.fill + (i32.add + (local.get $0) + (local.get $1) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (local.get $1) + ) + ) + (local.get $0) + ) (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (local $4 i32) (local $5 i32) @@ -2107,6 +2154,23 @@ ) ) ) + (func $strnlen (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (select + (i32.sub + (local.tee $2 + (call $memchr + (local.get $0) + (i32.const 0) + (local.get $1) + ) + ) + (local.get $0) + ) + (local.get $1) + (local.get $2) + ) + ) ;; features section: mutable-globals, nontrapping-float-to-int, simd, bulk-memory, sign-ext, reference-types, multivalue, bulk-memory-opt ) diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index a12bfbd..67cc2ea 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -42,13 +42,25 @@ void *memmove(void *dest, const void *src, size_t n) { __attribute__((weak)) int memcmp(const void *v1, const void *v2, size_t n) { + // Baseline algorithm. + if (n < sizeof(v128_t)) { + const unsigned char *u1 = (unsigned char *)v1; + const unsigned char *u2 = (unsigned char *)v2; + while (n--) { + if (*u1 != *u2) return *u1 - *u2; + u1++; + u2++; + } + return 0; + } + // memcmp is allowed to read up to n bytes from each object. // Find the first different character in the objects. // Unaligned loads handle the case where the objects // have mismatching alignments. const v128_t *w1 = (v128_t *)v1; const v128_t *w2 = (v128_t *)v2; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + while (n) { const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2)); // Bitmask is slow on AArch64, all_true is much faster. if (!wasm_i8x16_all_true(cmp)) { @@ -60,17 +72,12 @@ int memcmp(const void *v1, const void *v2, size_t n) { __builtin_assume(*u1 - *u2 != 0); return *u1 - *u2; } - w1++; - w2++; - } - - // Baseline algorithm. - const unsigned char *u1 = (unsigned char *)w1; - const unsigned char *u2 = (unsigned char *)w2; - while (n--) { - if (*u1 != *u2) return *u1 - *u2; - u1++; - u2++; + // This makes n a multiple of sizeof(v128_t) + // for every iteration except the first. + size_t align = (n - 1) % sizeof(v128_t) + 1; + w1 = (v128_t *)((char *)w1 + align); + w2 = (v128_t *)((char *)w2 + align); + n -= align; } return 0; } @@ -359,29 +366,13 @@ size_t strspn(const char *s, const char *c) { return s - a; } -#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__ - - // Unoptimized version. memset(byteset, 0, sizeof(byteset)); - while (*c && (byteset[*(unsigned char *)c] = 1)) c++; - while (byteset[*(unsigned char *)s]) s++; - -#else // __OPTIMIZE__ - - // This is faster than memset. - // Going backward helps bounds check elimination. - volatile v128_t *w = (v128_t *)byteset; - #pragma unroll - for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){}; - static_assert(sizeof(byteset) % sizeof(v128_t) == 0); - // Keeping byteset[0] = 0 avoids the next loop needing that check. while (*c && (byteset[*(unsigned char *)c] = 1)) c++; - #pragma unroll 4 +#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__ +#pragma unroll 4 +#endif while (byteset[*(unsigned char *)s]) s++; - -#endif // __OPTIMIZE__ - return s - a; } @@ -395,29 +386,13 @@ size_t strcspn(const char *s, const char *c) { if (!c[0] || !c[1]) return __strchrnul(s, *c) - s; -#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__ - - // Unoptimized version. memset(byteset, 0, sizeof(byteset)); - while ((byteset[*(unsigned char *)c] = 1) && *c) c++; - while (!byteset[*(unsigned char *)s]) s++; - -#else // __OPTIMIZE__ - - // This is faster than memset. - // Going backward helps bounds check elimination. - volatile v128_t *w = (v128_t *)byteset; - #pragma unroll - for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){}; - static_assert(sizeof(byteset) % sizeof(v128_t) == 0); - // Setting byteset[0] = 1 avoids the next loop needing that check. while ((byteset[*(unsigned char *)c] = 1) && *c) c++; - #pragma unroll 4 +#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__ +#pragma unroll 4 +#endif while (!byteset[*(unsigned char *)s]) s++; - -#endif // __OPTIMIZE__ - return s - a; } @@ -435,8 +410,9 @@ size_t strcspn(const char *s, const char *c) { // - strsep // - strtok -__attribute__((weak, always_inline)) +__attribute__((weak)) void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n) { + void *memchr(const void *v, int c, size_t n); const void *m = memchr(src, c, n); if (m != NULL) { n = (char *)m - (char *)src + 1; @@ -446,15 +422,23 @@ void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n return (void *)m; } -__attribute__((weak, always_inline)) -char *stpcpy(char *__restrict dest, const char *__restrict src) { +__attribute__((weak)) +char *strncat(char *__restrict dest, const char *__restrict src, size_t n) { + size_t strnlen(const char *s, size_t n); + size_t dlen = strlen(dest); + size_t slen = strnlen(src, n); + memcpy(dest + dlen, src, slen); + dest[dlen + slen] = 0; + return dest; +} + +static char *__stpcpy(char *__restrict dest, const char *__restrict src) { size_t slen = strlen(src); memcpy(dest, src, slen + 1); return dest + slen; } -__attribute__((weak, always_inline)) -char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) { +static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) { size_t strnlen(const char *s, size_t n); size_t slen = strnlen(src, n); memcpy(dest, src, slen); @@ -463,24 +447,23 @@ char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) { } __attribute__((weak, always_inline)) +char *stpcpy(char *__restrict dest, const char *__restrict src) { + return __stpcpy(dest, src); +} + char *strcpy(char *__restrict dest, const char *__restrict src) { - stpcpy(dest, src); + __stpcpy(dest, src); return dest; } +__attribute__((weak, always_inline)) +char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) { + return __stpncpy(dest, src, n); +} + __attribute__((weak, always_inline)) char *strncpy(char *__restrict dest, const char *__restrict src, size_t n) { - stpncpy(dest, src, n); - return dest; -} - -__attribute__((weak, always_inline)) -char *strncat(char *__restrict dest, const char *__restrict src, size_t n) { - size_t strnlen(const char *s, size_t n); - size_t dlen = strlen(dest); - size_t slen = strnlen(src, n); - memcpy(dest + dlen, src, slen); - dest[dlen + slen] = 0; + __stpncpy(dest, src, n); return dest; } diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h index 6915aba..9d427fd 100644 --- a/sqlite3/libc/strings.h +++ b/sqlite3/libc/strings.h @@ -16,24 +16,34 @@ __attribute__((weak)) int bcmp(const void *v1, const void *v2, size_t n) { // bcmp is the same as memcmp but only compares for equality. + // Baseline algorithm. + if (n < sizeof(v128_t)) { + const unsigned char *u1 = (unsigned char *)v1; + const unsigned char *u2 = (unsigned char *)v2; + while (n--) { + if (*u1 != *u2) return 1; + u1++; + u2++; + } + return 0; + } + + // bcmp is allowed to read up to n bytes from each object. + // Unaligned loads handle the case where the objects + // have mismatching alignments. const v128_t *w1 = (v128_t *)v1; const v128_t *w2 = (v128_t *)v2; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + while (n) { // Find any single bit difference. if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { return 1; } - w1++; - w2++; - } - - // Continue byte-by-byte. - const unsigned char *u1 = (unsigned char *)w1; - const unsigned char *u2 = (unsigned char *)w2; - while (n--) { - if (*u1 != *u2) return 1; - u1++; - u2++; + // This makes n a multiple of sizeof(v128_t) + // for every iteration except the first. + size_t align = (n - 1) % sizeof(v128_t) + 1; + w1 = (v128_t *)((char *)w1 + align); + w2 = (v128_t *)((char *)w2 + align); + n -= align; } return 0; } diff --git a/vfs/tests/mptest/wasm/mptest.wasm b/vfs/tests/mptest/wasm/mptest.wasm index 1655131..2517ff4 100644 Binary files a/vfs/tests/mptest/wasm/mptest.wasm and b/vfs/tests/mptest/wasm/mptest.wasm differ diff --git a/vfs/tests/speedtest1/wasm/speedtest1.wasm b/vfs/tests/speedtest1/wasm/speedtest1.wasm index 299df6f..c206243 100644 Binary files a/vfs/tests/speedtest1/wasm/speedtest1.wasm and b/vfs/tests/speedtest1/wasm/speedtest1.wasm differ