From e17a432fde86ef1069085f4d8a2f3e13ea129d7c Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Fri, 9 May 2025 00:59:39 +0100 Subject: [PATCH] Adds `strstr` and `memmem`. (#275) --- sqlite3/libc/build.sh | 4 +- sqlite3/libc/libc.wasm | Bin 3761 -> 5536 bytes sqlite3/libc/libc.wat | 1176 ++++++++++++++++++++++++++++++++++++- sqlite3/libc/libc_test.go | 196 ++++++- sqlite3/libc/string.h | 110 +++- sqlite3/libc/strings.h | 2 + 6 files changed, 1449 insertions(+), 39 deletions(-) diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index cfb717c..9893823 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -23,7 +23,7 @@ EOF -mbulk-memory -mreference-types \ -mnontrapping-fptoint -msign-ext \ -fno-stack-protector -fno-stack-clash-protection \ - -Wl,-z,stack-size=1024 \ + -Wl,-z,stack-size=4096 \ -Wl,--stack-first \ -Wl,--import-undefined \ -Wl,--initial-memory=16777216 \ @@ -31,6 +31,7 @@ EOF -Wl,--export=memchr \ -Wl,--export=memcmp \ -Wl,--export=memcpy \ + -Wl,--export=memmem \ -Wl,--export=memmove \ -Wl,--export=memrchr \ -Wl,--export=memset \ @@ -47,6 +48,7 @@ EOF -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ + -Wl,--export=strstr \ -Wl,--export=qsort "$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index e34936036794e6898620413504bd48177ce76ae2..9d2dab0530d96f35f707239367b44cd71cf91cc4 100755 GIT binary patch delta 2500 zcmbVO&2Ah;5bo;fnc1D6opFBcn6N!#86iSAL4>&A5hPI*8Dr&=TW}mBv+H$c$D1H4 zhsg;E!6DMVKn@6r3sRH=r-(Ze;=%(2DF+T5@&IXtuX=VJXO(bZw7pYZ)%A6CRaZ?t zIDP-jy@guUc&Pq@_|Hy+b83O?;|LP`NGT*JA|;edC1kE5aaa0JXxz=$gI6`S47O(WGdzz@Dm*?8&x9Ztv1=9xdwM*B#uB>GVAENMFn*X z!g7~+$D3uKPPoIpo7G61jEI82mKSQ0wOrA%NY~U#L78$^G#NNZ3;s9j=1MHb-_z2m zLL{M<=VO5t;w%;r{w}i8_zzlivIFyxGSH90=79tiu-{(FqcIpieO-j z(ZNVCve-ne8bli-zzCU{m`LI6Vt5!hzQ|I;x`HqyhJ%sKVY70W`q+?|3?u)P^E+70N5hU#yWwvnom$?3w+KkNgVF_BaJozWCgkoobB~`x 
z988jXX32b>yXFcua?OpR#=>Mc;%Y`A;=>v#7YV)}Qd`xlE^5+54dB?*+(5O4g{Y+n zGS^6ntzi|LK>P`i{PEK=_24N=J(PFoiSP`c6dcH$RE-D0at9T%mpctJogd#1-XL%M zD40LVAI4`WbWmG9P{WFUf@PY2cqBY0YnAhthv62(utlP{t3x93H>ur?!IlWxJ6~Pj zeDCXxy#f763qoB-M`4>2bSX_oQtmem$)Hq|#>AcWCAJwsSK{0LU{JQ2reILEr!7f+ zYbUV6?J8s;cO(%r(1yg!;C`etX-AUQwQEgdlG-JsYHr$=9BGg@Mn@sxrsSAKE%&(( Xu^q{YW<|Mwvgt?W?R@-J=evIaP3{p$ delta 792 zcmYLHJ#W-N5S{VbUVmIJc5)7Z@Zmdg5>kK$L=n+g{0l_EVOa{v*|)JxkZFz|KoR6W zKuH@NT?z^sYFZkoX!!v|VrG4YwPZiNdGlu1`#$WS-#EK^8<)6zZZ2u{Ul2PH@-hH{ zmw2B<%Wt@2=UKie8v__rSq&PbssWV5%V#Kt{<1WDu^*X02~j6{qD$uGf#u}!k}jr@$VJ#g-zT&2zvOnYnAk~R4Ywj&pM zcZNn{o=&XZmF(7Y*}0sESadRLJ(RXRM(SPQ_>BNl;Z zABWSS6~hr(X)+kKi|{&H?YnUI3jc*a9vue!>AyBoT!d2`t$v1&jHRHn!!g_K`M!gY zymfp;_=;vR%QNuY`O6pS{nxW9$4?mbY3Y)onl2c*66=7NeLi$5C?2-Hx?IWbzH;UK z&xT92r<@3niwt~LQyd0Na4t6bSPZ-VS#TiLFkdn(ZzSqc(`n{sJhB B%HRM1 diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 2c62f2a..1dbd28a 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1,11 +1,11 @@ (module $libc.wasm (type $0 (func (param i32 i32) (result i32))) (type $1 (func (param i32 i32 i32) (result i32))) - (type $2 (func (param i32) (result i32))) - (type $3 (func (param i32 i32 i32 i32) (result i32))) + (type $2 (func (param i32 i32 i32 i32) (result i32))) + (type $3 (func (param i32) (result i32))) (type $4 (func (param i32 i32 i32 i32))) (memory $0 256) - (data $0 (i32.const 1024) "\01") + (data $0 (i32.const 4096) "\01") (table $0 1 1 funcref) (export "memory" (memory $0)) (export "memset" (func $memset)) @@ -22,6 +22,8 @@ (export "strrchr" (func $strrchr)) (export "strspn" (func $strspn)) (export "strcspn" (func $strcspn)) + (export "memmem" (func $memmem)) + (export "strstr" (func $strstr)) (export "memccpy" (func $memccpy)) (export "strncat" (func $strncat)) (export "stpcpy" (func $stpcpy)) @@ -29,6 +31,114 @@ (export "stpncpy" (func $stpncpy)) (export "strncpy" (func $strncpy)) (export "qsort" (func $qsort)) + (func $bcmp (param $0 i32) (param $1 i32) 
(param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (block $block + (if + (i32.ge_u + (local.get $2) + (i32.const 16) + ) + (then + (local.set $4 + (i32.const 1) + ) + (loop $label + (br_if $block + (v128.any_true + (v128.xor + (v128.load align=1 + (local.get $1) + ) + (v128.load align=1 + (local.get $0) + ) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (local.tee $3 + (i32.add + (i32.and + (i32.sub + (local.get $2) + (i32.const 1) + ) + (i32.const 15) + ) + (i32.const 1) + ) + ) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $3) + ) + ) + (br_if $label + (local.tee $2 + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + ) + ) + (return + (i32.const 0) + ) + ) + ) + (br_if $block + (i32.eqz + (local.get $2) + ) + ) + (loop $label1 + (if + (i32.ne + (i32.load8_u + (local.get $0) + ) + (i32.load8_u + (local.get $1) + ) + ) + (then + (return + (i32.const 1) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + ) + ) + ) + (local.get $4) + ) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -1118,7 +1228,7 @@ ) (loop $label (v128.store - (i32.const 992) + (i32.const 4064) (local.get $6) ) (i32.store8 @@ -1130,7 +1240,7 @@ (i32.const 15) ) ) - (i32.const 992) + (i32.const 4064) ) ) (i32.or @@ -1152,14 +1262,14 @@ ) ) (v128.store - (i32.const 1008) + (i32.const 4080) (local.get $7) ) (i32.store8 (local.tee $2 (i32.or (local.get $2) - (i32.const 1008) + (i32.const 4080) ) ) (i32.or @@ -1182,12 +1292,12 @@ ) (local.set $6 (v128.load - (i32.const 992) + (i32.const 4064) ) ) (local.set $7 (v128.load - (i32.const 1008) + (i32.const 4080) ) ) (local.set $1 @@ -1307,7 +1417,7 @@ ) (loop $label2 (v128.store - (i32.const 976) + (i32.const 4048) (select (local.get $7) (local.get $6) @@ -1342,7 +1452,7 @@ 
(local.get $0) (i32.const 15) ) - (i32.const 976) + (i32.const 4048) ) ) (i32.and @@ -1609,7 +1719,7 @@ ) (loop $label1 (v128.store - (i32.const 1008) + (i32.const 4080) (local.get $6) ) (i32.store8 @@ -1624,13 +1734,13 @@ (i32.const 15) ) ) - (i32.const 1008) + (i32.const 4080) ) (i32.or (i32.load8_u (i32.or (local.get $3) - (i32.const 1008) + (i32.const 4080) ) ) (i32.shl @@ -1648,14 +1758,14 @@ ) ) (v128.store - (i32.const 992) + (i32.const 4064) (local.get $7) ) (i32.store8 (local.tee $3 (i32.or (local.get $3) - (i32.const 992) + (i32.const 4064) ) ) (i32.or @@ -1676,12 +1786,12 @@ ) (local.set $6 (v128.load - (i32.const 1008) + (i32.const 4080) ) ) (local.set $7 (v128.load - (i32.const 992) + (i32.const 4064) ) ) (br_if $label1 @@ -1787,7 +1897,7 @@ ) (loop $label3 (v128.store - (i32.const 976) + (i32.const 4048) (select (local.get $6) (local.get $7) @@ -1816,7 +1926,7 @@ (local.get $1) (i32.const 15) ) - (i32.const 976) + (i32.const 4048) ) ) (i32.and @@ -1837,6 +1947,1032 @@ (local.get $0) ) ) + (func $memmem (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (local $4 i32) + (if + (i32.eqz + (local.get $3) + ) + (then + (return + (local.get $0) + ) + ) + ) + (block $block + (br_if $block + (i32.lt_u + (local.get $1) + (local.get $3) + ) + ) + (local.set $4 + (call $memchr + (local.get $0) + (i32.load8_s + (local.get $2) + ) + (local.get $1) + ) + ) + (br_if $block + (i32.eq + (local.get $3) + (i32.const 1) + ) + ) + (br_if $block + (i32.eqz + (local.get $4) + ) + ) + (local.set $4 + (call $__memmem + (local.get $4) + (i32.sub + (i32.add + (local.get $0) + (local.get $1) + ) + (local.get $4) + ) + (local.get $2) + (local.get $3) + ) + ) + ) + (local.get $4) + ) + (func $__memmem (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 v128) + (local $11 v128) + (local $12 v128) + (local $13 v128) + (local 
$14 v128) + (local $15 v128) + (local $16 v128) + (local $17 v128) + (if + (i32.lt_u + (local.get $1) + (local.get $3) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (block $block2 + (block $block + (if + (i32.le_u + (local.get $3) + (i32.const 15) + ) + (then + (if + (i32.gt_u + (local.get $0) + (local.tee $6 + (i32.sub + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 16) + ) + ) + ) + (then + (local.set $4 + (local.get $1) + ) + (br $block) + ) + ) + (local.set $10 + (v128.load8_splat + (i32.sub + (i32.add + (local.get $2) + (local.get $3) + ) + (i32.const 1) + ) + ) + ) + (local.set $12 + (v128.load8_splat + (local.get $2) + ) + ) + (local.set $7 + (i32.sub + (local.get $3) + (i32.const 2) + ) + ) + (local.set $8 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (loop $label1 + (block $block1 + (br_if $block1 + (i32.eqz + (v128.any_true + (local.tee $13 + (v128.and + (i8x16.eq + (local.get $10) + (v128.load align=1 + (i32.sub + (i32.add + (local.get $0) + (local.get $3) + ) + (i32.const 1) + ) + ) + ) + (i8x16.eq + (local.get $12) + (v128.load align=1 + (local.get $0) + ) + ) + ) + ) + ) + ) + ) + (br_if $block1 + (i32.eqz + (local.tee $4 + (i8x16.bitmask + (local.get $13) + ) + ) + ) + ) + (loop $label + (br_if $block2 + (i32.eqz + (call $bcmp + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + (i32.ctz + (local.get $4) + ) + ) + ) + (i32.const 1) + ) + (local.get $8) + (local.get $7) + ) + ) + ) + (br_if $label + (local.tee $4 + (i32.and + (i32.sub + (local.get $4) + (i32.const 1) + ) + (local.get $4) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.tee $4 + (i32.sub + (local.get $1) + (i32.const 16) + ) + ) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (if + (i32.gt_u + (local.get $3) + (local.get $4) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $1 + (local.get $4) + ) + (br_if $label1 + (i32.ge_u + (local.get $6) + (local.tee $0 + (i32.add + (local.get $0) + 
(i32.const 16) + ) + ) + ) + ) + ) + (br $block) + ) + ) + (memory.fill + (i32.const 4112) + (select + (i32.const 255) + (local.tee $6 + (i32.sub + (local.get $3) + (i32.const 1) + ) + ) + (i32.ge_u + (local.get $6) + (i32.const 255) + ) + ) + (i32.const 256) + ) + (block $block3 + (if + (i32.ne + (local.get $3) + (i32.const 16) + ) + (then + (local.set $12 + (i32x4.splat + (local.get $6) + ) + ) + (local.set $13 + (v128.const i32x4 0x0000000c 0x0000000d 0x0000000e 0x0000000f) + ) + (local.set $15 + (v128.const i32x4 0x00000008 0x00000009 0x0000000a 0x0000000b) + ) + (local.set $16 + (v128.const i32x4 0x00000004 0x00000005 0x00000006 0x00000007) + ) + (local.set $17 + (v128.const i32x4 0x00000000 0x00000001 0x00000002 0x00000003) + ) + (local.set $5 + (local.tee $7 + (i32.and + (local.get $6) + (i32.const -16) + ) + ) + ) + (local.set $4 + (local.get $2) + ) + (loop $label2 + (v128.store8_lane 0 + (i32.add + (i32x4.extract_lane 0 + (local.tee $11 + (i32x4.extend_low_i16x8_u + (i16x8.extend_low_i8x16_u + (local.tee $14 + (v128.load align=1 + (local.get $4) + ) + ) + ) + ) + ) + ) + (i32.const 4112) + ) + (local.tee $10 + (i8x16.narrow_i16x8_u + (i16x8.narrow_i32x4_u + (i32x4.min_u + (i32x4.add + (local.get $12) + (v128.not + (local.get $17) + ) + ) + (v128.const i32x4 0x000000ff 0x000000ff 0x000000ff 0x000000ff) + ) + (i32x4.min_u + (i32x4.add + (local.get $12) + (v128.not + (local.get $16) + ) + ) + (v128.const i32x4 0x000000ff 0x000000ff 0x000000ff 0x000000ff) + ) + ) + (i16x8.narrow_i32x4_u + (i32x4.min_u + (i32x4.add + (local.get $12) + (v128.not + (local.get $15) + ) + ) + (v128.const i32x4 0x000000ff 0x000000ff 0x000000ff 0x000000ff) + ) + (i32x4.min_u + (i32x4.add + (local.get $12) + (v128.not + (local.get $13) + ) + ) + (v128.const i32x4 0x000000ff 0x000000ff 0x000000ff 0x000000ff) + ) + ) + ) + ) + ) + (v128.store8_lane 1 + (i32.add + (i32x4.extract_lane 1 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 2 + (i32.add + 
(i32x4.extract_lane 2 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 3 + (i32.add + (i32x4.extract_lane 3 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 4 + (i32.add + (i32x4.extract_lane 0 + (local.tee $11 + (i32x4.extend_low_i16x8_u + (i16x8.extend_low_i8x16_u + (i8x16.shuffle 4 5 6 7 0 0 0 0 0 0 0 0 0 0 0 0 + (local.get $14) + (local.get $10) + ) + ) + ) + ) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 5 + (i32.add + (i32x4.extract_lane 1 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 6 + (i32.add + (i32x4.extract_lane 2 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 7 + (i32.add + (i32x4.extract_lane 3 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 8 + (i32.add + (i32x4.extract_lane 0 + (local.tee $11 + (i32x4.extend_low_i16x8_u + (i16x8.extend_low_i8x16_u + (i8x16.shuffle 8 9 10 11 0 0 0 0 0 0 0 0 0 0 0 0 + (local.get $14) + (local.get $10) + ) + ) + ) + ) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 9 + (i32.add + (i32x4.extract_lane 1 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 10 + (i32.add + (i32x4.extract_lane 2 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 11 + (i32.add + (i32x4.extract_lane 3 + (local.get $11) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 12 + (i32.add + (i32x4.extract_lane 0 + (local.tee $14 + (i32x4.extend_low_i16x8_u + (i16x8.extend_low_i8x16_u + (i8x16.shuffle 12 13 14 15 0 0 0 0 0 0 0 0 0 0 0 0 + (local.get $14) + (local.get $10) + ) + ) + ) + ) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 13 + (i32.add + (i32x4.extract_lane 1 + (local.get $14) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 14 + (i32.add + (i32x4.extract_lane 2 + 
(local.get $14) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (v128.store8_lane 15 + (i32.add + (i32x4.extract_lane 3 + (local.get $14) + ) + (i32.const 4112) + ) + (local.get $10) + ) + (local.set $17 + (i32x4.add + (local.get $17) + (v128.const i32x4 0x00000010 0x00000010 0x00000010 0x00000010) + ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (local.set $16 + (i32x4.add + (local.get $16) + (v128.const i32x4 0x00000010 0x00000010 0x00000010 0x00000010) + ) + ) + (local.set $15 + (i32x4.add + (local.get $15) + (v128.const i32x4 0x00000010 0x00000010 0x00000010 0x00000010) + ) + ) + (local.set $13 + (i32x4.add + (local.get $13) + (v128.const i32x4 0x00000010 0x00000010 0x00000010 0x00000010) + ) + ) + (br_if $label2 + (local.tee $5 + (i32.sub + (local.get $5) + (i32.const 16) + ) + ) + ) + ) + (br_if $block3 + (i32.eq + (local.get $6) + (local.get $7) + ) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $2) + (local.get $7) + ) + ) + (local.set $4 + (i32.sub + (i32.sub + (local.get $3) + (local.get $7) + ) + (i32.const 2) + ) + ) + (loop $label3 + (i32.store8 + (i32.add + (i32.load8_u + (local.get $5) + ) + (i32.const 4112) + ) + (select + (i32.const 255) + (local.get $4) + (i32.ge_u + (local.get $4) + (i32.const 255) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label3 + (i32.ne + (local.tee $4 + (i32.sub + (local.get $4) + (i32.const 1) + ) + ) + (i32.const -1) + ) + ) + ) + ) + (block $block4 + (if + (i32.gt_u + (local.get $0) + (local.tee $6 + (i32.sub + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 16) + ) + ) + ) + (then + (local.set $4 + (local.get $1) + ) + (br $block4) + ) + ) + (local.set $10 + (v128.load8_splat + (i32.sub + (i32.add + (local.get $2) + (local.get $3) + ) + (i32.const 1) + ) + ) + ) + (local.set $12 + (v128.load8_splat + (local.get $2) + ) + ) + (local.set $7 + (i32.sub + (local.get $3) + (i32.const 2) + ) + ) + 
(local.set $8 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (loop $label5 + (block $block5 + (br_if $block5 + (i32.eqz + (v128.any_true + (local.tee $15 + (v128.and + (i8x16.eq + (local.get $10) + (local.tee $13 + (v128.load align=1 + (i32.sub + (i32.add + (local.get $0) + (local.get $3) + ) + (i32.const 1) + ) + ) + ) + ) + (i8x16.eq + (local.get $12) + (v128.load align=1 + (local.get $0) + ) + ) + ) + ) + ) + ) + ) + (br_if $block5 + (i32.eqz + (local.tee $4 + (i8x16.bitmask + (local.get $15) + ) + ) + ) + ) + (loop $label4 + (br_if $block2 + (i32.eqz + (call $bcmp + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + (i32.ctz + (local.get $4) + ) + ) + ) + (i32.const 1) + ) + (local.get $8) + (local.get $7) + ) + ) + ) + (br_if $label4 + (local.tee $4 + (i32.and + (i32.sub + (local.get $4) + (i32.const 1) + ) + (local.get $4) + ) + ) + ) + ) + ) + (local.set $5 + (i32.const 0) + ) + (br_if $block2 + (i32.gt_u + (local.tee $4 + (i32.sub + (local.get $1) + (local.tee $9 + (i32.add + (i32.load8_u + (i32.add + (i8x16.extract_lane_s 15 + (local.get $13) + ) + (i32.const 4112) + ) + ) + (i32.const 16) + ) + ) + ) + ) + (local.get $1) + ) + ) + (br_if $block2 + (i32.gt_u + (local.get $3) + (local.get $4) + ) + ) + (local.set $1 + (local.get $4) + ) + (br_if $label5 + (i32.le_u + (local.tee $0 + (i32.add + (local.get $0) + (local.get $9) + ) + ) + (local.get $6) + ) + ) + ) + ) + ) + (local.set $6 + (i32.sub + (local.get $4) + (local.get $3) + ) + ) + (local.set $1 + (i32.const 0) + ) + (loop $label6 + (local.set $4 + (i32.const 0) + ) + (loop $label7 + (if + (i32.ne + (i32.load8_u + (i32.add + (local.get $2) + (local.get $4) + ) + ) + (i32.load8_u + (i32.add + (local.get $0) + (local.get $4) + ) + ) + ) + (then + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $5 + (i32.const 0) + ) + (br_if $label6 + (i32.le_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $6) + ) + ) + (br $block2) + ) + ) + 
(br_if $label7 + (i32.ne + (local.get $3) + (local.tee $4 + (i32.add + (local.get $4) + (i32.const 1) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $0) + ) + ) + (local.get $5) + ) + (func $strstr (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 v128) + (local $6 v128) + (block $block + (br_if $block + (i32.eqz + (local.tee $3 + (i32.load8_u + (local.get $1) + ) + ) + ) + ) + (block $block1 + (if + (v128.any_true + (local.tee $5 + (v128.or + (i8x16.eq + (local.tee $5 + (v128.load + (local.tee $2 + (i32.and + (local.get $0) + (i32.const -16) + ) + ) + ) + ) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $5) + (local.tee $6 + (i8x16.splat + (local.get $3) + ) + ) + ) + ) + ) + ) + (then + (br_if $block1 + (local.tee $4 + (i32.and + (i8x16.bitmask + (local.get $5) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + ) + ) + ) + ) + ) + (loop $label + (local.set $5 + (v128.load offset=16 + (local.get $2) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + (br_if $label + (i32.eqz + (v128.any_true + (local.tee $5 + (v128.or + (i8x16.eq + (local.get $5) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + (i8x16.eq + (local.get $5) + (local.get $6) + ) + ) + ) + ) + ) + ) + ) + (local.set $4 + (i8x16.bitmask + (local.get $5) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (br_if $block + (i32.ne + (local.get $3) + (i32.load8_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.ctz + (local.get $4) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.load8_u offset=1 + (local.get $1) + ) + ) + (then + (return + (local.get $2) + ) + ) + ) + (local.set $0 + (call $__memmem + (local.get $2) + (call $strlen + (local.get $2) + ) + (local.get $1) + (call $strlen + (local.get $1) + ) + ) + ) + ) + (local.get $0) + ) (func $memccpy (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) 
(result i32) (memory.copy (local.get $0) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index e83b9c5..327bc72 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -31,6 +31,7 @@ var ( strlen api.Function strchr api.Function strcmp api.Function + strstr api.Function strspn api.Function strrchr api.Function strncmp api.Function @@ -64,6 +65,7 @@ func TestMain(m *testing.M) { strlen = mod.ExportedFunction("strlen") strchr = mod.ExportedFunction("strchr") strcmp = mod.ExportedFunction("strcmp") + strstr = mod.ExportedFunction("strstr") strspn = mod.ExportedFunction("strspn") strrchr = mod.ExportedFunction("strrchr") strncmp = mod.ExportedFunction("strncmp") @@ -210,6 +212,21 @@ func Benchmark_strcspn(b *testing.B) { } } +//go:embed string.h +var source string + +func Benchmark_strstr(b *testing.B) { + clear(memory) + copy(memory[ptr1:], source) + copy(memory[ptr2:], "memcpy(dest, src, slen)") + + b.SetBytes(int64(len(source))) + b.ResetTimer() + for range b.N { + call(strstr, ptr1, ptr2) + } +} + func Test_memcmp(t *testing.T) { const s1 string = "" + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + @@ -230,17 +247,16 @@ func Test_memcmp(t *testing.T) { "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + "\x80\xf3\x93\x01\x00\x02" - p1 := ptr1 - p2 := len(memory) - len(s2) + ptr2 := len(memory) - len(s2) clear(memory) - copy(memory[p1:], s1) - copy(memory[p2:], s2) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) for i := range len(s1) + 1 { for j := range len(s1) - i { want := strings.Compare(s1[i:i+j], s2[i:i+j]) - got := call(memcmp, uint64(p1+i), uint64(p2+i), uint64(j)) + got := call(memcmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) if sign(int32(got)) != want { t.Errorf("strcmp(%d, %d, %d) = %d, want %d", ptr1+i, ptr2+i, j, int32(got), want) @@ -269,19 +285,18 @@ func Test_strcmp(t *testing.T) { "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + 
"\x80\xf3\x93\x01\x00\x02" - p1 := ptr1 - p2 := len(memory) - len(s2) - 1 + ptr2 := len(memory) - len(s2) - 1 clear(memory) - copy(memory[p1:], s1) - copy(memory[p2:], s2) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) for i := range len(s1) + 1 { want := strings.Compare(term(s1[i:]), term(s2[i:])) - got := call(strcmp, uint64(p1+i), uint64(p2+i)) + got := call(strcmp, uint64(ptr1+i), uint64(ptr2+i)) if sign(int32(got)) != want { t.Errorf("strcmp(%d, %d) = %d, want %d", - p1+i, ptr2+i, int32(got), want) + ptr1+i, ptr2+i, int32(got), want) } } } @@ -306,17 +321,16 @@ func Test_strncmp(t *testing.T) { "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + "\x80\xf3\x93\x01\x00\x02" - p1 := ptr1 - p2 := len(memory) - len(s2) - 1 + ptr2 := len(memory) - len(s2) - 1 clear(memory) - copy(memory[p1:], s1) - copy(memory[p2:], s2) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) for i := range len(s1) + 1 { for j := range len(s1) - i + 1 { want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j])) - got := call(strncmp, uint64(p1+i), uint64(p2+i), uint64(j)) + got := call(strncmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) if sign(int32(got)) != want { t.Errorf("strncmp(%d, %d, %d) = %d, want %d", ptr1+i, ptr2+i, j, int32(got), want) @@ -595,6 +609,156 @@ func Test_strcspn(t *testing.T) { } } +func Test_strstr(t *testing.T) { + var tt = []struct { + h string + n string + out int + }{ + {"", "", 0}, + {"", "a", -1}, + {"", "foo", -1}, + {"fo", "foo", -1}, + {"foo", "foo", 0}, + {"oofofoofooo", "f", 2}, + {"oofofoofooo", "foo", 4}, + {"barfoobarfoo", "foo", 3}, + {"foo", "", 0}, + {"foo", "o", 1}, + {"abcABCabc", "A", 3}, + {"jrzm6jjhorimglljrea4w3rlgosts0w2gia17hno2td4qd1jz", "jz", 47}, + {"ekkuk5oft4eq0ocpacknhwouic1uua46unx12l37nioq9wbpnocqks6", "ks6", 52}, + {"999f2xmimunbuyew5vrkla9cpwhmxan8o98ec", "98ec", 33}, + {"9lpt9r98i04k8bz6c6dsrthb96bhi", "96bhi", 24}, + {"55u558eqfaod2r2gu42xxsu631xf0zobs5840vl", "5840vl", 33}, + {"", "a", -1}, + {"x", "a", 
-1}, + {"x", "x", 0}, + {"abc", "a", 0}, + {"abc", "b", 1}, + {"abc", "c", 2}, + {"abc", "x", -1}, + {"", "ab", -1}, + {"bc", "ab", -1}, + {"ab", "ab", 0}, + {"xab", "ab", 1}, + {"xab"[:2], "ab", -1}, + {"", "abc", -1}, + {"xbc", "abc", -1}, + {"abc", "abc", 0}, + {"xabc", "abc", 1}, + {"xabc"[:3], "abc", -1}, + {"xabxc", "abc", -1}, + {"", "abcd", -1}, + {"xbcd", "abcd", -1}, + {"abcd", "abcd", 0}, + {"xabcd", "abcd", 1}, + {"xyabcd"[:5], "abcd", -1}, + {"xbcqq", "abcqq", -1}, + {"abcqq", "abcqq", 0}, + {"xabcqq", "abcqq", 1}, + {"xyabcqq"[:6], "abcqq", -1}, + {"xabxcqq", "abcqq", -1}, + {"xabcqxq", "abcqq", -1}, + {"", "01234567", -1}, + {"32145678", "01234567", -1}, + {"01234567", "01234567", 0}, + {"x01234567", "01234567", 1}, + {"x0123456x01234567", "01234567", 9}, + {"xx01234567"[:9], "01234567", -1}, + {"", "0123456789", -1}, + {"3214567844", "0123456789", -1}, + {"0123456789", "0123456789", 0}, + {"x0123456789", "0123456789", 1}, + {"x012345678x0123456789", "0123456789", 11}, + {"xyz0123456789"[:12], "0123456789", -1}, + {"x01234567x89", "0123456789", -1}, + {"", "0123456789012345", -1}, + {"3214567889012345", "0123456789012345", -1}, + {"0123456789012345", "0123456789012345", 0}, + {"x0123456789012345", "0123456789012345", 1}, + {"x012345678901234x0123456789012345", "0123456789012345", 17}, + {"", "01234567890123456789", -1}, + {"32145678890123456789", "01234567890123456789", -1}, + {"01234567890123456789", "01234567890123456789", 0}, + {"x01234567890123456789", "01234567890123456789", 1}, + {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21}, + {"xyz01234567890123456789"[:22], "01234567890123456789", -1}, + {"", "0123456789012345678901234567890", -1}, + {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1}, + {"0123456789012345678901234567890", "0123456789012345678901234567890", 0}, + {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1}, + 
{"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32}, + {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1}, + {"", "01234567890123456789012345678901", -1}, + {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1}, + {"01234567890123456789012345678901", "01234567890123456789012345678901", 0}, + {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1}, + {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33}, + {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1}, + {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6}, + {"", "0123456789012345678901234567890123456789", -1}, + {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2}, + {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1}, + {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1}, + {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65}, + {"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33}, + {"fofofofooofoboo", "oo", 7}, + {"fofofofofofoboo", "ob", 11}, + {"fofofofofofoboo", "boo", 12}, + {"fofofofofofoboo", "oboo", 11}, + {"fofofofofoooboo", "fooo", 8}, + {"fofofofofofoboo", "foboo", 10}, + {"fofofofofofoboo", "fofob", 8}, + {"fofofofofofofoffofoobarfoo", "foffof", 12}, + {"fofofofofoofofoffofoobarfoo", "foffof", 13}, + {"fofofofofofofoffofoobarfoo", "foffofo", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofo", 13}, + {"fofofofofoofofoffofoobarfoo", "foffofoo", 13}, + 
{"fofofofofofofoffofoobarfoo", "foffofoo", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofoob", 13}, + {"fofofofofofofoffofoobarfoo", "foffofoob", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofooba", 13}, + {"fofofofofofofoffofoobarfoo", "foffofooba", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofoobar", 13}, + {"fofofofofofofoffofoobarfoo", "foffofoobar", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofoobarf", 13}, + {"fofofofofofofoffofoobarfoo", "foffofoobarf", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofoobarfo", 13}, + {"fofofofofofofoffofoobarfoo", "foffofoobarfo", 12}, + {"fofofofofoofofoffofoobarfoo", "foffofoobarfoo", 13}, + {"fofofofofofofoffofoobarfoo", "foffofoobarfoo", 12}, + {"fofofofofoofofoffofoobarfoo", "ofoffofoobarfoo", 12}, + {"fofofofofofofoffofoobarfoo", "ofoffofoobarfoo", 11}, + {"fofofofofoofofoffofoobarfoo", "fofoffofoobarfoo", 11}, + {"fofofofofofofoffofoobarfoo", "fofoffofoobarfoo", 10}, + {"fofofofofoofofoffofoobarfoo", "foobars", -1}, + {"foofyfoobarfoobar", "y", 4}, + {"oooooooooooooooooooooo", "r", -1}, + {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, + {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, + } + + for i := range tt { + ptr1 := uint64(len(memory) - len(tt[i].h) - 1) + + clear(memory) + copy(memory[ptr1:], tt[i].h) + copy(memory[ptr2:], tt[i].n) + + var want uint64 + if tt[i].out >= 0 { + want = ptr1 + uint64(tt[i].out) + } + + got := call(strstr, uint64(ptr1), uint64(ptr2)) + if got != want { + t.Errorf("strstr(%q, %q) = %d, want %d", + tt[i].h, tt[i].n, uint32(got), uint32(want)) + } + } +} + func fill(s []byte, v byte) { for i := range s { s[i] = v diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 71b97c3..72a864e 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -1,9 +1,9 @@ #ifndef _WASM_SIMD128_STRING_H #define _WASM_SIMD128_STRING_H -#include #include #include +#include #include #include <__macro_PAGESIZE.h> @@ -458,6 +458,110 @@ size_t strcspn(const char *s, const char *c) { #undef 
_WASM_SIMD128_CHKBITS
 #undef _WASM_SIMD128_BITMAP256_T
 
+// Searches haystk[0, sh) for the first occurrence of needle[0, sn).
+// Requires 2 <= sn <= sh. Returns a pointer to the match, or NULL.
+//
+// Sixteen candidate positions are filtered at a time by comparing the
+// first and last needle bytes against two unaligned 16-byte loads;
+// survivors are confirmed with bcmp over the sn - 2 interior bytes.
+// If bmbc is not NULL, it is a bad-character table indexed by byte value,
+// and its entry for the last byte loaded is added to the 16-byte stride.
+// Positions too close to the end of linear memory for the 16-byte loads
+// are handled by a byte-wise loop instead.
+static const char *__memmem_rabin(const char *haystk, size_t sh,
+                                  const char *needle, size_t sn,
+                                  uint8_t bmbc[256]) {
+  // http://0x80.pl/notesen/2016-11-28-simd-strfind.html
+  __builtin_assume(2 <= sn && sn <= sh);
+
+  const v128_t fst = wasm_i8x16_splat(needle[0]);
+  const v128_t lst = wasm_i8x16_splat(needle[sn - 1]);
+  // While haystk <= N, both 16-byte loads below stay inside linear memory,
+  // even though they may read past the end of the haystack.
+  const char *N =
+      (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - sn - sizeof(v128_t));
+
+  while (haystk <= N) {
+    const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk));
+    const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + sn - 1));
+    const v128_t eq_fst = wasm_i8x16_eq(fst, blk_fst);
+    const v128_t eq_lst = wasm_i8x16_eq(lst, blk_lst);
+
+    const v128_t cmp = eq_fst & eq_lst;
+    if (wasm_v128_any_true(cmp)) {
+      for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) {
+        size_t ctz = __builtin_ctz(mask);
+        // The blocks were loaded without regard to sh, so a candidate may
+        // extend past the end of the haystack. Candidates are visited in
+        // ascending order, so every remaining one would overrun as well.
+        if (ctz + sn > sh) return NULL;
+        if (!bcmp(haystk + ctz + 1, needle + 1, sn - 2)) {
+          return haystk + ctz;
+        }
+      }
+    }
+
+    size_t skip = sizeof(v128_t);
+    // Index with the unsigned lane value: a sign-extended byte >= 0x80
+    // would index before the start of the table.
+    if (bmbc) skip += bmbc[wasm_u8x16_extract_lane(blk_lst, 15)];
+    if (__builtin_sub_overflow(sh, skip, &sh)) return NULL;
+    if (sn > sh) return NULL;
+    haystk += skip;
+  }
+
+  // Baseline algorithm.
+  for (size_t j = 0; j <= sh - sn; j++) {
+    for (size_t i = 0;; i++) {
+      if (i >= sn) return haystk;
+      if (needle[i] != haystk[i]) break;
+    }
+    haystk++;
+  }
+  return NULL;
+}
+
+// Same contract as __memmem_rabin without a table (2 <= sn <= sh).
+// Builds the 256-entry bad-character table for needle and delegates to
+// __memmem_rabin with it. The table is static unless _REENTRANT is defined.
+static const char *__memmem_raita(const char *haystk, size_t sh,
+                                  const char *needle, size_t sn) {
+  // https://www-igm.univ-mlv.fr/~lecroq/string/node22.html
+  __builtin_assume(2 <= sn && sn <= sh);
+
+#ifndef _REENTRANT
+  static
+#endif
+      uint8_t bmbc[256];
+  // Bytes absent from needle[0, sn - 1) get the largest shift, sn - 1;
+  // entries are uint8_t, so every shift is capped at 255.
+  memset(bmbc, sn - 1 < 255 ? sn - 1 : 255, sizeof(bmbc));
+  // Later occurrences overwrite earlier ones, leaving the smallest shift.
+  for (size_t i = 0; i < sn - 1; i++) {
+    size_t t = sn - 1 - i - 1;
+    if (t > 255) t = 255;
+    bmbc[(unsigned char)needle[i]] = t;
+  }
+
+  return __memmem_rabin(haystk, sh, needle, sn, bmbc);
+}
+
+// Shared search used by memmem and strstr, which only call it with
+// sn >= 2. Needles shorter than one vector are searched without a
+// bad-character table; longer ones build the table first.
+static const char *__memmem(const char *haystk, size_t sh,  //
+                            const char *needle, size_t sn) {
+  // Return when needle is longer than haystack.
+  if (sn > sh) return NULL;
+
+  return sn < sizeof(v128_t) ? __memmem_rabin(haystk, sh, needle, sn, NULL)
+                             : __memmem_raita(haystk, sh, needle, sn);
+}
+
+// Returns a pointer to the first occurrence of the sn bytes at vn within
+// the sh bytes at vh, vh itself when sn is 0, or NULL when there is none.
+__attribute__((weak))
+void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) {
+  // Return immediately on empty needle.
+  if (sn == 0) return (void *)vh;
+
+  // Return immediately when needle is longer than haystack.
+  if (sn > sh) return NULL;
+
+  // Skip to the first matching character using memchr,
+  // handling single character needles.
+  const char *needle = (char *)vn;
+  const char *haystk = (char *)memchr(vh, *needle, sh);
+  if (!haystk || sn == 1) return (void *)haystk;
+
+  // Only the bytes from the first matching character onwards remain.
+  sh -= haystk - (char *)vh;
+  return (void *)__memmem(haystk, sh, needle, sn);
+}
+
+// Returns a pointer to the first occurrence of the string needle within
+// the string haystk, haystk itself when needle is empty, or NULL.
+__attribute__((weak))
+char *strstr(const char *haystk, const char *needle) {
+  // Return immediately on empty needle.
+  if (!needle[0]) return (char *)haystk;
+
+  // Skip to the first matching character using strchr,
+  // handling single character needles.
+  haystk = strchr(haystk, *needle);
+  if (!haystk || !needle[1]) return (char *)haystk;
+
+  return (char *)__memmem(haystk, strlen(haystk), needle, strlen(needle));
+}
+
 // Given the above SIMD implementations,
 // these are best implemented as
 // small wrappers over those functions.
@@ -500,7 +604,8 @@ static char *__stpcpy(char *__restrict dest, const char *__restrict src) { return dest + slen; } -static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) { +static char *__stpncpy(char *__restrict dest, const char *__restrict src, + size_t n) { size_t strnlen(const char *s, size_t n); size_t slen = strnlen(src, n); memcpy(dest, src, slen); @@ -513,6 +618,7 @@ char *stpcpy(char *__restrict dest, const char *__restrict src) { return __stpcpy(dest, src); } +__attribute__((weak, always_inline)) char *strcpy(char *__restrict dest, const char *__restrict src) { __stpcpy(dest, src); return dest; diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h index 9d427fd..811400d 100644 --- a/sqlite3/libc/strings.h +++ b/sqlite3/libc/strings.h @@ -11,6 +11,7 @@ extern "C" { #endif #ifdef __wasm_simd128__ +#ifndef __OPTIMIZE_SIZE__ __attribute__((weak)) int bcmp(const void *v1, const void *v2, size_t n) { @@ -48,6 +49,7 @@ int bcmp(const void *v1, const void *v2, size_t n) { return 0; } +#endif // __OPTIMIZE_SIZE__ #endif // __wasm_simd128__ #ifdef __cplusplus