From ff3676ff4ac79631ed38e61403b11d7606ed7ef7 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Fri, 16 May 2025 08:15:15 +0100 Subject: [PATCH] Case insensitive search, fixes. --- sqlite3/libc/build.sh | 5 +- sqlite3/libc/libc.wasm | Bin 5799 -> 6398 bytes sqlite3/libc/libc.wat | 1779 ++++++++++++++++++++++--------------- sqlite3/libc/libc_test.go | 427 +++++---- sqlite3/libc/string.h | 237 ++--- sqlite3/libc/strings.h | 45 +- 6 files changed, 1495 insertions(+), 998 deletions(-) diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 89144dc..4d4047c 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -38,16 +38,17 @@ EOF -Wl,--export=memset \ -Wl,--export=stpcpy \ -Wl,--export=stpncpy \ + -Wl,--export=strcasecmp \ + -Wl,--export=strcasestr \ -Wl,--export=strchr \ -Wl,--export=strchrnul \ -Wl,--export=strcmp \ - -Wl,--export=strcasecmp \ -Wl,--export=strcpy \ -Wl,--export=strcspn \ -Wl,--export=strlen \ + -Wl,--export=strncasecmp \ -Wl,--export=strncat \ -Wl,--export=strncmp \ - -Wl,--export=strncasecmp \ -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 407f007c9a2ccabdac71f5262b7f839149d26772..d3ea5f869fe6c8187abd3d51290cc6093327d27e 100755 GIT binary patch delta 1847 zcmZ`)OK;;;6u$TRnOr+@l1^ViqhnWH4Fh5W(kviG>Xa^kK_fOusOp4B!D-Ue37~2g z9@wy9F_O0+5G+9KP$kv_e}KOLKL8=YwnlvCI_V%Jti(P(@0Y*(?d`u`c>H4VOy3f= zG?$QqFXiA5Qc7Vvat@JWOYul~pQ*k(9Zk>j3niTM#aX@-qPbY+!;{5mIGqdW;^lNS z6H0@at=SeEEk;XWGclZB2*(34J^OGZoWk&6>>6U63y+2Q5R}jU*m1)%{y;Sw%o^vj z4^D+PV(0}dHWneX7W0{CF>A-m%nu$$XWSSce{MIn;PoZa_W2{ugZ^mr`Hm% zY;D{zZ#(61XQsg2H}C z>b4NELslv$iH==0um!to_KJC;z_2(U3);CYKWYo z*h`C8!adi3jQ#Sv_RYeZB=RVR!?GwYl>9DbtW!)L~d-G0of)8#6N_#6)SEc!b}pIV`D?iRv^3B*61ST7=@Gm zBqr(ZQ9p)V`Xo`zRv_1G#rw0~Alqz^Q*V%k-D#4-9nQHZ-x`$djs3phVJD3$*#FN} z@u#xc!ZzH4co@SsX*Riueq_cf_&=+Hg6dwihb3uLd(|7TzmF;v_q$QOhYp1p3kIRa zSz;j@^2v>?gV5!*o{&0E6uJ;d^bj^YhKNV()FT$Lg;6SaP+0M2k-mUKlBCRwozmMm 
z+34n~Eigx)1Rg5}ub9W7vP9htE%T{nwNo7k%{v_wPMu7 zIbi+!7%<>p;23aLr$7z()48_-et(Vo4FkH0Cbr{lLh^dB`$`{1!8S^KE7@ z;72RG@A${ZHS%|j$};aY7cWBxhZR&e*06u0j&ibOtY^^QG9l&{4D<%_cWRCK#csK^ G`S>4&pL>S@ delta 1318 zcmYjRF^}U^5T1FqpY1$5ak57?*+pn;;D~O9_l>#Be4>&>;X>m4LuM-YM zSNOyaKr6YYp@0e@1<^wRqJ#P*T6#Lfn2T$#CS~J}yLw}tFU9vl17oY-657dXCcW$% zUMz1{g8;Gh-SNTp0q&{ovHKTR$Ij>Yef66^+`jH#E3H$^B4_)nxoe|c{oP5iv%MF& zSaxDwsqi61*^WR0jrikyrCiF+0zITCY1Fzoil9lOL(qeg1< z4y4hVoOv3}lQzLP1afzsxJNC%?=fvVi#w#anYu`K9K?~udDJ5yr~0}3R%uQ{;G#(8 zJkexjV|$cB$8cO{Ng$aX=g8M!P$HL&%f+?t|Tjp(2-fSAX;e`@;341)O1y{!e2>xPNpwOt1+NW+^Zm&Yu`T zerZ*|`-AE$YplM->Pio)yV|V+`|V~qtUj`TQBM_vUacNG_nv{$#{8k|BbEeAijkct zg1IzyXpD!DJ|cq%|De5`EN0O1FFyHr`pRdM)dK&-gPX5Ft$qFoG=k4JjoufzeWQEl zR`yzS$EtU0{Vj5@)?CRt^Pfq+RR_-tBS8^-AP!b}C=QW(`$p{@@BaNp_fZX57u{Rc m_Zl@%)*^M${GFOL--yg>!S@?PzS$o>r}x40avxU9!S#RRsw6W2 diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 54eb359..3df9e10 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -10,13 +10,15 @@ (table $0 1 1 funcref) (export "memory" (memory $0)) (export "qsort" (func $qsort)) + (export "strcasecmp" (func $strcasecmp)) + (export "strlen" (func $strlen)) + (export "strncasecmp" (func $strncasecmp)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) (export "memmove" (func $memcpy)) (export "memcmp" (func $memcmp)) (export "memchr" (func $memchr)) (export "memrchr" (func $memrchr)) - (export "strlen" (func $strlen)) (export "strcmp" (func $strcmp)) (export "strncmp" (func $strncmp)) (export "strchrnul" (func $strchrnul)) @@ -26,14 +28,13 @@ (export "strcspn" (func $strcspn)) (export "memmem" (func $memmem)) (export "strstr" (func $strstr)) + (export "strcasestr" (func $strcasestr)) (export "memccpy" (func $memccpy)) (export "strncat" (func $strncat)) (export "stpcpy" (func $stpcpy)) (export "strcpy" (func $strcpy)) (export "stpncpy" (func $stpncpy)) (export "strncpy" (func $strncpy)) - (export "strcasecmp" (func $strcasecmp)) - (export "strncasecmp" (func 
$strncasecmp)) (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (local $4 i32) (local $5 i32) @@ -480,6 +481,542 @@ ) ) ) + (func $strcasecmp (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (select + (local.get $0) + (local.get $1) + (i32.gt_u + (local.get $0) + (local.get $1) + ) + ) + ) + ) + (i32.const 16) + ) + ) + (loop $label + (br_if $block + (v128.any_true + (v128.xor + (v128.or + (local.tee $4 + (v128.load align=1 + (local.get $1) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.get $4) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + (local.tee $4 + (v128.or + (local.tee $4 + (v128.load align=1 + (local.get $0) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.get $4) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i8x16.all_true + (local.get $4) + ) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $2 + (select + (i32.or + (local.tee $2 + (i32.load8_u + (local.get $0) + ) + ) + (i32.const 32) + ) + (local.get $2) + (i32.lt_u + (i32.sub + (local.get $2) + (i32.const 
65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $3 + (select + (i32.or + (local.tee $3 + (i32.load8_u + (local.get $1) + ) + ) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + ) + (then + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (loop $label1 + (if + (i32.eqz + (local.get $2) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $2 + (i32.load8_u + (local.get $0) + ) + ) + (local.set $3 + (i32.load8_u + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (i32.eq + (local.tee $2 + (select + (i32.or + (local.get $2) + (i32.const 32) + ) + (local.get $2) + (i32.lt_u + (i32.sub + (local.get $2) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $3 + (select + (i32.or + (local.get $3) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + ) + ) + ) + ) + ) + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (func $strlen (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 v128) + (block $block1 + (block $block + (br_if $block + (i8x16.all_true + (local.tee $3 + (v128.load + (local.tee $1 + (i32.and + (local.get $0) + (i32.const -16) + ) + ) + ) + ) + ) + ) + (br_if $block + (i32.eqz + (local.tee $2 + (i32.and + (i8x16.bitmask + (i8x16.eq + (local.get $3) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + ) + (i32.shl + (i32.const -1) + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + ) + ) + ) + ) + (br $block1) + ) + (loop $label + (local.set $3 + (v128.load offset=16 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (br_if $label + (i8x16.all_true + (local.get $3) + ) + ) + ) + 
(local.set $2 + (i8x16.bitmask + (i8x16.eq + (local.get $3) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ) + ) + ) + ) + (i32.add + (i32.ctz + (local.get $2) + ) + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + ) + (func $strncasecmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 v128) + (block $block + (if + (i32.ge_u + (local.tee $2 + (select + (local.tee $3 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (select + (local.get $0) + (local.get $1) + (i32.gt_u + (local.get $0) + (local.get $1) + ) + ) + ) + ) + (local.get $2) + (i32.gt_u + (local.get $2) + (local.get $3) + ) + ) + ) + (i32.const 16) + ) + (then + (loop $label + (br_if $block + (v128.any_true + (v128.xor + (v128.or + (local.tee $5 + (v128.load align=1 + (local.get $1) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.get $5) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + (local.tee $5 + (v128.or + (local.tee $5 + (v128.load align=1 + (local.get $0) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.get $5) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i8x16.all_true + (local.get $5) + ) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + 
) + ) + ) + (br_if $block + (local.get $2) + ) + (return + (i32.const 0) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (loop $label1 + (if + (i32.ne + (local.tee $3 + (select + (i32.or + (local.tee $3 + (i32.load8_u + (local.get $0) + ) + ) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $4 + (select + (i32.or + (local.tee $4 + (i32.load8_u + (local.get $1) + ) + ) + (i32.const 32) + ) + (local.get $4) + (i32.lt_u + (i32.sub + (local.get $4) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + ) + (then + (return + (i32.sub + (local.get $3) + (local.get $4) + ) + ) + ) + ) + (if + (local.get $3) + (then + (local.set $2 + (i32.sub + (local.tee $3 + (local.get $2) + ) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (local.get $3) + ) + ) + ) + ) + (i32.const 0) + ) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -908,86 +1445,6 @@ ) (local.get $0) ) - (func $strlen (param $0 i32) (result i32) - (local $1 i32) - (local $2 i32) - (local $3 v128) - (block $block1 - (block $block - (br_if $block - (i8x16.all_true - (local.tee $3 - (v128.load - (local.tee $1 - (i32.and - (local.get $0) - (i32.const -16) - ) - ) - ) - ) - ) - ) - (br_if $block - (i32.eqz - (local.tee $2 - (i32.and - (i8x16.bitmask - (i8x16.eq - (local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - ) - (i32.shl - (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) - ) - ) - ) - ) - ) - ) - (br $block1) - ) - (loop $label - (local.set $3 - (v128.load offset=16 - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (br_if $label - (i8x16.all_true - (local.get $3) - ) - ) - ) - (local.set $2 - (i8x16.bitmask - (i8x16.eq - 
(local.get $3) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - ) - ) - ) - ) - (i32.add - (i32.ctz - (local.get $2) - ) - (i32.sub - (local.get $1) - (local.get $0) - ) - ) - ) (func $strcmp (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (local $3 i32) @@ -2612,13 +3069,13 @@ (local $18 v128) (local $19 v128) (local $20 v128) - (local.set $10 + (local.set $5 (i32.load8_u (local.get $2) ) ) - (local.set $5 - (local.tee $9 + (local.set $6 + (local.tee $11 (i32.sub (local.get $3) (i32.const 1) @@ -2629,39 +3086,39 @@ (loop $label (br_if $block (i32.ne - (local.get $10) - (local.tee $7 + (local.get $5) + (local.tee $8 (i32.load8_u (i32.add (local.get $2) - (local.get $5) + (local.get $6) ) ) ) ) ) (br_if $label - (local.tee $5 + (local.tee $6 (i32.sub - (local.get $5) + (local.get $6) (i32.const 1) ) ) ) ) - (local.set $7 + (local.set $8 (i32.load8_u (i32.add (local.get $2) - (local.get $9) + (local.get $11) ) ) ) - (local.set $5 - (local.get $9) + (local.set $6 + (local.get $11) ) ) - (block $block5 + (block $block6 (block $block1 (br_if $block1 (i32.lt_u @@ -2672,7 +3129,7 @@ (memory.size) (i32.const 16) ) - (local.get $5) + (local.get $6) ) (i32.const 16) ) @@ -2682,12 +3139,12 @@ ) (local.set $18 (i8x16.splat - (local.get $7) + (local.get $8) ) ) (local.set $19 (i8x16.splat - (local.get $10) + (local.get $5) ) ) (local.set $15 @@ -2696,18 +3153,12 @@ (i32.const 14) ) ) - (local.set $10 + (local.set $16 (i32.add (local.get $2) (i32.const 1) ) ) - (local.set $16 - (i32.lt_u - (local.get $3) - (i32.const 17) - ) - ) (loop $label4 (block $block2 (br_if $block2 @@ -2720,7 +3171,7 @@ (v128.load align=1 (i32.add (local.get $0) - (local.get $5) + (local.get $6) ) ) ) @@ -2756,7 +3207,7 @@ ) (br_if $block2 (i32.eqz - (local.tee $11 + (local.tee $7 (i8x16.bitmask (local.get $20) ) @@ -2768,9 +3219,9 @@ (i32.lt_u (local.get $1) (i32.add - (local.tee $6 + (local.tee $5 (i32.ctz - (local.get $11) + (local.get $7) ) ) (local.get $3) @@ -2782,131 
+3233,147 @@ ) ) ) - (local.set $8 - (i32.add - (local.tee $12 - (i32.add - (local.get $0) - (local.get $6) + (br_if $block6 + (i32.eqz + (block $block5 (result i32) + (local.set $9 + (i32.add + (local.tee $8 + (i32.add + (local.get $0) + (local.get $5) + ) + ) + (i32.const 1) + ) ) - ) - (i32.const 1) - ) - ) - (block $block4 - (if - (i32.eqz - (local.get $16) - ) - (then - (local.set $7 - (local.get $10) + (local.set $5 + (local.get $16) ) - (local.set $6 - (local.get $9) + (local.set $12 + (i32.const 0) ) - (loop $label1 - (br_if $block4 - (v128.any_true - (v128.xor - (v128.load align=1 - (local.get $7) + (block $block4 + (if + (i32.ge_u + (local.tee $10 + (local.get $11) + ) + (i32.const 16) + ) + (then + (local.set $12 + (i32.const 1) + ) + (loop $label1 + (br_if $block4 + (v128.any_true + (v128.xor + (v128.load align=1 + (local.get $5) + ) + (v128.load align=1 + (local.get $9) + ) + ) + ) ) - (v128.load align=1 - (local.get $8) + (local.set $5 + (i32.add + (local.get $5) + (local.tee $13 + (i32.add + (i32.and + (i32.sub + (local.get $10) + (i32.const 1) + ) + (i32.const 15) + ) + (i32.const 1) + ) + ) + ) ) + (local.set $9 + (i32.add + (local.get $9) + (local.get $13) + ) + ) + (br_if $label1 + (local.tee $10 + (i32.sub + (local.get $10) + (local.get $13) + ) + ) + ) + ) + (br $block5 + (i32.const 0) ) ) ) - (local.set $7 - (i32.add - (local.get $7) - (local.tee $13 - (i32.add - (i32.and - (i32.sub - (local.get $6) - (i32.const 1) - ) - (i32.const 15) + (br_if $block4 + (i32.eqz + (local.get $10) + ) + ) + (loop $label2 + (drop + (br_if $block5 + (i32.const 1) + (i32.ne + (i32.load8_u + (local.get $9) ) + (i32.load8_u + (local.get $5) + ) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label2 + (local.tee $10 + (i32.sub + (local.get $10) (i32.const 1) ) ) ) ) - (local.set $8 - (i32.add - (local.get $8) - (local.get $13) - ) - ) - (br_if $label1 - 
(local.tee $6 - (i32.sub - (local.get $6) - (local.get $13) - ) - ) - ) ) - (br $block5) + (local.get $12) ) ) - (local.set $6 - (local.get $9) - ) - (local.set $7 - (local.get $10) - ) - (loop $label2 - (br_if $block4 - (i32.ne - (i32.load8_u - (local.get $8) - ) - (i32.load8_u - (local.get $7) - ) - ) - ) - (local.set $7 - (i32.add + ) + (br_if $label3 + (local.tee $7 + (i32.and + (i32.sub (local.get $7) (i32.const 1) ) - ) - (local.set $8 - (i32.add - (local.get $8) - (i32.const 1) - ) - ) - (br_if $label2 - (local.tee $6 - (i32.sub - (local.get $6) - (i32.const 1) - ) - ) - ) - ) - (br $block5) - ) - (br_if $label3 - (local.tee $11 - (i32.and - (i32.sub - (local.get $11) - (i32.const 1) - ) - (local.get $11) + (local.get $7) ) ) ) ) ) - (block $block6 + (block $block7 (if (i32.eq (local.get $1) @@ -2918,13 +3385,13 @@ (local.get $17) ) (then - (local.set $8 + (local.set $7 (i32.const 16) ) (local.set $1 (i32.const -1) ) - (br $block6) + (br $block7) ) ) (return @@ -2932,16 +3399,16 @@ ) ) ) - (local.set $12 + (local.set $8 (i32.const 0) ) - (br_if $block5 + (br_if $block6 (i32.lt_u (local.get $1) (local.tee $1 (i32.sub (local.get $1) - (local.tee $8 + (local.tee $7 (if (result i32) (local.get $4) (then @@ -2969,7 +3436,7 @@ ) ) ) - (br_if $block5 + (br_if $block6 (i32.lt_u (local.get $1) (local.get $3) @@ -2981,7 +3448,7 @@ (local.tee $0 (i32.add (local.get $0) - (local.get $8) + (local.get $7) ) ) (local.get $14) @@ -2989,62 +3456,62 @@ ) ) ) - (local.set $9 + (local.set $11 (i32.sub (local.get $1) (local.get $3) ) ) - (local.set $6 + (local.set $4 (i32.const 0) ) - (local.set $4 + (local.set $1 (i32.ne (local.get $1) (i32.const -1) ) ) (loop $label6 - (local.set $5 + (local.set $6 (i32.const 0) ) - (block $block7 + (block $block8 (loop $label5 - (local.set $12 + (local.set $8 (i32.const 0) ) - (br_if $block5 + (br_if $block6 (i32.eqz (i32.or - (local.get $4) - (local.tee $1 + (local.get $1) + (local.tee $5 (i32.load8_u (i32.add (local.get $0) - (local.get $5) 
+ (local.get $6) ) ) ) ) ) ) - (br_if $block7 + (br_if $block8 (i32.ne (i32.load8_u (i32.add (local.get $2) - (local.get $5) + (local.get $6) ) ) - (local.get $1) + (local.get $5) ) ) (br_if $label5 (i32.ne (local.get $3) - (local.tee $5 + (local.tee $6 (i32.add - (local.get $5) + (local.get $6) (i32.const 1) ) ) @@ -3063,18 +3530,18 @@ ) (br_if $label6 (i32.le_u - (local.tee $6 + (local.tee $4 (i32.add - (local.get $6) + (local.get $4) (i32.const 1) ) ) - (local.get $9) + (local.get $11) ) ) ) ) - (local.get $12) + (local.get $8) ) (func $strstr (param $0 i32) (param $1 i32) (result i32) (local $2 v128) @@ -3221,6 +3688,390 @@ ) (local.get $0) ) + (func $strcasestr (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 v128) + (local $10 v128) + (local $11 v128) + (local $12 v128) + (if + (i32.eqz + (local.tee $2 + (i32.load8_u + (local.get $1) + ) + ) + ) + (then + (return + (local.get $0) + ) + ) + ) + (local.set $5 + (i32.extend8_s + (local.get $2) + ) + ) + (block $block + (br_if $block + (i32.eqz + (local.tee $6 + (i32.sub + (local.tee $7 + (call $strlen + (local.get $1) + ) + ) + (i32.const 1) + ) + ) + ) + ) + (local.set $4 + (local.get $6) + ) + (loop $label + (if + (i32.ne + (local.tee $3 + (i32.load8_u + (i32.add + (local.get $1) + (local.get $4) + ) + ) + ) + (local.get $2) + ) + (then + (local.set $2 + (local.get $3) + ) + (br $block) + ) + ) + (br_if $label + (local.tee $4 + (i32.sub + (local.get $4) + (i32.const 1) + ) + ) + ) + ) + (local.set $2 + (i32.load8_u + (i32.add + (local.get $1) + (local.get $6) + ) + ) + ) + (local.set $4 + (local.get $6) + ) + ) + (local.set $3 + (select + (i32.or + (local.get $5) + (i32.const 32) + ) + (local.get $5) + (i32.lt_u + (i32.sub + (local.get $5) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.set $2 + (select + (i32.or + (local.tee $2 + (i32.extend8_s + (local.get $2) + ) + ) + 
(i32.const 32) + ) + (local.get $2) + (i32.lt_u + (i32.sub + (local.get $2) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (block $block3 + (block $block4 + (block $block1 + (br_if $block1 + (i32.lt_u + (local.tee $5 + (i32.sub + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (local.get $4) + ) + (i32.const 16) + ) + ) + (local.get $0) + ) + ) + (local.set $10 + (i8x16.splat + (local.get $3) + ) + ) + (local.set $11 + (i8x16.splat + (local.get $2) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (loop $label2 + (local.set $2 + (i8x16.all_true + (local.tee $9 + (v128.or + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.tee $9 + (v128.load align=1 + (local.get $0) + ) + ) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + (local.get $9) + ) + ) + ) + ) + (block $block2 + (if + (v128.any_true + (local.tee $9 + (v128.and + (i8x16.eq + (local.get $11) + (v128.or + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i8x16.add + (local.tee $12 + (v128.load align=1 + (i32.add + (local.get $0) + (local.get $4) + ) + ) + ) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + (local.get $12) + ) + ) + (i8x16.eq + (local.get $10) + (local.get $9) + ) + ) + ) + ) + (then + (br_if $block1 + (i32.eqz + (local.get $2) + ) + ) + (br_if $block2 + (i32.eqz + (local.tee $2 + (i8x16.bitmask + (local.get $9) + ) + ) + ) + ) + (loop $label1 + (br_if $block3 + (i32.eqz + (call $strncasecmp + (i32.add + (local.tee $3 + (i32.add + (local.get $0) + (i32.ctz + (local.get $2) + ) + ) + ) + (i32.const 1) + ) + (local.get $8) + 
(local.get $6) + ) + ) + ) + (br_if $label1 + (local.tee $2 + (i32.and + (i32.sub + (local.get $2) + (i32.const 1) + ) + (local.get $2) + ) + ) + ) + ) + (br $block2) + ) + ) + (br_if $block4 + (i32.eqz + (local.get $2) + ) + ) + ) + (br_if $label2 + (i32.le_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (local.get $5) + ) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (i32.const 0) + ) + (loop $label3 + (br_if $block4 + (i32.eqz + (local.tee $0 + (i32.load8_s + (i32.add + (local.get $2) + (local.get $3) + ) + ) + ) + ) + ) + (if + (i32.eq + (select + (i32.or + (local.tee $4 + (i32.load8_s + (i32.add + (local.get $1) + (local.get $2) + ) + ) + ) + (i32.const 32) + ) + (local.get $4) + (i32.lt_u + (i32.sub + (local.get $4) + (i32.const 65) + ) + (i32.const 26) + ) + ) + (select + (i32.or + (local.get $0) + (i32.const 32) + ) + (local.get $0) + (i32.lt_u + (i32.sub + (local.get $0) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (then + (br_if $block3 + (i32.eq + (local.get $7) + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + ) + ) + ) + (else + (local.set $3 + (i32.add + (local.get $3) + (i32.const 1) + ) + ) + (local.set $2 + (i32.const 0) + ) + ) + ) + (br $label3) + ) + (unreachable) + ) + (local.set $3 + (i32.const 0) + ) + ) + (local.get $3) + ) (func $memccpy (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) (memory.copy (local.get $0) @@ -3363,462 +4214,6 @@ ) (local.get $0) ) - (func $strcasecmp (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (local $3 i32) - (local $4 v128) - (block $block - (br_if $block - (i32.lt_u - (local.tee $2 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) - ) - ) - ) - ) - (i32.const 16) - ) - ) - (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.or - (local.tee $4 - (v128.load align=1 - (local.get $1) - ) - ) - 
(v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i32x4.add - (local.get $4) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - (local.tee $4 - (v128.or - (local.tee $4 - (v128.load align=1 - (local.get $0) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i32x4.add - (local.get $4) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - (local.get $4) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - (if - (i32.eq - (local.tee $2 - (select - (i32.or - (local.tee $2 - (i32.load8_u - (local.get $0) - ) - ) - (i32.const 32) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $3 - (select - (i32.or - (local.tee $3 - (i32.load8_u - (local.get $1) - ) - ) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - (then - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.eqz - (local.get $2) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $2 - (i32.load8_u - (local.get $0) - ) - ) - (local.set $3 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $1 - (i32.add - 
(local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (i32.eq - (local.tee $2 - (select - (i32.or - (local.get $2) - (i32.const 32) - ) - (local.get $2) - (i32.lt_u - (i32.sub - (local.get $2) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $3 - (select - (i32.or - (local.get $3) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - ) - ) - ) - ) - (i32.sub - (local.get $2) - (local.get $3) - ) - ) - (func $strncasecmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 i32) - (local $4 i32) - (local $5 v128) - (block $block - (if - (i32.ge_u - (local.tee $2 - (select - (local.tee $3 - (i32.sub - (i32.shl - (memory.size) - (i32.const 16) - ) - (select - (local.get $0) - (local.get $1) - (i32.gt_u - (local.get $0) - (local.get $1) - ) - ) - ) - ) - (local.get $2) - (i32.gt_u - (local.get $2) - (local.get $3) - ) - ) - ) - (i32.const 16) - ) - (then - (loop $label - (br_if $block - (v128.any_true - (v128.xor - (v128.or - (local.tee $5 - (v128.load align=1 - (local.get $1) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i32x4.add - (local.get $5) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - (local.tee $5 - (v128.or - (local.tee $5 - (v128.load align=1 - (local.get $0) - ) - ) - (v128.bitselect - (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) - (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) - (i8x16.gt_s - (i32x4.add - (local.get $5) - (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) - ) - (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) - ) - ) - ) - ) - ) - ) - ) - (if - (i32.eqz - (i8x16.all_true - 
(local.get $5) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) - ) - ) - ) - (br_if $block - (local.get $2) - ) - (return - (i32.const 0) - ) - ) - (local.set $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) - (loop $label1 - (if - (i32.ne - (local.tee $3 - (select - (i32.or - (local.tee $3 - (i32.load8_u - (local.get $0) - ) - ) - (i32.const 32) - ) - (local.get $3) - (i32.lt_u - (i32.sub - (local.get $3) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - (local.tee $4 - (select - (i32.or - (local.tee $4 - (i32.load8_u - (local.get $1) - ) - ) - (i32.const 32) - ) - (local.get $4) - (i32.lt_u - (i32.sub - (local.get $4) - (i32.const 65) - ) - (i32.const 26) - ) - ) - ) - ) - (then - (return - (i32.sub - (local.get $3) - (local.get $4) - ) - ) - ) - ) - (if - (local.get $3) - (then - (local.set $2 - (i32.sub - (local.tee $3 - (local.get $2) - ) - (i32.const 1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (local.get $3) - ) - ) - ) - ) - (i32.const 0) - ) (func $strnlen (param $0 i32) (param $1 i32) (result i32) (local $2 i32) (select diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 5543ea0..4d951d5 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -38,6 +38,7 @@ var ( strncmp api.Function strcspn api.Function strcasecmp api.Function + strcasestr api.Function strncasecmp api.Function stack [8]uint64 ) @@ -75,6 +76,7 @@ func TestMain(m *testing.M) { strncmp = mod.ExportedFunction("strncmp") strcspn = mod.ExportedFunction("strcspn") strcasecmp = mod.ExportedFunction("strcasecmp") + strcasestr = mod.ExportedFunction("strcasestr") 
strncasecmp = mod.ExportedFunction("strncasecmp") memory, _ = mod.Memory().Read(0, mod.Memory().Size()) @@ -102,6 +104,17 @@ func Benchmark_memcpy(b *testing.B) { } } +func Benchmark_strlen(b *testing.B) { + clear(memory) + fill(memory[ptr1:ptr1+size-1], 5) + + b.SetBytes(size) + b.ResetTimer() + for range b.N { + call(strlen, ptr1) + } +} + func Benchmark_memchr(b *testing.B) { clear(memory) fill(memory[ptr1:ptr1+size/2], 7) @@ -114,30 +127,6 @@ func Benchmark_memchr(b *testing.B) { } } -func Benchmark_memcmp(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size], 7) - fill(memory[ptr2:ptr2+size/2], 7) - fill(memory[ptr2+size/2:ptr2+size], 5) - - b.SetBytes(size/2 + 1) - b.ResetTimer() - for range b.N { - call(memcmp, ptr1, ptr2, size) - } -} - -func Benchmark_strlen(b *testing.B) { - clear(memory) - fill(memory[ptr1:ptr1+size-1], 5) - - b.SetBytes(size) - b.ResetTimer() - for range b.N { - call(strlen, ptr1) - } -} - func Benchmark_strchr(b *testing.B) { clear(memory) fill(memory[ptr1:ptr1+size/2], 7) @@ -162,6 +151,19 @@ func Benchmark_strrchr(b *testing.B) { } } +func Benchmark_memcmp(b *testing.B) { + clear(memory) + fill(memory[ptr1:ptr1+size], 7) + fill(memory[ptr2:ptr2+size/2], 7) + fill(memory[ptr2+size/2:ptr2+size], 5) + + b.SetBytes(size/2 + 1) + b.ResetTimer() + for range b.N { + call(memcmp, ptr1, ptr2, size) + } +} + func Benchmark_strcmp(b *testing.B) { clear(memory) fill(memory[ptr1:ptr1+size-1], 7) @@ -247,20 +249,6 @@ func Benchmark_strcspn(b *testing.B) { //go:embed string.h var source string -func Benchmark_strstr(b *testing.B) { - needle := "memcpy(dest, src, slen)" - - clear(memory) - copy(memory[ptr1:], source) - copy(memory[ptr2:], needle) - - b.SetBytes(int64(len(source))) - b.ResetTimer() - for range b.N { - call(strstr, ptr1, ptr2) - } -} - func Benchmark_memmem(b *testing.B) { needle := "memcpy(dest, src, slen)" @@ -275,115 +263,31 @@ func Benchmark_memmem(b *testing.B) { } } -func Test_memcmp(t *testing.T) { - const s1 string = 
"" + - "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x7f\xf3\x93\x01\x00\x01" - const s2 string = "" + - "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x80\xf3\x93\x01\x00\x02" - - ptr2 := len(memory) - len(s2) +func Benchmark_strstr(b *testing.B) { + needle := "memcpy(dest, src, slen)" clear(memory) - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) + copy(memory[ptr1:], source) + copy(memory[ptr2:], needle) - for i := range len(s1) + 1 { - for j := range len(s1) - i { - want := strings.Compare(s1[i:i+j], s2[i:i+j]) - got := call(memcmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) - if sign(int32(got)) != want { - t.Errorf("strcmp(%d, %d, %d) = %d, want %d", - ptr1+i, ptr2+i, j, int32(got), want) - } - } + b.SetBytes(int64(len(source))) + b.ResetTimer() + for range b.N { + call(strstr, ptr1, ptr2) } } -func Test_strcmp(t *testing.T) { - const s1 string = "" + - "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - 
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x7f\xf3\x93\x01\x00\x01" - const s2 string = "" + - "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x80\xf3\x93\x01\x00\x02" - - ptr2 := len(memory) - len(s2) - 1 +func Benchmark_strcasestr(b *testing.B) { + needle := "MEMCPY(dest, src, slen)" clear(memory) - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) + copy(memory[ptr1:], source) + copy(memory[ptr2:], needle) - for i := range len(s1) + 1 { - want := strings.Compare(term(s1[i:]), term(s2[i:])) - got := call(strcmp, uint64(ptr1+i), uint64(ptr2+i)) - if sign(int32(got)) != want { - t.Errorf("strcmp(%d, %d) = %d, want %d", - ptr1+i, ptr2+i, int32(got), want) - } - } -} - -func Test_strncmp(t *testing.T) { - const s1 string = "" + - "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x7f\xf3\x93\x01\x00\x01" - const s2 string = "" + - 
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + - "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + - "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + - "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + - "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + - "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + - "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + - "\x80\xf3\x93\x01\x00\x02" - - ptr2 := len(memory) - len(s2) - 1 - - clear(memory) - copy(memory[ptr1:], s1) - copy(memory[ptr2:], s2) - - for i := range len(s1) + 1 { - for j := range len(s1) - i + 1 { - want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j])) - got := call(strncmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) - if sign(int32(got)) != want { - t.Errorf("strncmp(%d, %d, %d) = %d, want %d", - ptr1+i, ptr2+i, j, int32(got), want) - } - } + b.SetBytes(int64(len(source))) + b.ResetTimer() + for range b.N { + call(strcasestr, ptr1, ptr2) } } @@ -549,6 +453,118 @@ func Test_strrchr(t *testing.T) { } } +func Test_memcmp(t *testing.T) { + const s1 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x7f\xf3\x93\x01\x00\x01" + const s2 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + 
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x80\xf3\x93\x01\x00\x02" + + ptr2 := len(memory) - len(s2) + + clear(memory) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + + for i := range len(s1) + 1 { + for j := range len(s1) - i { + want := strings.Compare(s1[i:i+j], s2[i:i+j]) + got := call(memcmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) + if sign(int32(got)) != want { + t.Errorf("strcmp(%d, %d, %d) = %d, want %d", + ptr1+i, ptr2+i, j, int32(got), want) + } + } + } +} + +func Test_strcmp(t *testing.T) { + const s1 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x7f\xf3\x93\x01\x00\x01" + const s2 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x80\xf3\x93\x01\x00\x02" + + ptr2 := len(memory) - len(s2) - 1 + + clear(memory) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + + for i := range len(s1) + 
1 { + want := strings.Compare(term(s1[i:]), term(s2[i:])) + got := call(strcmp, uint64(ptr1+i), uint64(ptr2+i)) + if sign(int32(got)) != want { + t.Errorf("strcmp(%d, %d) = %d, want %d", + ptr1+i, ptr2+i, int32(got), want) + } + } +} + +func Test_strncmp(t *testing.T) { + const s1 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x7f\xf3\x93\x01\x00\x01" + const s2 string = "" + + "\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" + + "\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" + + "\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" + + "\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" + + "\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" + + "\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" + + "\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" + + "\x80\xf3\x93\x01\x00\x02" + + ptr2 := len(memory) - len(s2) - 1 + + clear(memory) + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + + for i := range len(s1) + 1 { + for j := range len(s1) - i + 1 { + want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j])) + got := call(strncmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j)) + if sign(int32(got)) != want { + t.Errorf("strncmp(%d, %d, %d) = %d, want %d", + ptr1+i, ptr2+i, j, int32(got), want) + } + } + } +} + func Test_strspn(t *testing.T) { for length := range 64 { for pos := range length + 2 { @@ -675,7 +691,6 @@ var searchTests = []searchTest{ {"barfoobarfoo", "foo", 3}, {"foo", "", 0}, 
{"foo", "o", 1}, - {"abcABCabc", "A", 3}, {"jrzm6jjhorimglljrea4w3rlgosts0w2gia17hno2td4qd1jz", "jz", 47}, {"ekkuk5oft4eq0ocpacknhwouic1uua46unx12l37nioq9wbpnocqks6", "ks6", 52}, {"999f2xmimunbuyew5vrkla9cpwhmxan8o98ec", "98ec", 33}, @@ -792,35 +807,9 @@ var searchTests = []searchTest{ {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5}, } -func Test_strstr(t *testing.T) { - tt := append(searchTests, - searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, - searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, - ) - - for i := range tt { - ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1) - - clear(memory) - copy(memory[ptr1:], tt[i].haystk) - copy(memory[ptr2:], tt[i].needle) - - var want uint64 - if tt[i].out >= 0 { - want = ptr1 + uint64(tt[i].out) - } - - got := call(strstr, uint64(ptr1), uint64(ptr2)) - if got != want { - t.Errorf("strstr(%q, %q) = %d, want %d", - tt[i].haystk, tt[i].needle, - uint32(got), uint32(want)) - } - } -} - func Test_memmem(t *testing.T) { tt := append(searchTests, + searchTest{"abcABCabc", "A", 3}, searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13}, searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17}, ) @@ -848,42 +837,75 @@ func Test_memmem(t *testing.T) { } } -func Fuzz_strstr(f *testing.F) { +func Test_strstr(t *testing.T) { tt := append(searchTests, + searchTest{"abcABCabc", "A", 3}, searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, ) for i := range tt { - f.Add(tt[i].haystk, tt[i].needle) - } + ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1) - f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 32 { - t.SkipNow() - } - clear(memory[ptr1 : ptr1+256]) - clear(memory[ptr2 : ptr2+256]) - copy(memory[ptr1:], 
haystk) - copy(memory[ptr2:], needle) + clear(memory) + copy(memory[ptr1:], tt[i].haystk) + copy(memory[ptr2:], tt[i].needle) - want := strings.Index(term(haystk), term(needle)) - if want >= 0 { - want = ptr1 + want - } else { - want = 0 + var want uint64 + if tt[i].out >= 0 { + want = ptr1 + uint64(tt[i].out) } got := call(strstr, uint64(ptr1), uint64(ptr2)) - if uint32(got) != uint32(want) { + if got != want { t.Errorf("strstr(%q, %q) = %d, want %d", - haystk, needle, uint32(got), uint32(want)) + tt[i].haystk, tt[i].needle, + uint32(got), uint32(want)) } - }) + } +} + +func Test_strcasestr(t *testing.T) { + tt := append(searchTests[1:], + searchTest{"A", "a", 0}, + searchTest{"a", "A", 0}, + searchTest{"Z", "z", 0}, + searchTest{"z", "Z", 0}, + searchTest{"@", "`", -1}, + searchTest{"`", "@", -1}, + searchTest{"[", "{", -1}, + searchTest{"{", "[", -1}, + searchTest{"abcABCabc", "A", 0}, + searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12}, + searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12}, + searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, + searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, + ) + + for i := range tt { + ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1) + + clear(memory) + copy(memory[ptr1:], tt[i].haystk) + copy(memory[ptr2:], tt[i].needle) + + var want uint64 + if tt[i].out >= 0 { + want = ptr1 + uint64(tt[i].out) + } + + got := call(strcasestr, uint64(ptr1), uint64(ptr2)) + if got != want { + t.Errorf("strcasestr(%q, %q) = %d, want %d", + tt[i].haystk, tt[i].needle, + uint32(got), uint32(want)) + } + } } func Fuzz_memmem(f *testing.F) { tt := append(searchTests, + searchTest{"abcABCabc", "A", 3}, searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13}, searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17}, ) @@ -918,6 +940,41 @@ func Fuzz_memmem(f *testing.F) { }) } +func Fuzz_strstr(f *testing.F) { + tt := append(searchTests, + 
searchTest{"abcABCabc", "A", 3}, + searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, + searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, + ) + + for i := range tt { + f.Add(tt[i].haystk, tt[i].needle) + } + + f.Fuzz(func(t *testing.T, haystk, needle string) { + if len(haystk) > 128 || len(needle) > 32 { + t.SkipNow() + } + clear(memory[ptr1 : ptr1+256]) + clear(memory[ptr2 : ptr2+256]) + copy(memory[ptr1:], haystk) + copy(memory[ptr2:], needle) + + want := strings.Index(term(haystk), term(needle)) + if want >= 0 { + want = ptr1 + want + } else { + want = 0 + } + + got := call(strstr, uint64(ptr1), uint64(ptr2)) + if uint32(got) != uint32(want) { + t.Errorf("strstr(%q, %q) = %d, want %d", + haystk, needle, uint32(got), uint32(want)) + } + }) +} + func fill(s []byte, v byte) { for i := range s { s[i] = v diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 918e81e..55cc61d 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -3,7 +3,9 @@ #ifndef _WASM_SIMD128_STRING_H #define _WASM_SIMD128_STRING_H +#include #include +#include #include #include <__macro_PAGESIZE.h> @@ -36,8 +38,6 @@ void *memmove(void *dest, const void *src, size_t n) { #ifdef __wasm_simd128__ -// SIMD implementations of string.h functions. - __attribute__((weak)) int memcmp(const void *v1, const void *v2, size_t n) { // Scalar algorithm. @@ -80,49 +80,6 @@ int memcmp(const void *v1, const void *v2, size_t n) { return 0; } -#ifdef __OPTIMIZE_SIZE__ - -// __memcmpeq is the same as memcmp but only compares for equality. - -#define __memcmpeq(v1, v2, n) memcmp(v1, v2, n) - -#else // __OPTIMIZE_SIZE__ - -static int __memcmpeq(const void *v1, const void *v2, size_t n) { - // Scalar algorithm. 
- if (n < sizeof(v128_t)) { - const unsigned char *u1 = (unsigned char *)v1; - const unsigned char *u2 = (unsigned char *)v2; - while (n--) { - if (*u1 != *u2) return 1; - u1++; - u2++; - } - return 0; - } - - // memcmpeq is allowed to read up to n bytes from each object. - // Unaligned loads handle the case where the objects - // have mismatching alignments. - const v128_t *w1 = (v128_t *)v1; - const v128_t *w2 = (v128_t *)v2; - while (n) { - // Find any single bit difference. - if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { - return 1; - } - // This makes n a multiple of sizeof(v128_t) - // for every iteration except the first. - size_t align = (n - 1) % sizeof(v128_t) + 1; - w1 = (v128_t *)((char *)w1 + align); - w2 = (v128_t *)((char *)w2 + align); - n -= align; - } - return 0; -} - -#endif // __OPTIMIZE_SIZE__ - __attribute__((weak)) void *memchr(const void *v, int c, size_t n) { // When n is zero, a function that locates a character finds no occurrence. @@ -364,8 +321,31 @@ char *strrchr(const char *s, int c) { return (char *)memrchr(s, c, strlen(s) + 1); } +// SIMDized check which bytes are in a set // http://0x80.pl/notesen/2018-10-18-simd-byte-lookup.html +typedef struct { + __u8x16 l; + __u8x16 h; +} __wasm_v128_bitmap256_t; + +__attribute__((always_inline)) +static void __wasm_v128_setbit(__wasm_v128_bitmap256_t *bitmap, int i) { + uint8_t hi_nibble = (uint8_t)i >> 4; + uint8_t lo_nibble = (uint8_t)i & 0xf; + bitmap->l[lo_nibble] |= 1 << (hi_nibble - 0); + bitmap->h[lo_nibble] |= 1 << (hi_nibble - 8); +} + +__attribute__((always_inline)) +static int __wasm_v128_chkbit(__wasm_v128_bitmap256_t bitmap, int i) { + uint8_t hi_nibble = (uint8_t)i >> 4; + uint8_t lo_nibble = (uint8_t)i & 0xf; + uint8_t bitmask = 1 << (hi_nibble & 0x7); + uint8_t bitset = (hi_nibble < 8 ? 
bitmap.l : bitmap.h)[lo_nibble]; + return bitmask & bitset; +} + #ifndef __wasm_relaxed_simd__ #define wasm_i8x16_relaxed_laneselect wasm_v128_bitselect @@ -373,49 +353,25 @@ char *strrchr(const char *s, int c) { #endif // __wasm_relaxed_simd__ -#define _WASM_SIMD128_BITMAP256_T \ - struct { \ - __u8x16 l; \ - __u8x16 h; \ - } +__attribute__((always_inline)) +static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) { + v128_t hi_nibbles = wasm_u8x16_shr(v, 4); + v128_t lo_nibbles = v & wasm_u8x16_const_splat(0xf); -#define _WASM_SIMD128_SETBIT(bitmap, i) \ - ({ \ - uint8_t _c = (uint8_t)(i); \ - uint8_t _hi_nibble = _c >> 4; \ - uint8_t _lo_nibble = _c & 0xf; \ - bitmap.l[_lo_nibble] |= 1 << (_hi_nibble - 0); \ - bitmap.h[_lo_nibble] |= 1 << (_hi_nibble - 8); \ - }) + v128_t bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, // + 1, 2, 4, 8, 16, 32, 64, 128); -#define _WASM_SIMD128_CHKBIT(bitmap, i) \ - ({ \ - uint8_t _c = (uint8_t)(i); \ - uint8_t _hi_nibble = _c >> 4; \ - uint8_t _lo_nibble = _c & 0xf; \ - uint8_t _bitmask = 1 << (_hi_nibble & 0x7); \ - uint8_t _bitset = (_hi_nibble < 8 ? 
bitmap.l : bitmap.h)[_lo_nibble]; \ - _bitmask & _bitset; \ - }) + v128_t bitmask = wasm_i8x16_relaxed_swizzle(bitmask_lookup, hi_nibbles); + v128_t bitsets = wasm_i8x16_relaxed_laneselect( + wasm_i8x16_relaxed_swizzle(bitmap.l, lo_nibbles), + wasm_i8x16_relaxed_swizzle(bitmap.h, lo_nibbles), + wasm_i8x16_lt(hi_nibbles, wasm_u8x16_const_splat(8))); -#define _WASM_SIMD128_CHKBITS(bitmap, v) \ - ({ \ - v128_t _w = v; \ - v128_t _hi_nibbles = wasm_u8x16_shr(_w, 4); \ - v128_t _lo_nibbles = _w & wasm_u8x16_const_splat(0xf); \ - \ - v128_t _bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, \ - 1, 2, 4, 8, 16, 32, 64, 128); \ - \ - v128_t _bitmask = \ - wasm_i8x16_relaxed_swizzle(_bitmask_lookup, _hi_nibbles); \ - v128_t _bitsets = wasm_i8x16_relaxed_laneselect( \ - wasm_i8x16_relaxed_swizzle(bitmap.l, _lo_nibbles), \ - wasm_i8x16_relaxed_swizzle(bitmap.h, _lo_nibbles), \ - wasm_i8x16_lt(_hi_nibbles, wasm_u8x16_const_splat(8))); \ - \ - wasm_i8x16_eq(_bitsets & _bitmask, _bitmask); \ - }) + return wasm_i8x16_eq(bitsets & bitmask, bitmask); +} + +#undef wasm_i8x16_relaxed_laneselect +#undef wasm_i8x16_relaxed_swizzle __attribute__((weak)) size_t strspn(const char *s, const char *c) { @@ -443,15 +399,15 @@ size_t strspn(const char *s, const char *c) { return s - a; } - _WASM_SIMD128_BITMAP256_T bitmap = {}; + __wasm_v128_bitmap256_t bitmap = {}; for (; *c; c++) { - _WASM_SIMD128_SETBIT(bitmap, *c); + __wasm_v128_setbit(&bitmap, *c); // Terminator IS NOT on the bitmap. } for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) { - const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w)); + const v128_t cmp = __wasm_v128_chkbits(bitmap, wasm_v128_load(w)); // Bitmask is slow on AArch64, all_true is much faster. if (!wasm_i8x16_all_true(cmp)) { // Find the offset of the first zero bit (little-endian). @@ -462,7 +418,7 @@ size_t strspn(const char *s, const char *c) { } // Scalar algorithm. 
- for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++); + for (s = (char *)w; __wasm_v128_chkbit(bitmap, *s); s++); return s - a; } @@ -475,16 +431,16 @@ size_t strcspn(const char *s, const char *c) { const v128_t *w = (v128_t *)s; const char *const a = s; - _WASM_SIMD128_BITMAP256_T bitmap = {}; + __wasm_v128_bitmap256_t bitmap = {}; for (;;) { - _WASM_SIMD128_SETBIT(bitmap, *c); + __wasm_v128_setbit(&bitmap, *c); // Terminator IS on the bitmap. if (!*c++) break; } for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) { - const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w)); + const v128_t cmp = __wasm_v128_chkbits(bitmap, wasm_v128_load(w)); // Bitmask is slow on AArch64, any_true is much faster. if (wasm_v128_any_true(cmp)) { // Find the offset of the first one bit (little-endian). @@ -495,24 +451,23 @@ size_t strcspn(const char *s, const char *c) { } // Scalar algorithm. - for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++); + for (s = (char *)w; !__wasm_v128_chkbit(bitmap, *s); s++); return s - a; } -#undef wasm_i8x16_relaxed_laneselect -#undef wasm_i8x16_relaxed_swizzle +// SIMD-friendly algorithms for substring searching +// http://0x80.pl/notesen/2016-11-28-simd-strfind.html -#undef _WASM_SIMD128_SETBIT -#undef _WASM_SIMD128_CHKBIT -#undef _WASM_SIMD128_CHKBITS -#undef _WASM_SIMD128_BITMAP256_T +// For haystacks of known length and large enough needles, +// Boyer-Moore's bad-character rule may be useful, +// as proposed by Horspool and Raita. +// https://www-igm.univ-mlv.fr/~lecroq/string/node14.html +// https://www-igm.univ-mlv.fr/~lecroq/string/node18.html +// https://www-igm.univ-mlv.fr/~lecroq/string/node22.html static const char *__memmem(const char *haystk, size_t sh, const char *needle, size_t sn, uint8_t bmbc[256]) { - // https://www-igm.univ-mlv.fr/~lecroq/string/node22.html - // http://0x80.pl/notesen/2016-11-28-simd-strfind.html - // We've handled empty and single character needles. 
// The needle is not longer than the haystack. __builtin_assume(2 <= sn && sn <= sh); @@ -525,7 +480,7 @@ static const char *__memmem(const char *haystk, size_t sh, const v128_t fst = wasm_i8x16_splat(needle[0]); const v128_t lst = wasm_i8x16_splat(needle[i]); - // The last haystk offset for which loading blk_lst is safe. + // The last haystack offset for which loading blk_lst is safe. const char *H = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - i - sizeof(v128_t)); @@ -545,7 +500,7 @@ static const char *__memmem(const char *haystk, size_t sh, size_t ctz = __builtin_ctz(mask); // The match may be after the end of the haystack. if (ctz + sn > sh) return NULL; - if (!__memcmpeq(haystk + ctz + 1, needle + 1, sn - 1)) { + if (!bcmp(haystk + ctz + 1, needle + 1, sn - 1)) { return haystk + ctz; } } @@ -556,8 +511,8 @@ static const char *__memmem(const char *haystk, size_t sh, // Have we reached the end of the haystack? if (!wasm_i8x16_all_true(blk_fst)) return NULL; } else { - // Apply the bad-character rule to the last checked - // character of the haystack. + // Apply the bad-character rule to the rightmost + // character of the window. if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + 15]]; // Have we reached the end of the haystack? if (__builtin_sub_overflow(sh, skip, &sh)) return NULL; @@ -588,12 +543,12 @@ void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { if (sn > sh) return NULL; // Skip to the first matching character using memchr, - // handling single character needles. + // thereby handling single character needles. const char *needle = (char *)vn; const char *haystk = (char *)memchr(vh, *needle, sh); if (!haystk || sn == 1) return (void *)haystk; - // The haystack got shorter, is the needle now longer? + // The haystack got shorter, is the needle now longer than it? 
sh -= haystk - (char *)vh; if (sn > sh) return NULL; @@ -602,12 +557,6 @@ void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { return (void *)__memmem(haystk, sh, needle, sn, NULL); } - // https://www-igm.univ-mlv.fr/~lecroq/string/node14.html - - // We've handled empty and single character needles. - // The needle is not longer than the haystack. - __builtin_assume(2 <= sn && sn <= sh); - // Compute Boyer-Moore's bad-character shift function. // Only the last 255 characters of the needle matter for shifts up to 255, // which is good enough for most needles. @@ -640,13 +589,71 @@ char *strstr(const char *haystk, const char *needle) { if (!needle[0]) return (char *)haystk; // Skip to the first matching character using strchr, - // handling single character needles. + // thereby handling single character needles. haystk = strchr(haystk, *needle); if (!haystk || !needle[1]) return (char *)haystk; return (char *)__memmem(haystk, SIZE_MAX, needle, strlen(needle), NULL); } +__attribute__((weak)) +char *strcasestr(const char *haystk, const char *needle) { + // Return immediately on empty needle. + if (!needle[0]) return (char *)haystk; + + // We've handled empty needles. + size_t sn = strlen(needle); + __builtin_assume(sn >= 1); + + // Find the farthest character not equal to the first one. + size_t i = sn - 1; + while (i > 0 && needle[0] == needle[i]) i--; + if (i == 0) i = sn - 1; + + const v128_t fst = wasm_i8x16_splat(tolower(needle[0])); + const v128_t lst = wasm_i8x16_splat(tolower(needle[i])); + + // The last haystack offset for which loading blk_lst is safe. 
+ const char *H = + (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - i - sizeof(v128_t)); + + while (haystk <= H) { + const v128_t blk_fst = __tolower8x16(wasm_v128_load((v128_t *)(haystk))); + const v128_t blk_lst = __tolower8x16(wasm_v128_load((v128_t *)(haystk + i))); + const v128_t eq_fst = wasm_i8x16_eq(fst, blk_fst); + const v128_t eq_lst = wasm_i8x16_eq(lst, blk_lst); + + const v128_t cmp = eq_fst & eq_lst; + if (wasm_v128_any_true(cmp)) { + // The terminator may come before the match. + if (!wasm_i8x16_all_true(blk_fst)) break; + // Find the offset of the first one bit (little-endian). + // Each iteration clears that bit, tries again. + for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) { + size_t ctz = __builtin_ctz(mask); + if (!strncasecmp(haystk + ctz + 1, needle + 1, sn - 1)) { + return (char *)haystk + ctz; + } + } + } + + // Have we reached the end of the haystack? + if (!wasm_i8x16_all_true(blk_fst)) return NULL; + haystk += sizeof(v128_t); + } + + // Scalar algorithm. + for (;;) { + for (size_t i = 0;; i++) { + if (sn == i) return (char *)haystk; + if (!haystk[i]) return NULL; + if (tolower(needle[i]) != tolower(haystk[i])) break; + } + haystk++; + } + return NULL; +} + // Given the above SIMD implementations, // these are best implemented as // small wrappers over those functions. diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h index 0e00d58..61f92b5 100644 --- a/sqlite3/libc/strings.h +++ b/sqlite3/libc/strings.h @@ -5,7 +5,6 @@ #include #include -#include #include #include <__macro_PAGESIZE.h> @@ -15,14 +14,52 @@ extern "C" { #ifdef __wasm_simd128__ +#ifdef __OPTIMIZE_SIZE__ + +// bcmp is the same as memcmp but only compares for equality. +int bcmp(const void *v1, const void *v2, size_t n); + +#else // __OPTIMIZE_SIZE__ + __attribute__((weak)) int bcmp(const void *v1, const void *v2, size_t n) { - return __memcmpeq(v1, v2, n); + // Scalar algorithm. 
+ if (n < sizeof(v128_t)) { + const unsigned char *u1 = (unsigned char *)v1; + const unsigned char *u2 = (unsigned char *)v2; + while (n--) { + if (*u1 != *u2) return 1; + u1++; + u2++; + } + return 0; + } + + // bcmp is allowed to read up to n bytes from each object. + // Unaligned loads handle the case where the objects + // have mismatching alignments. + const v128_t *w1 = (v128_t *)v1; + const v128_t *w2 = (v128_t *)v2; + while (n) { + // Find any single bit difference. + if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { + return 1; + } + // This makes n a multiple of sizeof(v128_t) + // for every iteration except the first. + size_t align = (n - 1) % sizeof(v128_t) + 1; + w1 = (v128_t *)((char *)w1 + align); + w2 = (v128_t *)((char *)w2 + align); + n -= align; + } + return 0; } +#endif // __OPTIMIZE_SIZE__ + static v128_t __tolower8x16(v128_t v) { - __i8x16 i; - i = v + wasm_i8x16_splat(INT8_MAX - ('Z')); + __i8x16 i = v; + i = i + wasm_i8x16_splat(INT8_MAX - ('Z')); i = i > wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1)); i = i & wasm_i8x16_splat('a' - 'A'); return v | i;