From 784f82f42f61902e2f2ef63f318511004bdd1e77 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Wed, 25 Jun 2025 15:02:53 +0100 Subject: [PATCH] Avoid UB. --- sqlite3/libc/libc.wasm | Bin 3039 -> 3071 bytes sqlite3/libc/libc.wat | 820 +++++++++++++++++++++-------------------- sqlite3/libc/string.h | 43 ++- 3 files changed, 449 insertions(+), 414 deletions(-) diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index a55a4d2353045f6ec065b531d23a03e7b74f5015..43a150b96899c31db21840dfccef93f68c5b7e75 100755 GIT binary patch delta 1039 zcmZvbziL!L6vpTOy_=hLgDBAi;+cscQN*ONu-XB)Q7}rZEG&#g*hR>$!6wDEwi4zE z1RFcW7w`cLR@UYb?5r;9cV_Q~4WwCS&&>JreZO=6tWH-y{*<4;o%~kySE*F3j#Xdv zNe#M!MC^^dCKct#8w5o>90%11BS<#A;F`@~n>~JMo7b@>eKZb|{~9lrRYjW2Fs_UO z0*?_@ab@V0M)6#tO*ehc_A+h@ua|YzJ|k|=xw~CaRZ;YCLGmETVMKS*BNFMkdB6UF zhn5(6VY;B3{ekJH9eeF2#$p*fSy#5jCC`wDfexri-fk7Rln12JGrL`&6ZGk{?jCAI zI{mVP3u4#Pu?t(DjMB%pCV5wg;LtYD%2s)sTl~Xe!~OXI@=2d zaAr1;2At_szl9a$+y~3@*@t|vGapc!qtxbdhk34_*#kwRA){CC{t?&mL1dJj|0Ow&#*{Sd-Z;S33{PyHk+rRX!h{9$g8%%lwm^l ziZA9+b821CSsHe-;zU-#TQr0WsK}7C?!#Q%R=m~>Q86QSd_wtmMoBRxJLdy1D9X#BXo_uTUsFV?=Uz5T@Bd_4Osno}-hBaTEVN)lTG zM$8ZQtt8=jZ54{V9l8wFSNl%1OtE&;nRYjJns(1OBoFtUj<3q{z9>kNs{E*SoYZ!N zBU$@VWrZv$qYDVCi~x77_RPw@EU+=Q5htF;9}QShcs9fb(lf^@-+>ViQ+2$4%!O!p z$2*bZk(`t53oMmkYOY>O^r9RvjMDnN0EyT)3ODtw0mzCO_OZGu-i4ofl0~hM9!~T_ z8Sh{8)_=8^=E3HP67onl#BVbO@v2NbBMoah1M_fZ#y2#UyUc&vn7bbJ(Cj&BuU^Np z4m?{QX`k1J^ZSe?EUUwvY`mf&9nLEGze( F{1<8=zz6^U diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index c9576d1..27472d1 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -627,9 +627,8 @@ (func $memchr (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (local $3 i32) (local $4 i32) - (local $5 i32) + (local $5 v128) (local $6 v128) - (local $7 v128) (block $block2 (block $block (br_if $block @@ -653,13 +652,8 @@ (br_if $block1 (i32.eqz (v128.any_true - (local.tee $6 + (local.tee $5 (i8x16.eq - (local.tee $7 - (i8x16.splat - (local.get $1) - ) - ) (v128.load (local.tee $2 (i32.and @@ -668,6 +662,11 @@ ) ) ) + (local.tee $6 + (i8x16.splat + (local.get $1) + ) + ) ) ) ) @@ -675,10 +674,10 @@ ) (br_if $block1 (i32.eqz - (local.tee $5 + (local.tee $1 (i32.and (i8x16.bitmask - (local.get $6) + (local.get $5) ) (i32.shl (i32.const -1) @@ -688,14 +687,14 @@ ) ) ) - (local.set $1 + (local.set $0 (local.get $4) ) (br $block2) ) (br_if $block (i32.gt_u - (local.tee $1 + (local.tee $0 (i32.sub (i32.add (local.get $3) @@ -709,29 +708,26 @@ ) (local.set $2 (i32.add - (i32.sub - (local.get $0) - (local.get $3) - ) + (local.get $2) (i32.const 16) ) ) (loop $label (if (v128.any_true - (local.tee $6 + (local.tee $5 (i8x16.eq - (local.get $7) (v128.load (local.get $2) ) + (local.get $6) ) ) ) (then - (local.set $5 + (local.set $1 (i8x16.bitmask - (local.get $6) + (local.get $5) ) ) (local.set $3 @@ -748,10 +744,10 @@ ) (br_if $label (i32.ge_u - (local.get $1) - (local.tee $1 + (local.get $0) + (local.tee $0 (i32.sub - (local.get $1) + (local.get $0) (i32.const 16) ) ) @@ -766,19 +762,19 @@ (select (i32.add (local.get $2) - (local.tee $0 + (local.tee $1 (i32.ctz - (local.get $5) + (local.get $1) ) ) ) (i32.const 0) (i32.le_u (i32.sub - (local.get $0) + (local.get $1) (local.get $3) ) - (local.get $1) + (local.get $0) ) ) ) @@ -898,13 +894,13 @@ (local $1 i32) (local $2 i32) (local $3 v128) - (block $block1 - (block $block - (br_if $block + (block $block + (if + (i32.eqz (i8x16.all_true (local.tee $3 (v128.load - (local.tee $1 + (local.tee $2 (i32.and (local.get $0) (i32.const -16) @@ -914,9 +910,9 @@ ) ) ) - (br_if $block - (i32.eqz - (local.tee $2 + (then + (br_if $block + (local.tee $1 (i32.and (i8x16.bitmask (i8x16.eq @@ -935,11 +931,22 @@ ) ) ) - (br $block1) + ) + (local.set $1 + (i32.add + (local.get $2) + (i32.const 16) + ) ) (loop $label + (local.set $2 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) (local.set $3 - (v128.load offset=16 + (v128.load (local.get $1) ) ) @@ -955,7 +962,7 @@ ) ) ) - (local.set $2 + (local.set $1 (i8x16.bitmask (i8x16.eq (local.get $3) @@ -966,10 +973,10 @@ ) (i32.add (i32.ctz - (local.get $2) + (local.get $1) ) (i32.sub - (local.get $1) + (local.get $2) (local.get $0) ) ) @@ -997,12 +1004,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq + (local.get $2) (local.tee $3 (i8x16.splat (local.get $1) ) ) - (local.get $2) ) ) ) @@ -1048,8 +1055,8 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $3) (local.get $2) + (local.get $3) ) ) ) @@ -1093,12 +1100,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq + (local.get $2) (local.tee $3 (i8x16.splat (local.get $1) ) ) - (local.get $2) ) ) ) @@ -1144,8 +1151,8 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $3) (local.get $2) + (local.get $3) ) ) ) @@ -1201,9 +1208,10 @@ (local $7 i32) (local $8 i32) (local $9 i32) + (local $scratch v128) (if (i32.eqz - (local.tee $5 + (local.tee $6 (i32.load8_u (local.get $1) ) @@ -1215,7 +1223,7 @@ ) ) ) - (local.set $6 + (local.set $5 (i32.and (local.get $0) (i32.const -16) @@ -1227,303 +1235,323 @@ (i32.const 15) ) ) - (if - (i32.load8_u offset=1 - (local.get $1) - ) - (then - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label - (v128.store - (i32.const 4064) - (local.get $4) - ) - (i32.store8 - (local.tee $9 - (i32.or - (local.tee $7 - (i32.and - (local.get $5) - (i32.const 15) - ) - ) - (i32.const 4064) - ) - ) - (i32.or - (i32.load8_u - (local.get $9) - ) - (i32.shl - (i32.const 1) - (local.tee $5 - (i32.and - (i32.shr_u - (local.get $5) - (i32.const 4) - ) - (i32.const 15) - ) - ) - ) - ) - ) - (v128.store - (i32.const 4080) - (local.get $3) - ) - (i32.store8 - (local.tee $7 - (i32.or - (local.get $7) - (i32.const 4080) - ) - ) - (i32.or - (i32.load8_u - (local.get $7) - ) - (i32.shl - (i32.const 1) - (i32.sub - (local.get $5) - (i32.const 8) - ) - ) - ) - ) - (local.set $5 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $4 - (v128.load - (i32.const 4064) - ) - ) - (local.set $3 - (v128.load - (i32.const 4080) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (br_if $label - (local.get $5) - ) - ) + (block $block3 + (block $block1 (block $block (if - (i32.eqz - (i8x16.all_true - (local.tee $2 - (i8x16.eq - (v128.and - (v128.or - (i8x16.swizzle - (local.get $4) - (v128.and - (local.tee $2 - (v128.load - (local.get $6) - ) - ) - (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) - ) - ) - (i8x16.swizzle - (local.get $3) - (v128.and - (local.get $2) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) - ) - ) - (local.tee $2 - (i8x16.swizzle - (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) - (i8x16.shr_u - (local.get $2) - (i32.const 4) - ) - ) - ) - ) - (local.get $2) - ) - ) - ) - ) - (then - (br_if $block - (local.tee $1 - (i32.and - (i32.xor - (i8x16.bitmask - (local.get $2) - ) - (i32.const 65535) - ) - (i32.shl - (i32.const -1) - (local.get $8) - ) - ) - ) - ) - ) - ) - (loop $label1 - (local.set $2 - (v128.load offset=16 - (local.get $6) - ) - ) - (local.set $6 - (i32.add - (local.get $6) - (i32.const 16) - ) - ) - (br_if $label1 - (i8x16.all_true - (local.tee $2 - (i8x16.eq - (v128.and - (v128.or - (i8x16.swizzle - (local.get $4) - (v128.and - (local.get $2) - (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) - ) - ) - (i8x16.swizzle - (local.get $3) - (v128.and - (local.get $2) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) - ) - ) - (local.tee $2 - (i8x16.swizzle - (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) - (i8x16.shr_u - (local.get $2) - (i32.const 4) - ) - ) - ) - ) - (local.get $2) - ) - ) - ) - ) - ) - (local.set $1 - (i32.xor - (i8x16.bitmask - (local.get $2) - ) - (i32.const 65535) - ) - ) - ) - (return - (i32.add - (i32.ctz + (i32.load8_u offset=1 (local.get $1) ) - (i32.sub - (local.get $6) - (local.get $0) - ) - ) - ) - ) - ) - (block $block2 - (block $block1 - (br_if $block1 - (i8x16.all_true - (local.tee $3 - (i8x16.eq - (local.tee $4 - (i8x16.splat - (local.get $5) + (then + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (loop $label + (v128.store + (i32.const 4064) + (local.get $4) + ) + (i32.store8 + (local.tee $9 + (i32.or + (local.tee $7 + (i32.and + (local.get $6) + (i32.const 15) + ) + ) + (i32.const 4064) + ) + ) + (i32.or + (i32.load8_u + (local.get $9) + ) + (i32.shl + (i32.const 1) + (local.tee $6 + (i32.shr_u + (local.get $6) + (i32.const 4) + ) + ) + ) ) ) - (v128.load + (v128.store + (i32.const 4080) + (local.get $3) + ) + (i32.store8 + (local.tee $7 + (i32.or + (local.get $7) + (i32.const 4080) + ) + ) + (i32.or + (i32.load8_u + (local.get $7) + ) + (i32.shl + (i32.const 1) + (i32.sub + (local.get $6) + (i32.const 8) + ) + ) + ) + ) + (local.set $6 + (i32.load8_u + (local.get $1) + ) + ) + (local.set $4 + (v128.load + (i32.const 4064) + ) + ) + (local.set $3 + (v128.load + (i32.const 4080) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label (local.get $6) ) ) + (br_if $block + (i32.eqz + (i8x16.all_true + (local.tee $2 + (i8x16.eq + (v128.and + (v128.or + (i8x16.swizzle + (local.get $4) + (v128.and + (local.tee $2 + (v128.load + (local.get $5) + ) + ) + (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + ) + ) + (i8x16.swizzle + (local.get $3) + (v128.and + (local.get $2) + (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) + ) + ) + ) + (local.tee $2 + (i8x16.swizzle + (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) + (i8x16.shr_u + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $2) + ) + ) + ) + ) + ) + (br $block1) ) ) - ) - (br_if $block1 - (i32.eqz - (local.tee $5 - (i32.and - (i32.xor - (i8x16.bitmask - (local.get $3) + (block $block2 + (if + (i32.eqz + (i8x16.all_true + (local.tee $3 + (i8x16.eq + (v128.load + (local.get $5) + ) + (local.tee $4 + (i8x16.splat + (local.get $6) + ) + ) + ) ) - (i32.const 65535) ) - (i32.shl - (i32.const -1) - (local.get $8) + ) + (then + (br_if $block2 + (local.tee $1 + (i32.and + (i32.xor + (i8x16.bitmask + (local.get $3) + ) + (i32.const 65535) + ) + (i32.shl + (i32.const -1) + (local.get $8) + ) + ) + ) ) ) ) + (local.set $1 + (i32.add + (local.get $5) + (i32.const 16) + ) + ) + (loop $label1 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 16) + ) + ) + (br_if $label1 + (i8x16.all_true + (local.tee $3 + (i8x16.eq + (block (result v128) + (local.set $scratch + (v128.load + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.get $scratch) + ) + (local.get $4) + ) + ) + ) + ) + ) + (local.set $1 + (i32.xor + (i8x16.bitmask + (local.get $3) + ) + (i32.const 65535) + ) + ) + ) + (return + (i32.add + (i32.ctz + (local.get $1) + ) + (i32.sub + (local.get $5) + (local.get $0) + ) + ) ) ) - (local.set $1 - (local.get $6) - ) - (br $block2) - ) - (loop $label2 - (local.set $3 - (v128.load offset=16 - (local.get $6) + (br_if $block3 + (local.tee $1 + (i32.and + (i32.xor + (i8x16.bitmask + (local.get $2) + ) + (i32.const 65535) + ) + (i32.shl + (i32.const -1) + (local.get $8) + ) + ) ) ) - (local.set $6 - (local.tee $1 - (i32.add - (local.get $6) - (i32.const 16) - ) + ) + (local.set $1 + (i32.add + (local.get $5) + (i32.const 16) + ) + ) + (loop $label2 + (local.set $2 + (v128.load + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 16) ) ) (br_if $label2 (i8x16.all_true - (local.tee $3 + (local.tee $2 (i8x16.eq - (local.get $4) - (local.get $3) + (v128.and + (v128.or + (i8x16.swizzle + (local.get $4) + (v128.and + (local.get $2) + (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + ) + ) + (i8x16.swizzle + (local.get $3) + (v128.and + (local.get $2) + (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) + ) + ) + ) + (local.tee $2 + (i8x16.swizzle + (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) + (i8x16.shr_u + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $2) ) ) ) ) ) - (local.set $5 + (local.set $1 (i32.xor (i8x16.bitmask - (local.get $3) + (local.get $2) ) (i32.const 65535) ) @@ -1531,26 +1559,25 @@ ) (i32.add (i32.ctz - (local.get $5) + (local.get $1) ) (i32.sub - (local.get $1) + (local.get $5) (local.get $0) ) ) ) (func $strcspn (param $0 i32) (param $1 i32) (result i32) - (local $2 v128) - (local $3 v128) - (local $4 v128) + (local $2 i32) + (local $3 i32) + (local $4 i32) (local $5 i32) - (local $6 i32) - (local $7 i32) - (local $8 i32) - (local $9 i32) + (local $6 v128) + (local $7 v128) + (local $8 v128) (block $block (if - (local.tee $6 + (local.tee $3 (i32.load8_u (local.get $1) ) @@ -1566,12 +1593,12 @@ (block $block1 (if (v128.any_true - (local.tee $3 + (local.tee $7 (v128.or (i8x16.eq - (local.tee $3 + (local.tee $7 (v128.load - (local.tee $5 + (local.tee $2 (i32.and (local.get $0) (i32.const -16) @@ -1582,12 +1609,12 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.tee $4 + (local.get $7) + (local.tee $8 (i8x16.splat - (local.get $6) + (local.get $3) ) ) - (local.get $3) ) ) ) @@ -1597,7 +1624,7 @@ (local.tee $1 (i32.and (i8x16.bitmask - (local.get $3) + (local.get $7) ) (i32.shl (i32.const -1) @@ -1612,29 +1639,29 @@ ) ) (loop $label - (local.set $3 + (local.set $7 (v128.load offset=16 - (local.get $5) + (local.get $2) ) ) - (local.set $5 + (local.set $2 (i32.add - (local.get $5) + (local.get $2) (i32.const 16) ) ) (br_if $label (i32.eqz (v128.any_true - (local.tee $3 + (local.tee $7 (v128.or (i8x16.eq - (local.get $3) + (local.get $7) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $4) - (local.get $3) + (local.get $7) + (local.get $8) ) ) ) @@ -1644,14 +1671,14 @@ ) (local.set $1 (i8x16.bitmask - (local.get $3) + (local.get $7) ) ) ) (return (i32.sub (i32.add - (local.get $5) + (local.get $2) (i32.ctz (local.get $1) ) @@ -1660,27 +1687,22 @@ ) ) ) - (local.set $8 - (i32.sub - (i32.const 0) - (local.tee $7 - (i32.and - (local.get $0) - (i32.const 15) - ) - ) + (local.set $3 + (i32.and + (local.get $0) + (i32.const -16) ) ) (loop $label1 (v128.store (i32.const 4080) - (local.get $3) + (local.get $7) ) (i32.store8 (i32.or - (local.tee $6 + (local.tee $4 (i32.and - (local.tee $5 + (local.tee $2 (i32.load8_u (local.get $1) ) @@ -1693,16 +1715,16 @@ (i32.or (i32.load8_u (i32.or - (local.get $6) + (local.get $4) (i32.const 4080) ) ) (i32.shl (i32.const 1) (i32.sub - (local.tee $9 + (local.tee $5 (i32.shr_u - (local.get $5) + (local.get $2) (i32.const 4) ) ) @@ -1713,22 +1735,22 @@ ) (v128.store (i32.const 4064) - (local.get $4) + (local.get $8) ) (i32.store8 - (local.tee $6 + (local.tee $4 (i32.or - (local.get $6) + (local.get $4) (i32.const 4064) ) ) (i32.or (i32.load8_u - (local.get $6) + (local.get $4) ) (i32.shl (i32.const 1) - (local.get $9) + (local.get $5) ) ) ) @@ -1738,144 +1760,154 @@ (i32.const 1) ) ) - (local.set $3 + (local.set $7 (v128.load (i32.const 4080) ) ) - (local.set $4 + (local.set $8 (v128.load (i32.const 4064) ) ) (br_if $label1 - (local.get $5) + (local.get $2) ) ) (block $block2 (if (v128.any_true - (local.tee $2 + (local.tee $6 (i8x16.eq (v128.and (v128.or (i8x16.swizzle - (local.get $4) + (local.get $8) (v128.and - (local.tee $2 + (local.tee $6 (v128.load - (local.tee $5 - (i32.add - (local.get $0) - (local.get $8) - ) - ) + (local.get $3) ) ) (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) ) ) (i8x16.swizzle - (local.get $3) + (local.get $7) (v128.and - (local.get $2) + (local.get $6) (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) ) ) ) - (local.tee $2 + (local.tee $6 (i8x16.swizzle (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) (i8x16.shr_u - (local.get $2) + (local.get $6) (i32.const 4) ) ) ) ) - (local.get $2) + (local.get $6) ) ) ) (then (br_if $block2 - (local.tee $1 + (local.tee $2 (i32.and (i8x16.bitmask - (local.get $2) + (local.get $6) ) (i32.shl (i32.const -1) - (local.get $7) + (i32.and + (local.get $0) + (i32.const 15) + ) ) ) ) ) ) ) + (local.set $2 + (i32.add + (local.get $3) + (i32.const 16) + ) + ) (loop $label2 - (local.set $2 - (v128.load offset=16 - (local.get $5) + (local.set $6 + (v128.load + (local.get $2) ) ) - (local.set $5 + (local.set $2 (i32.add - (local.get $5) + (local.get $2) + (i32.const 16) + ) + ) + (local.set $3 + (i32.add + (local.get $3) (i32.const 16) ) ) (br_if $label2 (i32.eqz (v128.any_true - (local.tee $2 + (local.tee $6 (i8x16.eq (v128.and (v128.or (i8x16.swizzle - (local.get $4) + (local.get $8) (v128.and - (local.get $2) + (local.get $6) (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) ) ) (i8x16.swizzle - (local.get $3) + (local.get $7) (v128.and - (local.get $2) + (local.get $6) (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) ) ) ) - (local.tee $2 + (local.tee $6 (i8x16.swizzle (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) (i8x16.shr_u - (local.get $2) + (local.get $6) (i32.const 4) ) ) ) ) - (local.get $2) + (local.get $6) ) ) ) ) ) ) - (local.set $1 + (local.set $2 (i8x16.bitmask - (local.get $2) + (local.get $6) ) ) ) (i32.add (i32.ctz - (local.get $1) + (local.get $2) ) (i32.sub - (local.get $5) + (local.get $3) (local.get $0) ) ) diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 2d0981c..86e2197 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -17,7 +17,6 @@ extern "C" { // Use the builtins if compiled with bulk memory operations. // Clang will intrinsify using SIMD for small, constant N. -// For everything else, this helps inlining. __attribute__((weak, always_inline)) void *memset(void *dest, int c, size_t n) { @@ -80,7 +79,7 @@ int memcmp(const void *vl, const void *vr, size_t n) { return 0; } -__attribute__((weak, noinline)) +__attribute__((weak)) void *memchr(const void *s, int c, size_t n) { // When n is zero, a function that locates a character finds no occurrence. // Otherwise, decrement n to ensure sub_overflow overflows @@ -92,9 +91,10 @@ void *memchr(const void *s, int c, size_t n) { // memchr must behave as if it reads characters sequentially // and stops as soon as a match is found. // Aligning ensures loads beyond the first match are safe. - // Volatile avoids compiler tricks around out of bounds loads. + // Casting through uintptr_t makes this implementation-defined, + // rather than undefined behavior. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const volatile v128_t *v = (v128_t *)((char *)s - align); + const v128_t *v = (v128_t *)((uintptr_t)s - align); const v128_t vc = wasm_i8x16_splat(c); for (;;) { @@ -126,7 +126,7 @@ void *memchr(const void *s, int c, size_t n) { } } -__attribute__((weak, noinline)) +__attribute__((weak)) void *memrchr(const void *s, int c, size_t n) { // memrchr is allowed to read up to n bytes from the object. // Search backward for the last matching character. @@ -150,18 +150,19 @@ void *memrchr(const void *s, int c, size_t n) { return NULL; } -__attribute__((weak, noinline)) +__attribute__((weak)) size_t strlen(const char *s) { // strlen must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. + // Casting through uintptr_t makes this implementation-defined, + // rather than undefined behavior. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const volatile v128_t *v = (v128_t *)(s - align); + const v128_t *v = (v128_t *)((uintptr_t)s - align); for (;;) { - const v128_t vv = *v; // Bitmask is slow on AArch64, all_true is much faster. - if (!wasm_i8x16_all_true(vv)) { - const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}); + if (!wasm_i8x16_all_true(*v)) { + const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){}); // Clear the bits corresponding to alignment (little-endian) // so we can count trailing zeros. int mask = wasm_i8x16_bitmask(cmp) >> align << align; @@ -183,14 +184,14 @@ size_t strlen(const char *s) { static char *__strchrnul(const char *s, int c) { // strchrnul must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. - // Volatile avoids compiler tricks around out of bounds loads. + // Casting through uintptr_t makes this implementation-defined, + // rather than undefined behavior. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const volatile v128_t *v = (v128_t *)(s - align); + const v128_t *v = (v128_t *)((uintptr_t)s - align); const v128_t vc = wasm_i8x16_splat(c); for (;;) { - const v128_t vv = *v; - const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}) | wasm_i8x16_eq(vv, vc); + const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){}) | wasm_i8x16_eq(*v, vc); // Bitmask is slow on AArch64, any_true is much faster. if (wasm_v128_any_true(cmp)) { // Clear the bits corresponding to alignment (little-endian) @@ -284,13 +285,14 @@ static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) { #undef wasm_i8x16_relaxed_swizzle -__attribute__((weak, noinline)) +__attribute__((weak)) size_t strspn(const char *s, const char *c) { // strspn must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. - // Volatile avoids compiler tricks around out of bounds loads. + // Casting through uintptr_t makes this implementation-defined, + // rather than undefined behavior. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const volatile v128_t *v = (v128_t *)(s - align); + const v128_t *v = (v128_t *)((uintptr_t)s - align); if (!c[0]) return 0; if (!c[1]) { @@ -346,15 +348,16 @@ size_t strspn(const char *s, const char *c) { } } -__attribute__((weak, noinline)) +__attribute__((weak)) size_t strcspn(const char *s, const char *c) { if (!c[0] || !c[1]) return __strchrnul(s, *c) - s; // strcspn must stop as soon as it finds the terminator. // Aligning ensures loads beyond the terminator are safe. - // Volatile avoids compiler tricks around out of bounds loads. + // Casting through uintptr_t makes this implementation-defined, + // rather than undefined behavior. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const volatile v128_t *v = (v128_t *)(s - align); + const v128_t *v = (v128_t *)((uintptr_t)s - align); __wasm_v128_bitmap256_t bitmap = {};