From 5c55d8692fb7ada98a40f2a2ee60262fba3765a6 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Fri, 4 Jul 2025 14:58:48 +0100 Subject: [PATCH] Fix bitset. --- sqlite3/libc/build.sh | 1 - sqlite3/libc/libc.wasm | Bin 3071 -> 2991 bytes sqlite3/libc/libc.wat | 818 ++++++++++++++++++-------------------- sqlite3/libc/libc_test.go | 14 +- sqlite3/libc/string.h | 96 +++-- value.go | 2 +- 6 files changed, 434 insertions(+), 497 deletions(-) diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index ea68bfa..4e13232 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -13,7 +13,6 @@ trap 'rm -f libc.c libc.tmp' EXIT cat << EOF > libc.c #include #include -#include EOF "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 43a150b96899c31db21840dfccef93f68c5b7e75..688e81451ab4a1bd18888934a90d00eb44ba06ba 100755 GIT binary patch delta 829 zcma))&1w`u5P zl1GS$58w+#Pk8`+0737zYjw}8D;_j+=;@)Ws;jlF!L|5i%ZokL<#o+bto)^?eLhy9a6BQ?wf>j(fi^e zo7^=Lf2SNI+2olUG0m0-@km^+wQ&i0`JZpf0b(VOaN430|iFq z!jMD1S^1Cbzj4nzRvBJE?xH$Mq9^ zIi;>q4yd?GA=G#D`TDcIIDGprKYn(ew}ekfzP3Z$j%~ tMOvAkwF3SwCHn}iWKnK9$-=oLRR0YR?g9!X5TYE&ggMGjug;tmKLGW={u=-Q diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 27472d1..e8a2510 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -900,7 +900,7 @@ (i8x16.all_true (local.tee $3 (v128.load - (local.tee $2 + (local.tee $1 (i32.and (local.get $0) (i32.const -16) @@ -912,7 +912,7 @@ ) (then (br_if $block - (local.tee $1 + (local.tee $2 (i32.and (i8x16.bitmask (i8x16.eq @@ -932,37 +932,23 @@ ) ) ) - (local.set $1 - (i32.add - (local.get $2) - (i32.const 16) - ) - ) (loop $label - (local.set $2 - (i32.add - (local.get $2) - (i32.const 16) - ) - ) - (local.set $3 - (v128.load - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) (br_if $label (i8x16.all_true - (local.get $3) + (local.tee $3 + (v128.load + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + ) + ) ) ) ) - (local.set $1 + (local.set $2 (i8x16.bitmask (i8x16.eq (local.get $3) @@ -973,10 +959,10 @@ ) (i32.add (i32.ctz - (local.get $1) + (local.get $2) ) (i32.sub - (local.get $2) + (local.get $1) (local.get $0) ) ) @@ -991,7 +977,7 @@ (local.tee $2 (v128.or (i8x16.eq - (local.tee $2 + (local.tee $3 (v128.load (local.tee $4 (i32.and @@ -1004,7 +990,7 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) + (local.get $3) (local.tee $3 (i8x16.splat (local.get $1) @@ -1034,24 +1020,22 @@ ) ) (loop $label - (local.set $2 - (v128.load offset=16 - (local.get $4) - ) - ) - (local.set $4 - (i32.add - (local.get $4) - (i32.const 16) - ) - ) (br_if $label (i32.eqz (v128.any_true (local.tee $2 (v128.or (i8x16.eq - (local.get $2) + (local.tee $2 + (v128.load + (local.tee $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq @@ -1087,7 +1071,7 @@ (local.tee $2 (v128.or (i8x16.eq - (local.tee $2 + (local.tee $3 (v128.load (local.tee $4 (i32.and @@ -1100,7 +1084,7 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $2) + (local.get $3) (local.tee $3 (i8x16.splat (local.get $1) @@ -1130,24 +1114,22 @@ ) ) (loop $label - (local.set $2 - (v128.load offset=16 - (local.get $4) - ) - ) - (local.set $4 - (i32.add - (local.get $4) - (i32.const 16) - ) - ) (br_if $label (i32.eqz (v128.any_true (local.tee $2 (v128.or (i8x16.eq - 
(local.get $2) + (local.tee $2 + (v128.load + (local.tee $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq @@ -1203,15 +1185,15 @@ (local $2 v128) (local $3 v128) (local $4 v128) - (local $5 i32) + (local $5 v128) (local $6 i32) (local $7 i32) (local $8 i32) (local $9 i32) - (local $scratch v128) + (local $10 i32) (if (i32.eqz - (local.tee $6 + (local.tee $7 (i32.load8_u (local.get $1) ) @@ -1223,326 +1205,295 @@ ) ) ) - (local.set $5 + (local.set $6 (i32.and (local.get $0) (i32.const -16) ) ) - (local.set $8 + (local.set $9 (i32.and (local.get $0) (i32.const 15) ) ) - (block $block3 - (block $block1 - (block $block - (if - (i32.load8_u offset=1 + (if + (i32.load8_u offset=1 + (local.get $1) + ) + (then + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (loop $label + (v128.store + (i32.const 4064) + (local.get $4) + ) + (i32.store8 + (local.tee $10 + (i32.or + (local.tee $8 + (i32.and + (local.get $7) + (i32.const 15) + ) + ) + (i32.const 4064) + ) + ) + (i32.or + (i32.load8_u + (local.get $10) + ) + (i32.shl + (i32.const 1) + (local.tee $7 + (i32.shr_u + (local.get $7) + (i32.const 4) + ) + ) + ) + ) + ) + (v128.store + (i32.const 4080) + (local.get $3) + ) + (i32.store8 + (local.tee $8 + (i32.or + (local.get $8) + (i32.const 4080) + ) + ) + (i32.or + (i32.load8_u + (local.get $8) + ) + (i32.shl + (i32.const 1) + (i32.sub + (local.get $7) + (i32.const 8) + ) + ) + ) + ) + (local.set $7 + (i32.load8_u (local.get $1) ) - (then - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (loop $label - (v128.store - (i32.const 4064) - (local.get $4) - ) - (i32.store8 - (local.tee $9 - (i32.or - (local.tee $7 - (i32.and - (local.get $6) - (i32.const 15) - ) - ) - (i32.const 4064) - ) - ) - (i32.or - (i32.load8_u - (local.get $9) - ) - (i32.shl - (i32.const 1) - (local.tee $6 - (i32.shr_u - (local.get $6) - (i32.const 4) - ) - ) - ) - ) - ) - (v128.store - (i32.const 4080) - (local.get $3) - ) - (i32.store8 - (local.tee $7 - (i32.or - (local.get $7) - (i32.const 4080) - ) - ) - (i32.or - (i32.load8_u - (local.get $7) - ) - (i32.shl - (i32.const 1) - (i32.sub - (local.get $6) - (i32.const 8) - ) - ) - ) - ) - (local.set $6 - (i32.load8_u - (local.get $1) - ) - ) - (local.set $4 - (v128.load - (i32.const 4064) - ) - ) - (local.set $3 - (v128.load - (i32.const 4080) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 1) - ) - ) - (br_if $label - (local.get $6) - ) - ) - (br_if $block - (i32.eqz - (i8x16.all_true - (local.tee $2 - (i8x16.eq - (v128.and - (v128.or - (i8x16.swizzle - (local.get $4) + ) + (local.set $4 + (v128.load + (i32.const 4064) + ) + ) + (local.set $3 + (v128.load + (i32.const 4080) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label + (local.get $7) + ) + ) + (block $block + (if + (i32.eqz + (i8x16.all_true + (local.tee $2 + (i8x16.eq + (v128.and + (v128.or + (i8x16.swizzle + (local.get $3) + (v128.xor + (local.tee $5 (v128.and (local.tee $2 (v128.load - (local.get $5) + (local.get $6) ) ) (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) ) ) - (i8x16.swizzle - (local.get $3) - (v128.and - (local.get $2) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) - ) - ) - (local.tee $2 - (i8x16.swizzle - (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) - (i8x16.shr_u - (local.get $2) - (i32.const 4) - ) - ) + (v128.const i32x4 0x80808080 0x80808080 0x80808080 0x80808080) ) 
) + (i8x16.swizzle + (local.get $4) + (local.get $5) + ) + ) + (local.tee $2 + (i8x16.swizzle + (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) + (i8x16.shr_u + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $2) + ) + ) + ) + ) + (then + (br_if $block + (local.tee $1 + (i32.and + (i32.xor + (i8x16.bitmask (local.get $2) ) + (i32.const 65535) + ) + (i32.shl + (i32.const -1) + (local.get $9) ) ) ) ) - (br $block1) ) ) - (block $block2 - (if - (i32.eqz - (i8x16.all_true - (local.tee $3 - (i8x16.eq - (v128.load - (local.get $5) - ) - (local.tee $4 - (i8x16.splat - (local.get $6) - ) - ) - ) - ) - ) - ) - (then - (br_if $block2 - (local.tee $1 - (i32.and - (i32.xor - (i8x16.bitmask + (loop $label1 + (br_if $label1 + (i8x16.all_true + (local.tee $2 + (i8x16.eq + (v128.and + (v128.or + (i8x16.swizzle (local.get $3) - ) - (i32.const 65535) - ) - (i32.shl - (i32.const -1) - (local.get $8) - ) - ) - ) - ) - ) - ) - (local.set $1 - (i32.add - (local.get $5) - (i32.const 16) - ) - ) - (loop $label1 - (local.set $5 - (i32.add - (local.get $5) - (i32.const 16) - ) - ) - (br_if $label1 - (i8x16.all_true - (local.tee $3 - (i8x16.eq - (block (result v128) - (local.set $scratch - (v128.load - (local.get $1) + (v128.xor + (local.tee $5 + (v128.and + (local.tee $2 + (v128.load + (local.tee $6 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + ) + ) + (v128.const i32x4 0x80808080 0x80808080 0x80808080 0x80808080) ) ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) + (i8x16.swizzle + (local.get $4) + (local.get $5) + ) + ) + (local.tee $2 + (i8x16.swizzle + (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) + (i8x16.shr_u + (local.get $2) + (i32.const 4) ) ) - (local.get $scratch) ) - (local.get $4) ) + (local.get $2) ) ) ) ) - (local.set $1 - (i32.xor - (i8x16.bitmask - (local.get $3) - ) - (i32.const 65535) - ) - ) ) - (return - (i32.add - (i32.ctz - (local.get $1) - ) - (i32.sub - (local.get $5) - (local.get $0) + (local.set $1 + (i32.xor + (i8x16.bitmask + (local.get $2) ) + (i32.const 65535) ) ) ) - (br_if $block3 - (local.tee $1 - (i32.and - (i32.xor - (i8x16.bitmask - (local.get $2) - ) - (i32.const 65535) - ) - (i32.shl - (i32.const -1) - (local.get $8) - ) + (return + (i32.add + (i32.ctz + (local.get $1) + ) + (i32.sub + (local.get $6) + (local.get $0) ) ) ) ) - (local.set $1 - (i32.add - (local.get $5) - (i32.const 16) + ) + (block $block1 + (if + (i32.eqz + (i8x16.all_true + (local.tee $3 + (i8x16.eq + (v128.load + (local.get $6) + ) + (local.tee $4 + (i8x16.splat + (local.get $7) + ) + ) + ) + ) + ) + ) + (then + (br_if $block1 + (local.tee $1 + (i32.and + (i32.xor + (i8x16.bitmask + (local.get $3) + ) + (i32.const 65535) + ) + (i32.shl + (i32.const -1) + (local.get $9) + ) + ) + ) + ) ) ) (loop $label2 - (local.set $2 - (v128.load - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 16) - ) - ) (br_if $label2 (i8x16.all_true - (local.tee $2 + (local.tee $3 (i8x16.eq - (v128.and - (v128.or - (i8x16.swizzle - (local.get $4) - (v128.and - (local.get $2) - (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) - ) - ) - (i8x16.swizzle - (local.get $3) - (v128.and - (local.get $2) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) - ) - ) - (local.tee $2 - (i8x16.swizzle - (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) - (i8x16.shr_u - 
(local.get $2) - (i32.const 4) - ) + (v128.load + (local.tee $6 + (i32.add + (local.get $6) + (i32.const 16) ) ) ) - (local.get $2) + (local.get $4) ) ) ) @@ -1551,7 +1502,7 @@ (local.set $1 (i32.xor (i8x16.bitmask - (local.get $2) + (local.get $3) ) (i32.const 65535) ) @@ -1562,22 +1513,23 @@ (local.get $1) ) (i32.sub - (local.get $5) + (local.get $6) (local.get $0) ) ) ) (func $strcspn (param $0 i32) (param $1 i32) (result i32) - (local $2 i32) - (local $3 i32) - (local $4 i32) - (local $5 i32) - (local $6 v128) - (local $7 v128) - (local $8 v128) + (local $2 v128) + (local $3 v128) + (local $4 v128) + (local $5 v128) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) (block $block (if - (local.tee $3 + (local.tee $7 (i32.load8_u (local.get $1) ) @@ -1593,12 +1545,12 @@ (block $block1 (if (v128.any_true - (local.tee $7 + (local.tee $3 (v128.or (i8x16.eq - (local.tee $7 + (local.tee $4 (v128.load - (local.tee $2 + (local.tee $6 (i32.and (local.get $0) (i32.const -16) @@ -1609,10 +1561,10 @@ (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $7) - (local.tee $8 + (local.get $4) + (local.tee $4 (i8x16.splat - (local.get $3) + (local.get $7) ) ) ) @@ -1624,7 +1576,7 @@ (local.tee $1 (i32.and (i8x16.bitmask - (local.get $7) + (local.get $3) ) (i32.shl (i32.const -1) @@ -1639,29 +1591,27 @@ ) ) (loop $label - (local.set $7 - (v128.load offset=16 - (local.get $2) - ) - ) - (local.set $2 - (i32.add - (local.get $2) - (i32.const 16) - ) - ) (br_if $label (i32.eqz (v128.any_true - (local.tee $7 + (local.tee $3 (v128.or (i8x16.eq - (local.get $7) + (local.tee $3 + (v128.load + (local.tee $6 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ) (i8x16.eq - (local.get $7) - (local.get $8) + (local.get $3) + (local.get $4) ) ) ) @@ -1671,38 +1621,38 @@ ) (local.set $1 (i8x16.bitmask - (local.get $7) + (local.get $3) ) ) ) (return - (i32.sub - (i32.add - (local.get $2) - (i32.ctz - (local.get $1) - ) + (i32.add + (i32.ctz + (local.get $1) + ) + (i32.sub + (local.get $6) + (local.get $0) ) - (local.get $0) ) ) ) - (local.set $3 + (local.set $8 (i32.and (local.get $0) - (i32.const -16) + (i32.const 15) ) ) (loop $label1 (v128.store (i32.const 4080) - (local.get $7) + (local.get $4) ) (i32.store8 (i32.or - (local.tee $4 + (local.tee $7 (i32.and - (local.tee $2 + (local.tee $6 (i32.load8_u (local.get $1) ) @@ -1715,16 +1665,16 @@ (i32.or (i32.load8_u (i32.or - (local.get $4) + (local.get $7) (i32.const 4080) ) ) (i32.shl (i32.const 1) (i32.sub - (local.tee $5 + (local.tee $9 (i32.shr_u - (local.get $2) + (local.get $6) (i32.const 4) ) ) @@ -1735,22 +1685,22 @@ ) (v128.store (i32.const 4064) - (local.get $8) + (local.get $3) ) (i32.store8 - (local.tee $4 + (local.tee $7 (i32.or - (local.get $4) + (local.get $7) (i32.const 4064) ) ) (i32.or (i32.load8_u - (local.get $4) + (local.get $7) ) (i32.shl (i32.const 1) - (local.get $5) + (local.get $9) ) ) ) @@ -1760,154 +1710,146 @@ (i32.const 1) ) ) - (local.set $7 + (local.set $4 (v128.load (i32.const 4080) ) ) - (local.set $8 + (local.set $3 (v128.load (i32.const 4064) ) ) (br_if $label1 - (local.get $2) + (local.get $6) ) ) (block $block2 (if (v128.any_true - (local.tee $6 + (local.tee $2 (i8x16.eq (v128.and (v128.or (i8x16.swizzle - (local.get $8) - (v128.and - (local.tee $6 - (v128.load - (local.get $3) + (local.get $4) + (v128.xor + (local.tee $5 + (v128.and + (local.tee $2 + (v128.load + (local.tee $6 + (i32.and + (local.get $0) + 
(i32.const -16) + ) + ) + ) + ) + (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) ) ) - (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + (v128.const i32x4 0x80808080 0x80808080 0x80808080 0x80808080) ) ) (i8x16.swizzle - (local.get $7) - (v128.and - (local.get $6) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) + (local.get $3) + (local.get $5) ) ) - (local.tee $6 + (local.tee $2 (i8x16.swizzle (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) (i8x16.shr_u - (local.get $6) + (local.get $2) (i32.const 4) ) ) ) ) - (local.get $6) + (local.get $2) ) ) ) (then (br_if $block2 - (local.tee $2 + (local.tee $1 (i32.and (i8x16.bitmask - (local.get $6) + (local.get $2) ) (i32.shl (i32.const -1) - (i32.and - (local.get $0) - (i32.const 15) - ) + (local.get $8) ) ) ) ) ) ) - (local.set $2 - (i32.add - (local.get $3) - (i32.const 16) - ) - ) (loop $label2 - (local.set $6 - (v128.load - (local.get $2) - ) - ) - (local.set $2 - (i32.add - (local.get $2) - (i32.const 16) - ) - ) - (local.set $3 - (i32.add - (local.get $3) - (i32.const 16) - ) - ) (br_if $label2 (i32.eqz (v128.any_true - (local.tee $6 + (local.tee $2 (i8x16.eq (v128.and (v128.or (i8x16.swizzle - (local.get $8) - (v128.and - (local.get $6) - (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + (local.get $4) + (v128.xor + (local.tee $5 + (v128.and + (local.tee $2 + (v128.load + (local.tee $6 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (v128.const i32x4 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f 0x8f8f8f8f) + ) + ) + (v128.const i32x4 0x80808080 0x80808080 0x80808080 0x80808080) ) ) (i8x16.swizzle - (local.get $7) - (v128.and - (local.get $6) - (v128.const i32x4 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f 0x0f0f0f0f) - ) + (local.get $3) + (local.get $5) ) ) - (local.tee $6 + (local.tee $2 (i8x16.swizzle (v128.const i32x4 0x08040201 0x80402010 0x08040201 0x80402010) (i8x16.shr_u - (local.get $6) + (local.get $2) (i32.const 4) ) ) ) ) - (local.get $6) + (local.get $2) ) ) ) ) ) ) - (local.set $2 + (local.set $1 (i8x16.bitmask - (local.get $6) + (local.get $2) ) ) ) (i32.add (i32.ctz - (local.get $2) + (local.get $1) ) (i32.sub - (local.get $3) + (local.get $6) (local.get $0) ) ) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 4a1bb5b..1232cc0 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -408,7 +408,7 @@ func Test_strspn(t *testing.T) { fill(memory[ptr:ptr+max(pos, length)], 5) memory[ptr+pos] = 7 memory[ptr+length] = 0 - memory[128] = 3 + memory[128] = 7 | 128 memory[129] = 5 got := call(strspn, uint64(ptr), 129) @@ -434,7 +434,7 @@ func Test_strspn(t *testing.T) { clear(memory) fill(memory[ptr:ptr+length], 5) memory[len(memory)-1] = 7 - memory[128] = 3 + memory[128] = 7 | 128 memory[129] = 5 got := call(strspn, uint64(ptr), 129) @@ -462,7 +462,7 @@ func Test_strcspn(t *testing.T) { fill(memory[ptr:ptr+max(pos, length)], 5) memory[ptr+pos] = 7 memory[ptr+length] = 0 - memory[128] = 3 + memory[128] = 5 | 128 memory[129] = 7 got := call(strcspn, uint64(ptr), 129) @@ -488,7 +488,7 @@ func Test_strcspn(t *testing.T) { clear(memory) fill(memory[ptr:ptr+length], 5) memory[len(memory)-1] = 7 - memory[128] = 3 + memory[128] = 5 | 128 memory[129] = 7 got := call(strcspn, uint64(ptr), 129) @@ -761,8 +761,10 @@ func Fuzz_strspn(f *testing.F) { } if uint32(got) != uint32(want) { - t.Errorf("strspn(%q, %q) = %d, want %d", - s, chars, uint32(got), uint32(want)) + t.Errorf("strspn(%v, %v) = %d, want %d", + 
[]byte(memory[ptr1:ptr1+len(s)]), + []byte(memory[ptr2:ptr2+len(chars)]), + uint32(got), uint32(want)) } }) } diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 91a395e..b48beeb 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -3,9 +3,7 @@ #ifndef _WASM_SIMD128_STRING_H #define _WASM_SIMD128_STRING_H -#include #include -#include #include #include <__macro_PAGESIZE.h> @@ -90,15 +88,14 @@ void *memchr(const void *s, int c, size_t n) { // memchr must behave as if it reads characters sequentially // and stops as soon as a match is found. - // Aligning ensures loads beyond the first match are safe. - // Casting through uintptr_t makes this implementation-defined, - // rather than undefined behavior. + // Aligning ensures out of bounds loads are safe. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const v128_t *v = (v128_t *)((uintptr_t)s - align); - const v128_t vc = wasm_i8x16_splat(c); + uintptr_t addr = (uintptr_t)s - align; + v128_t vc = wasm_i8x16_splat(c); for (;;) { - const v128_t cmp = wasm_i8x16_eq(*v, vc); + v128_t v = *(v128_t *)addr; + v128_t cmp = wasm_i8x16_eq(v, vc); // Bitmask is slow on AArch64, any_true is much faster. if (wasm_v128_any_true(cmp)) { // Clear the bits corresponding to align (little-endian) @@ -114,7 +111,7 @@ void *memchr(const void *s, int c, size_t n) { // That's a match, unless it is beyond the end of the object. // Recall that we decremented n, so less-than-or-equal-to is correct. size_t ctz = __builtin_ctz(mask); - return ctz - align <= n ? (char *)v + ctz : NULL; + return ctz - align <= n ? (char *)s + (addr - (uintptr_t)s + ctz) : NULL; } } // Decrement n; if it overflows we're done. @@ -122,7 +119,7 @@ void *memchr(const void *s, int c, size_t n) { return NULL; } align = 0; - v++; + addr += sizeof(v128_t); } } @@ -155,16 +152,15 @@ void *memrchr(const void *s, int c, size_t n) { __attribute__((weak)) size_t strlen(const char *s) { // strlen must stop as soon as it finds the terminator. - // Aligning ensures loads beyond the terminator are safe. - // Casting through uintptr_t makes this implementation-defined, - // rather than undefined behavior. + // Aligning ensures out of bounds loads are safe. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const v128_t *v = (v128_t *)((uintptr_t)s - align); + uintptr_t addr = (uintptr_t)s - align; for (;;) { + v128_t v = *(v128_t *)addr; // Bitmask is slow on AArch64, all_true is much faster. - if (!wasm_i8x16_all_true(*v)) { - const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){}); + if (!wasm_i8x16_all_true(v)) { + const v128_t cmp = wasm_i8x16_eq(v, (v128_t){}); // Clear the bits corresponding to align (little-endian) // so we can count trailing zeros. int mask = wasm_i8x16_bitmask(cmp) >> align << align; @@ -175,25 +171,24 @@ size_t strlen(const char *s) { // it's as if we didn't find anything. if (mask) { // Find the offset of the first one bit (little-endian). - return (char *)v - s + __builtin_ctz(mask); + return addr - (uintptr_t)s + __builtin_ctz(mask); } } align = 0; - v++; + addr += sizeof(v128_t); } } static char *__strchrnul(const char *s, int c) { - // strchrnul must stop as soon as it finds the terminator. - // Aligning ensures loads beyond the terminator are safe. - // Casting through uintptr_t makes this implementation-defined, - // rather than undefined behavior. + // strchrnul must stop as soon as a match is found. + // Aligning ensures out of bounds loads are safe. 
uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const v128_t *v = (v128_t *)((uintptr_t)s - align); - const v128_t vc = wasm_i8x16_splat(c); + uintptr_t addr = (uintptr_t)s - align; + v128_t vc = wasm_i8x16_splat(c); for (;;) { - const v128_t cmp = wasm_i8x16_eq(*v, (v128_t){}) | wasm_i8x16_eq(*v, vc); + v128_t v = *(v128_t *)addr; + const v128_t cmp = wasm_i8x16_eq(v, (v128_t){}) | wasm_i8x16_eq(v, vc); // Bitmask is slow on AArch64, any_true is much faster. if (wasm_v128_any_true(cmp)) { // Clear the bits corresponding to align (little-endian) @@ -206,11 +201,11 @@ static char *__strchrnul(const char *s, int c) { // it's as if we didn't find anything. if (mask) { // Find the offset of the first one bit (little-endian). - return (char *)v + __builtin_ctz(mask); + return (char *)s + (addr - (uintptr_t)s + __builtin_ctz(mask)); } } align = 0; - v++; + addr += sizeof(v128_t); } } @@ -269,19 +264,19 @@ static void __wasm_v128_setbit(__wasm_v128_bitmap256_t *bitmap, int i) { __attribute__((always_inline)) static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) { + v128_t hi_nibbles = wasm_u8x16_shr(v, 4); + v128_t bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, // + 1, 2, 4, 8, 16, 32, 64, 128); + v128_t bitmask = wasm_i8x16_relaxed_swizzle(bitmask_lookup, hi_nibbles); + v128_t indices_0_7 = v & wasm_u8x16_const_splat(0x8f); - v128_t indices_8_15 = (v & wasm_u8x16_const_splat(0x80)) ^ indices_0_7; + v128_t indices_8_15 = indices_0_7 ^ wasm_u8x16_const_splat(0x80); v128_t row_0_7 = wasm_i8x16_swizzle(bitmap.l, indices_0_7); v128_t row_8_15 = wasm_i8x16_swizzle(bitmap.h, indices_8_15); v128_t bitsets = row_0_7 | row_8_15; - v128_t hi_nibbles = wasm_u8x16_shr(v, 4); - v128_t bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, // - 1, 2, 4, 8, 16, 32, 64, 128); - v128_t bitmask = wasm_i8x16_relaxed_swizzle(bitmask_lookup, hi_nibbles); - return wasm_i8x16_eq(bitsets & bitmask, bitmask); } @@ -290,17 +285,16 @@ static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) { __attribute__((weak)) size_t strspn(const char *s, const char *c) { // strspn must stop as soon as it finds the terminator. - // Aligning ensures loads beyond the terminator are safe. - // Casting through uintptr_t makes this implementation-defined, - // rather than undefined behavior. + // Aligning ensures out of bounds loads are safe. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const v128_t *v = (v128_t *)((uintptr_t)s - align); + uintptr_t addr = (uintptr_t)s - align; if (!c[0]) return 0; if (!c[1]) { - const v128_t vc = wasm_i8x16_splat(*c); + v128_t vc = wasm_i8x16_splat(*c); for (;;) { - const v128_t cmp = wasm_i8x16_eq(*v, vc); + v128_t v = *(v128_t *)addr; + v128_t cmp = wasm_i8x16_eq(v, vc); // Bitmask is slow on AArch64, all_true is much faster. if (!wasm_i8x16_all_true(cmp)) { // Clear the bits corresponding to align (little-endian) @@ -313,11 +307,11 @@ size_t strspn(const char *s, const char *c) { // it's as if we didn't find anything. if (mask) { // Find the offset of the first one bit (little-endian). - return (char *)v - s + __builtin_ctz(mask); + return addr - (uintptr_t)s + __builtin_ctz(mask); } } align = 0; - v++; + addr += sizeof(v128_t); } } @@ -329,7 +323,8 @@ size_t strspn(const char *s, const char *c) { } for (;;) { - const v128_t cmp = __wasm_v128_chkbits(bitmap, *v); + v128_t v = *(v128_t *)addr; + v128_t cmp = __wasm_v128_chkbits(bitmap, v); // Bitmask is slow on AArch64, all_true is much faster. 
if (!wasm_i8x16_all_true(cmp)) { // Clear the bits corresponding to align (little-endian) @@ -342,11 +337,11 @@ size_t strspn(const char *s, const char *c) { // it's as if we didn't find anything. if (mask) { // Find the offset of the first one bit (little-endian). - return (char *)v - s + __builtin_ctz(mask); + return addr - (uintptr_t)s + __builtin_ctz(mask); } } align = 0; - v++; + addr += sizeof(v128_t); } } @@ -355,11 +350,9 @@ size_t strcspn(const char *s, const char *c) { if (!c[0] || !c[1]) return __strchrnul(s, *c) - s; // strcspn must stop as soon as it finds the terminator. - // Aligning ensures loads beyond the terminator are safe. - // Casting through uintptr_t makes this implementation-defined, - // rather than undefined behavior. + // Aligning ensures out of bounds loads are safe. uintptr_t align = (uintptr_t)s % sizeof(v128_t); - const v128_t *v = (v128_t *)((uintptr_t)s - align); + uintptr_t addr = (uintptr_t)s - align; __wasm_v128_bitmap256_t bitmap = {}; @@ -369,7 +362,8 @@ size_t strcspn(const char *s, const char *c) { } while (*c++); for (;;) { - const v128_t cmp = __wasm_v128_chkbits(bitmap, *v); + v128_t v = *(v128_t *)addr; + v128_t cmp = __wasm_v128_chkbits(bitmap, v); // Bitmask is slow on AArch64, any_true is much faster. if (wasm_v128_any_true(cmp)) { // Clear the bits corresponding to align (little-endian) @@ -382,11 +376,11 @@ size_t strcspn(const char *s, const char *c) { // it's as if we didn't find anything. if (mask) { // Find the offset of the first one bit (little-endian). - return (char *)v - s + __builtin_ctz(mask); + return addr - (uintptr_t)s + __builtin_ctz(mask); } } align = 0; - v++; + addr += sizeof(v128_t); } } diff --git a/value.go b/value.go index 6753027..c89d536 100644 --- a/value.go +++ b/value.go @@ -57,7 +57,7 @@ func (v Value) Type() Datatype { return Datatype(v.c.call("sqlite3_value_type", v.protected())) } -// Type returns the numeric datatype of the value. +// NumericType returns the numeric datatype of the value. // // https://sqlite.org/c3ref/value_blob.html func (v Value) NumericType() Datatype {
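
Note (reviewer sketch, not part of the patch): the bitmap that __wasm_v128_setbit fills and __wasm_v128_chkbits probes is a 256-bit character set split across two 16-byte halves. A character c selects byte c & 15 of the low half (c < 0x80) or the high half (c >= 0x80), and bit (c >> 4) & 7 within that byte. A minimal scalar model of that layout, using illustrative names (bitmap256, setbit, chkbit) that do not appear in the patch:

#include <stdbool.h>
#include <stdint.h>

/* Two 16-byte halves of a 256-bit character set. */
typedef struct {
  uint8_t l[16]; /* characters 0x00..0x7f */
  uint8_t h[16]; /* characters 0x80..0xff */
} bitmap256;

/* Mark character c as a member of the set. */
static void setbit(bitmap256 *bm, uint8_t c) {
  uint8_t *half = c < 0x80 ? bm->l : bm->h;
  half[c & 15] |= 1u << ((c >> 4) & 7);
}

/* Test whether character c is in the set. The SIMD version computes both
   halves and ORs them: i8x16.swizzle yields zero for the half a lane does
   not belong to, because that lane's index keeps its top bit set
   (v & 0x8f for the low half, (v & 0x8f) ^ 0x80 for the high half). */
static bool chkbit(const bitmap256 *bm, uint8_t c) {
  const uint8_t *half = c < 0x80 ? bm->l : bm->h;
  return (half[c & 15] & (1u << ((c >> 4) & 7))) != 0;
}

The old indices_8_15 = (v & 0x80) ^ indices_0_7 simplifies to v & 0x0f, so lanes below 0x80 also probed the high half and matched spuriously whenever their counterpart with the top bit set (c | 0x80) was in the set. Computing indices_8_15 as indices_0_7 ^ 0x80 keeps the two halves disjoint; the updated tests introduce set characters with the top bit set (7 | 128, 5 | 128) to exercise the high half.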