diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index e28dd43..89144dc 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -11,8 +11,9 @@ SRCS="${1:-libc.c}" trap 'rm -f libc.c libc.tmp' EXIT cat << EOF > libc.c -#include #include +#include +#include EOF "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ @@ -40,11 +41,13 @@ EOF -Wl,--export=strchr \ -Wl,--export=strchrnul \ -Wl,--export=strcmp \ + -Wl,--export=strcasecmp \ -Wl,--export=strcpy \ -Wl,--export=strcspn \ -Wl,--export=strlen \ -Wl,--export=strncat \ -Wl,--export=strncmp \ + -Wl,--export=strncasecmp \ -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 3e5583b..db4fc8e 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 7eed352..536c6a4 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -2,13 +2,14 @@ (type $0 (func (param i32 i32) (result i32))) (type $1 (func (param i32 i32 i32) (result i32))) (type $2 (func (param i32 i32 i32 i32) (result i32))) - (type $3 (func (param i32) (result i32))) - (type $4 (func (param i32 i32 i32 i32 i32) (result i32))) - (type $5 (func (param i32 i32 i32 i32))) + (type $3 (func (param i32 i32 i32 i32))) + (type $4 (func (param i32) (result i32))) + (type $5 (func (param i32 i32 i32 i32 i32) (result i32))) (memory $0 256) (data $0 (i32.const 4096) "\01") (table $0 1 1 funcref) (export "memory" (memory $0)) + (export "qsort" (func $qsort)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) (export "memmove" (func $memcpy)) @@ -31,7 +32,454 @@ (export "strcpy" (func $strcpy)) (export "stpncpy" (func $stpncpy)) (export "strncpy" (func $strncpy)) - (export "qsort" (func $qsort)) + (export "strcasecmp" (func $strcasecmp)) + (export "strncasecmp" (func $strncasecmp)) + (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) + 
(local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 v128) + (local $scratch i32) + (if + (i32.ge_u + (local.get $1) + (i32.const 2) + ) + (then + (local.set $14 + (i32.mul + (local.get $1) + (local.get $2) + ) + ) + (local.set $15 + (i32.and + (local.get $2) + (i32.const 15) + ) + ) + (local.set $9 + (i32.and + (local.get $2) + (i32.const -16) + ) + ) + (local.set $16 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (local.set $17 + (i32.lt_u + (local.get $2) + (i32.const 16) + ) + ) + (loop $label5 + (local.set $6 + (i32.eq + (local.get $1) + (i32.const 2) + ) + ) + (local.set $18 + (i32.le_u + (i32.add + (local.get $0) + (i32.mul + (i32.add + (local.tee $13 + (select + (i32.const 1) + (local.tee $1 + (i32.wrap_i64 + (i64.div_u + (i64.sub + (i64.mul + (i64.extend_i32_u + (local.get $1) + ) + (i64.const 5) + ) + (i64.const 1) + ) + (i64.const 11) + ) + ) + ) + (local.get $6) + ) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (local.get $0) + ) + ) + (local.set $11 + (local.tee $10 + (i32.mul + (local.get $2) + (local.get $13) + ) + ) + ) + (loop $label4 + (block $block + (br_if $block + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $11) + (local.get $10) + ) + ) + (local.get $11) + ) + ) + (loop $label3 + (br_if $block + (i32.le_s + (call_indirect $0 (type $0) + (local.tee $4 + (i32.add + (local.get $0) + (local.tee $12 + (local.get $5) + ) + ) + ) + (local.tee $5 + (i32.add + (local.get $4) + (local.get $10) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + (block $block2 + (block $block3 + (block $block1 + (br_if $block1 + (local.get $17) + ) + (br_if $block1 + (i32.and + (i32.eqz + (local.get $18) + ) + (i32.lt_u + (local.get $5) + (i32.add + (local.get $12) + (local.get $16) + ) + ) + ) + ) + (local.set $5 
+ (i32.add + (local.get $5) + (local.get $9) + ) + ) + (local.set $7 + (i32.add + (local.get $4) + (local.get $9) + ) + ) + (local.set $6 + (local.get $9) + ) + (loop $label + (local.set $20 + (v128.load align=1 + (local.get $4) + ) + ) + (v128.store align=1 + (local.get $4) + (v128.load align=1 + (local.tee $8 + (i32.add + (local.get $4) + (local.get $10) + ) + ) + ) + ) + (v128.store align=1 + (local.get $8) + (local.get $20) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (br_if $label + (local.tee $6 + (i32.sub + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (local.set $6 + (local.get $15) + ) + (br_if $block2 + (i32.eq + (local.get $2) + (local.get $9) + ) + ) + (br $block3) + ) + (local.set $7 + (local.get $4) + ) + (local.set $6 + (local.get $2) + ) + ) + (br_if $block2 + (i32.lt_u + (block (result i32) + (local.set $scratch + (i32.sub + (local.get $6) + (i32.const 1) + ) + ) + (if + (local.tee $4 + (i32.and + (local.get $6) + (i32.const 3) + ) + ) + (then + (local.set $6 + (i32.and + (local.get $6) + (i32.const -4) + ) + ) + (loop $label1 + (local.set $19 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + (i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $19) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $4 + (i32.sub + (local.get $4) + (i32.const 1) + ) + ) + ) + ) + ) + ) + (local.get $scratch) + ) + (i32.const 3) + ) + ) + (loop $label2 + (local.set $4 + (i32.load8_u + (local.get $7) + ) + ) + (i32.store8 + (local.get $7) + (i32.load8_u + (local.get $5) + ) + ) + (i32.store8 + (local.get $5) + (local.get $4) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + 
(i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $8 + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $7) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (i32.load8_u + (local.tee $4 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + ) + ) + (i32.store8 + (local.get $4) + (local.get $8) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (br_if $label2 + (local.tee $6 + (i32.sub + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + ) + (br_if $label3 + (i32.le_u + (local.tee $5 + (i32.sub + (local.get $12) + (local.get $10) + ) + ) + (local.get $12) + ) + ) + ) + ) + (br_if $label4 + (i32.lt_u + (local.tee $11 + (i32.add + (local.get $2) + (local.get $11) + ) + ) + (local.get $14) + ) + ) + ) + (br_if $label5 + (i32.ge_u + (local.get $13) + (i32.const 2) + ) + ) + ) + ) + ) + ) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -3121,451 +3569,461 @@ ) (local.get $0) ) - (func $qsort (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) - (local $4 i32) - (local $5 i32) - (local $6 i32) - (local $7 i32) - (local $8 i32) - (local $9 i32) - (local $10 i32) - (local $11 i32) - (local $12 i32) - (local $13 i32) - (local $14 i32) - (local $15 i32) - (local $16 i32) - (local $17 i32) - (local $18 i32) - (local $19 i32) - (local $20 v128) - (local $scratch i32) - (if - (i32.ge_u - (local.get $1) - (i32.const 2) + (func $strcasecmp (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + 
(memory.size) + (i32.const 16) + ) + (select + (local.get $0) + (local.get $1) + (i32.gt_u + (local.get $0) + (local.get $1) + ) + ) + ) + ) + (i32.const 16) + ) ) - (then - (local.set $14 - (i32.mul - (local.get $1) - (local.get $2) + (loop $label + (br_if $block + (v128.any_true + (v128.xor + (v128.or + (local.tee $4 + (v128.load align=1 + (local.get $1) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i32x4.add + (local.get $4) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + (local.tee $4 + (v128.or + (local.tee $4 + (v128.load align=1 + (local.get $0) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i32x4.add + (local.get $4) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + ) + ) ) ) - (local.set $15 - (i32.and - (local.get $2) - (i32.const 15) + (if + (i32.eqz + (i8x16.all_true + (local.get $4) + ) + ) + (then + (return + (i32.const 0) + ) ) ) - (local.set $9 - (i32.and - (local.get $2) - (i32.const -16) - ) - ) - (local.set $16 + (local.set $1 (i32.add - (local.get $0) - (local.get $2) - ) - ) - (local.set $17 - (i32.lt_u - (local.get $2) + (local.get $1) (i32.const 16) ) ) - (loop $label5 - (local.set $6 - (i32.eq - (local.get $1) - (i32.const 2) - ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 16) ) - (local.set $18 - (i32.le_u - (i32.add - (local.get $0) - (i32.mul - (i32.add - (local.tee $13 - (select - (i32.const 1) - (local.tee $1 - (i32.wrap_i64 - (i64.div_u - (i64.sub - (i64.mul - (i64.extend_i32_u - (local.get $1) - ) - (i64.const 5) - ) - (i64.const 1) - ) - (i64.const 11) - ) - ) - ) - (local.get 
$6) - ) - ) - (i32.const 1) - ) - (local.get $2) + ) + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $2 + (select + (i32.or + (local.tee $2 + (i32.load8_u + (local.get $0) ) ) + (i32.const 32) + ) + (local.get $2) + (i32.lt_u + (i32.sub + (local.get $2) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $3 + (select + (i32.or + (local.tee $3 + (i32.load8_u + (local.get $1) + ) + ) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + ) + (then + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (loop $label1 + (if + (i32.eqz + (local.get $2) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $2 + (i32.load8_u (local.get $0) ) ) - (local.set $11 - (local.tee $10 - (i32.mul - (local.get $2) - (local.get $13) - ) + (local.set $3 + (i32.load8_u + (local.get $1) ) ) - (loop $label4 - (block $block - (br_if $block - (i32.gt_u - (local.tee $5 - (i32.sub - (local.get $11) - (local.get $10) - ) - ) - (local.get $11) - ) - ) - (loop $label3 - (br_if $block - (i32.le_s - (call_indirect $0 (type $0) - (local.tee $4 - (i32.add - (local.get $0) - (local.tee $12 - (local.get $5) - ) - ) - ) - (local.tee $5 - (i32.add - (local.get $4) - (local.get $10) - ) - ) - (local.get $3) - ) - (i32.const 0) - ) - ) - (block $block2 - (block $block3 - (block $block1 - (br_if $block1 - (local.get $17) - ) - (br_if $block1 - (i32.and - (i32.eqz - (local.get $18) - ) - (i32.lt_u - (local.get $5) - (i32.add - (local.get $12) - (local.get $16) - ) - ) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (local.get $9) - ) - ) - (local.set $7 - (i32.add - (local.get $4) - (local.get $9) - ) - ) - (local.set $6 - (local.get $9) - ) - (loop $label - (local.set $20 - (v128.load align=1 - (local.get $4) - ) - 
) - (v128.store align=1 - (local.get $4) - (v128.load align=1 - (local.tee $8 - (i32.add - (local.get $4) - (local.get $10) - ) - ) - ) - ) - (v128.store align=1 - (local.get $8) - (local.get $20) - ) - (local.set $4 - (i32.add - (local.get $4) - (i32.const 16) - ) - ) - (br_if $label - (local.tee $6 - (i32.sub - (local.get $6) - (i32.const 16) - ) - ) - ) - ) - (local.set $6 - (local.get $15) - ) - (br_if $block2 - (i32.eq - (local.get $2) - (local.get $9) - ) - ) - (br $block3) - ) - (local.set $7 - (local.get $4) - ) - (local.set $6 - (local.get $2) - ) - ) - (br_if $block2 - (i32.lt_u - (block (result i32) - (local.set $scratch - (i32.sub - (local.get $6) - (i32.const 1) - ) - ) - (if - (local.tee $4 - (i32.and - (local.get $6) - (i32.const 3) - ) - ) - (then - (local.set $6 - (i32.and - (local.get $6) - (i32.const -4) - ) - ) - (loop $label1 - (local.set $19 - (i32.load8_u - (local.get $7) - ) - ) - (i32.store8 - (local.get $7) - (i32.load8_u - (local.get $5) - ) - ) - (i32.store8 - (local.get $5) - (local.get $19) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - (local.set $7 - (i32.add - (local.get $7) - (i32.const 1) - ) - ) - (br_if $label1 - (local.tee $4 - (i32.sub - (local.get $4) - (i32.const 1) - ) - ) - ) - ) - ) - ) - (local.get $scratch) - ) - (i32.const 3) - ) - ) - (loop $label2 - (local.set $4 - (i32.load8_u - (local.get $7) - ) - ) - (i32.store8 - (local.get $7) - (i32.load8_u - (local.get $5) - ) - ) - (i32.store8 - (local.get $5) - (local.get $4) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 1) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $5) - (i32.const 1) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 2) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get 
$5) - (i32.const 2) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $8 - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $7) - (i32.const 3) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (i32.load8_u - (local.tee $4 - (i32.add - (local.get $5) - (i32.const 3) - ) - ) - ) - ) - (i32.store8 - (local.get $4) - (local.get $8) - ) - (local.set $7 - (i32.add - (local.get $7) - (i32.const 4) - ) - ) - (local.set $5 - (i32.add - (local.get $5) - (i32.const 4) - ) - ) - (br_if $label2 - (local.tee $6 - (i32.sub - (local.get $6) - (i32.const 4) - ) - ) - ) - ) - ) - (br_if $label3 - (i32.le_u - (local.tee $5 - (i32.sub - (local.get $12) - (local.get $10) - ) - ) - (local.get $12) - ) - ) - ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) ) - (br_if $label4 - (i32.lt_u - (local.tee $11 - (i32.add + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (i32.eq + (local.tee $2 + (select + (i32.or (local.get $2) - (local.get $11) + (i32.const 32) + ) + (local.get $2) + (i32.lt_u + (i32.sub + (local.get $2) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $3 + (select + (i32.or + (local.get $3) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) ) ) - (local.get $14) ) - ) - ) - (br_if $label5 - (i32.ge_u - (local.get $13) - (i32.const 2) ) ) ) ) ) + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (func $strncasecmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 v128) + (block $block + (if + (i32.ge_u + (local.tee $2 + (select + (local.tee $3 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (select + (local.get $0) + (local.get $1) + (i32.gt_u + (local.get $0) + (local.get $1) + ) + ) + ) + ) + (local.get $2) + (i32.gt_u + (local.get $2) + (local.get $3) + ) + ) + ) + (i32.const 16) + ) + (then + (loop $label + (br_if $block + (v128.any_true + 
(v128.xor + (v128.or + (local.tee $5 + (v128.load align=1 + (local.get $1) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i32x4.add + (local.get $5) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + (local.tee $5 + (v128.or + (local.tee $5 + (v128.load align=1 + (local.get $0) + ) + ) + (v128.bitselect + (v128.const i32x4 0x20202020 0x20202020 0x20202020 0x20202020) + (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + (i8x16.gt_s + (i32x4.add + (local.get $5) + (v128.const i32x4 0x25252525 0x25252525 0x25252525 0x25252525) + ) + (v128.const i32x4 0x65656565 0x65656565 0x65656565 0x65656565) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i8x16.all_true + (local.get $5) + ) + ) + (then + (return + (i32.const 0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + ) + ) + ) + (br_if $block + (local.get $2) + ) + (return + (i32.const 0) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (loop $label1 + (if + (i32.ne + (local.tee $3 + (select + (i32.or + (local.tee $3 + (i32.load8_u + (local.get $0) + ) + ) + (i32.const 32) + ) + (local.get $3) + (i32.lt_u + (i32.sub + (local.get $3) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + (local.tee $4 + (select + (i32.or + (local.tee $4 + (i32.load8_u + (local.get $1) + ) + ) + (i32.const 32) + ) + (local.get $4) + (i32.lt_u + (i32.sub + (local.get $4) + (i32.const 65) + ) + (i32.const 26) + ) + ) + ) + ) + (then + (return + (i32.sub + (local.get $3) + (local.get $4) + ) + ) + ) + ) + (if + (local.get $3) + (then + (local.set $2 + (i32.sub + 
(local.tee $3 + (local.get $2) + ) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (local.get $3) + ) + ) + ) + ) + (i32.const 0) ) (func $strnlen (param $0 i32) (param $1 i32) (result i32) (local $2 i32) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index a046e3b..dca0560 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -22,21 +22,23 @@ const ( ) var ( - memory []byte - module api.Module - memset api.Function - memcpy api.Function - memchr api.Function - memcmp api.Function - strlen api.Function - strchr api.Function - strcmp api.Function - strstr api.Function - strspn api.Function - strrchr api.Function - strncmp api.Function - strcspn api.Function - stack [8]uint64 + memory []byte + module api.Module + memset api.Function + memcpy api.Function + memchr api.Function + memcmp api.Function + strlen api.Function + strchr api.Function + strcmp api.Function + strstr api.Function + strspn api.Function + strrchr api.Function + strncmp api.Function + strcspn api.Function + strcasecmp api.Function + strncasecmp api.Function + stack [8]uint64 ) func call(fn api.Function, arg ...uint64) uint64 { @@ -70,6 +72,8 @@ func TestMain(m *testing.M) { strrchr = mod.ExportedFunction("strrchr") strncmp = mod.ExportedFunction("strncmp") strcspn = mod.ExportedFunction("strcspn") + strcasecmp = mod.ExportedFunction("strcasecmp") + strncasecmp = mod.ExportedFunction("strncasecmp") memory, _ = mod.Memory().Read(0, mod.Memory().Size()) os.Exit(m.Run()) @@ -182,6 +186,32 @@ func Benchmark_strncmp(b *testing.B) { } } +func Benchmark_strcasecmp(b *testing.B) { + clear(memory) + fill(memory[ptr1:ptr1+size-1], 7) + fill(memory[ptr2:ptr2+size/2], 7) + fill(memory[ptr2+size/2:ptr2+size-1], 5) + + b.SetBytes(size/2 + 1) + b.ResetTimer() + for range b.N { + call(strcasecmp, ptr1, ptr2, size) + } +} + +func 
Benchmark_strncasecmp(b *testing.B) { + clear(memory) + fill(memory[ptr1:ptr1+size-1], 7) + fill(memory[ptr2:ptr2+size/2], 7) + fill(memory[ptr2+size/2:ptr2+size-1], 5) + + b.SetBytes(size/2 + 1) + b.ResetTimer() + for range b.N { + call(strncasecmp, ptr1, ptr2, size-1) + } +} + func Benchmark_strspn(b *testing.B) { clear(memory) fill(memory[ptr1:ptr1+size/2], 7) diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 7a1e857..494d6bb 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -40,7 +40,7 @@ void *memmove(void *dest, const void *src, size_t n) { __attribute__((weak)) int memcmp(const void *v1, const void *v2, size_t n) { - // Baseline algorithm. + // Scalar algorithm. if (n < sizeof(v128_t)) { const unsigned char *u1 = (unsigned char *)v1; const unsigned char *u2 = (unsigned char *)v2; @@ -89,7 +89,7 @@ int memcmp(const void *v1, const void *v2, size_t n) { #else // __OPTIMIZE_SIZE__ static int __memcmpeq(const void *v1, const void *v2, size_t n) { - // Baseline algorithm. + // Scalar algorithm. if (n < sizeof(v128_t)) { const unsigned char *u1 = (unsigned char *)v1; const unsigned char *u2 = (unsigned char *)v2; @@ -184,7 +184,7 @@ void *memrchr(const void *v, int c, size_t n) { } } - // Baseline algorithm. + // Scalar algorithm. const char *a = (char *)w; while (n--) { if (*(--a) == (char)c) return (char *)a; @@ -219,6 +219,19 @@ size_t strlen(const char *s) { } } +static int __strcmp_s(const char *s1, const char *s2) { + // Scalar algorithm. + const unsigned char *u1 = (unsigned char *)s1; + const unsigned char *u2 = (unsigned char *)s2; + for (;;) { + if (*u1 != *u2) return *u1 - *u2; + if (*u1 == 0) break; + u1++; + u2++; + } + return 0; +} + static int __strcmp(const char *s1, const char *s2) { // How many bytes can be read before pointers go out of bounds. size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // @@ -243,28 +256,7 @@ static int __strcmp(const char *s1, const char *s2) { w2++; } - // Baseline algorithm. 
- const unsigned char *u1 = (unsigned char *)w1; - const unsigned char *u2 = (unsigned char *)w2; - for (;;) { - if (*u1 != *u2) return *u1 - *u2; - if (*u1 == 0) break; - u1++; - u2++; - } - return 0; -} - -static int __strcmp_s(const char *s1, const char *s2) { - const unsigned char *u1 = (unsigned char *)s1; - const unsigned char *u2 = (unsigned char *)s2; - for (;;) { - if (*u1 != *u2) return *u1 - *u2; - if (*u1 == 0) break; - u1++; - u2++; - } - return 0; + return __strcmp_s((char *)w1, (char *)w2); } __attribute__((weak, always_inline)) @@ -302,7 +294,7 @@ int strncmp(const char *s1, const char *s2, size_t n) { w2++; } - // Baseline algorithm. + // Scalar algorithm. const unsigned char *u1 = (unsigned char *)w1; const unsigned char *u2 = (unsigned char *)w2; while (n--) { @@ -446,7 +438,7 @@ size_t strspn(const char *s, const char *c) { w++; } - // Baseline algorithm. + // Scalar algorithm. for (s = (char *)w; *s == *c; s++); return s - a; } @@ -469,7 +461,7 @@ size_t strspn(const char *s, const char *c) { w++; } - // Baseline algorithm. + // Scalar algorithm. for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++); return s - a; } @@ -502,7 +494,7 @@ size_t strcspn(const char *s, const char *c) { w++; } - // Baseline algorithm. + // Scalar algorithm. for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++); return s - a; } @@ -573,7 +565,7 @@ static const char *__memmem_raita(const char *haystk, size_t sh, haystk += skip; } - // Baseline algorithm. + // Scalar algorithm. 
for (size_t j = 0; j <= sh - sn; j++) {
     for (size_t i = 0;; i++) {
       if (sn == i) return haystk;
diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h
index 373a33a..80d82fa 100644
--- a/sqlite3/libc/strings.h
+++ b/sqlite3/libc/strings.h
@@ -1,7 +1,11 @@
 #ifndef _WASM_SIMD128_STRINGS_H
 #define _WASM_SIMD128_STRINGS_H
 
+#include
+#include
 #include
+#include
+#include <__macro_PAGESIZE.h>
 
 #include_next // the system strings.h
 
@@ -16,6 +20,131 @@ int bcmp(const void *v1, const void *v2, size_t n) {
   return __memcmpeq(v1, v2, n);
 }
 
+// Lowercases the ASCII letters 'A'..'Z' in each of the 16 byte lanes of v;
+// every other byte value is returned unchanged.
+// The arithmetic uses explicit i8x16 intrinsics because a plain `v + splat`
+// on v128_t operands compiles to i32x4.add, whose carries leak from bytes
+// >= 0xDB into the next byte (folding "\xDB@" and "\xDB`" together).
+static v128_t __tolower8x16(v128_t v) {
+  // Bias each byte so that 'A'..'Z' lands on the top 26 signed values;
+  // every other byte stays at or below the threshold or wraps negative.
+  v128_t i = wasm_i8x16_add(v, wasm_i8x16_splat(INT8_MAX - ('Z')));
+  // All-ones mask in the lanes that hold an uppercase letter.
+  i = wasm_i8x16_gt(i, wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1)));
+  // Keep only the case bit ('a' - 'A') in those lanes and set it in v.
+  i = wasm_v128_and(i, wasm_i8x16_splat('a' - 'A'));
+  return wasm_v128_or(v, i);
+}
+
+// Byte-at-a-time case-insensitive comparison of two NUL-terminated strings.
+// Returns the difference of the first pair of tolower()-folded bytes that
+// differ, or 0 if the strings match up to and including the terminator.
+static int __strcasecmp_s(const char *s1, const char *s2) {
+  // Scalar algorithm.
+  const unsigned char *u1 = (unsigned char *)s1;
+  const unsigned char *u2 = (unsigned char *)s2;
+  for (;;) {
+    int c1 = tolower(*u1);
+    int c2 = tolower(*u2);
+    if (c1 != c2) return c1 - c2;
+    if (c1 == 0) break;
+    u1++;
+    u2++;
+  }
+  return 0;
+}
+
+// Vectorized case-insensitive comparison: folds and compares 16 bytes per
+// iteration, then hands the first block containing a difference (or the
+// tail near the end of memory) to __strcasecmp_s.
+static int __strcasecmp(const char *s1, const char *s2) {
+  // How many bytes can be read before pointers go out of bounds.
+  size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
+             (size_t)(s1 > s2 ? s1 : s2);
+
+  // Unaligned loads handle the case where the strings
+  // have mismatching alignments.
+  const v128_t *w1 = (v128_t *)s1;
+  const v128_t *w2 = (v128_t *)s2;
+  for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
+    v128_t v1 = __tolower8x16(wasm_v128_load(w1));
+    v128_t v2 = __tolower8x16(wasm_v128_load(w2));
+
+    // Find any single bit difference.
+    if (wasm_v128_any_true(v1 ^ v2)) {
+      // The terminator may come before the difference.
+      break;
+    }
+    // We know all characters are equal.
+    // If any is a terminator the strings are equal.
+    if (!wasm_i8x16_all_true(v1)) {
+      return 0;
+    }
+    w1++;
+    w2++;
+  }
+
+  return __strcasecmp_s((char *)w1, (char *)w2);
+}
+
+// Case-insensitive comparison of two NUL-terminated strings.
+// Returns 0 when equal, else the difference of the first differing folded bytes.
+__attribute__((weak))
+int strcasecmp(const char *s1, const char *s2) {
+  // Skip the vector search when comparing against small literal strings.
+  if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
+    return __strcasecmp_s(s1, s2);
+  }
+  return __strcasecmp(s1, s2);
+}
+
+// Case-insensitive comparison of at most n bytes of two strings.
+// Returns 0 if the first n bytes match after case folding or a shared
+// terminator is reached first; otherwise the difference of the first
+// differing folded bytes.
+__attribute__((weak))
+int strncasecmp(const char *s1, const char *s2, size_t n) {
+  // How many bytes can be read before pointers go out of bounds.
+  size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
+             (size_t)(s1 > s2 ? s1 : s2);
+  if (n > N) n = N;
+
+  // Unaligned loads handle the case where the strings
+  // have mismatching alignments.
+  const v128_t *w1 = (v128_t *)s1;
+  const v128_t *w2 = (v128_t *)s2;
+  for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
+    v128_t v1 = __tolower8x16(wasm_v128_load(w1));
+    v128_t v2 = __tolower8x16(wasm_v128_load(w2));
+
+    // Find any single bit difference.
+    if (wasm_v128_any_true(v1 ^ v2)) {
+      // The terminator may come before the difference.
+      break;
+    }
+    // We know all characters are equal.
+    // If any is a terminator the strings are equal.
+    if (!wasm_i8x16_all_true(v1)) {
+      return 0;
+    }
+    w1++;
+    w2++;
+  }
+
+  // Scalar algorithm.
+  const unsigned char *u1 = (unsigned char *)w1;
+  const unsigned char *u2 = (unsigned char *)w2;
+  while (n--) {
+    int c1 = tolower(*u1);
+    int c2 = tolower(*u2);
+    if (c1 != c2) return c1 - c2;
+    if (c1 == 0) break;
+    u1++;
+    u2++;
+  }
+  return 0;
+}
+
 #endif // __wasm_simd128__
 
 #ifdef __cplusplus