diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 3211d01..9dcd5be 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -20,6 +20,7 @@ trap 'rm -f libc.tmp' EXIT -Wl,--export=memset \ -Wl,--export=memcpy \ -Wl,--export=memcmp \ + -Wl,--export=strlen \ -Wl,--export=strcmp \ -Wl,--export=strncmp diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index ce4e306..1ed0412 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 42e54ad..bd223c9 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1,12 +1,14 @@ (module $libc.wasm (type $0 (func (param i32 i32 i32) (result i32))) - (type $1 (func (param i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + (type $2 (func (param i32 i32) (result i32))) (memory $0 256) (data $0 (i32.const 1024) "\01") (export "memory" (memory $0)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) (export "memcmp" (func $memcmp)) + (export "strlen" (func $strlen)) (export "strcmp" (func $strcmp)) (export "strncmp" (func $strncmp)) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) @@ -128,127 +130,173 @@ ) (i32.const 0) ) - (func $strcmp (param $0 i32) (param $1 i32) (result i32) + (func $strlen (param $0 i32) (result i32) + (local $1 i32) (local $2 i32) - (local $3 i32) - (local $4 v128) - (local $5 v128) - (local.set $3 - (block $block (result i32) - (if - (i32.and - (i32.or - (local.get $0) - (local.get $1) - ) - (i32.const 15) + (local $scratch i32) + (block $block + (br_if $block + (i32.gt_u + (local.tee $1 + (local.get $0) ) - (then - (local.set $2 - (i32.load8_u - (local.get $0) + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) ) + (i32.const 16) ) - (br $block - (i32.load8_u + ) + ) + ) + (loop $label + (br_if $block + (i32.eqz + (i8x16.all_true + (v128.load align=1 (local.get $1) ) ) ) ) - (if - (v128.any_true - (v128.xor - (local.tee $5 - (v128.load - (local.get $1) - ) - ) - (local.tee $4 - (v128.load - (local.get $0) - ) - ) - ) - ) - (then - (local.set $2 - (i8x16.extract_lane_u 0 - (local.get $4) - ) - ) - (br $block - (i8x16.extract_lane_u 0 - (local.get $5) + (br_if $label + (i32.le_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 16) ) ) + (local.get $2) ) ) - (loop $label - (if - (i32.eqz - (i8x16.all_true - (local.get $4) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $4 - (v128.load offset=16 - (local.get $0) - ) - ) - (local.set $5 - (v128.load offset=16 + ) + ) + (local.set $0 + (i32.add + (i32.xor + (local.get $0) + (i32.const -1) + ) + (local.get $1) + ) + ) + (loop $label1 + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (block (result i32) + (local.set $scratch + (i32.load8_u (local.get $1) ) ) (local.set $1 (i32.add (local.get $1) + (i32.const 1) + ) + ) + (local.get $scratch) + ) + ) + ) + (local.get $0) + ) + (func $strcmp (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) (i32.const 16) ) ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.eqz - (v128.any_true - (v128.xor - (local.get $5) - (local.get $4) + (local.get $0) + ) + ) + (br_if $block + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (loop $label + (br_if $block + (v128.any_true + (v128.xor + (v128.load align=1 + (local.get $1) + ) + (local.tee $4 + (v128.load align=1 + (local.get $0) ) ) ) ) ) - (local.set $2 - (i8x16.extract_lane_u 0 - (local.get $4) + (if + (i32.eqz + (i8x16.all_true + (local.get $4) + ) + ) + (then + (return + (i32.const 0) + ) ) ) - (i8x16.extract_lane_u 0 - (local.get $5) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (br_if $block + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (local.get $2) + ) + ) + (br_if $label + (i32.le_u + (local.get $1) + (local.get $2) + ) ) ) ) (if (i32.eq - (i32.and - (local.get $2) - (i32.const 255) + (local.tee $2 + (i32.load8_u + (local.get $0) + ) ) - (i32.and - (local.get $3) - (i32.const 255) + (local.tee $3 + (i32.load8_u + (local.get $1) + ) ) ) (then @@ -264,16 +312,10 @@ (i32.const 1) ) ) - (local.set $2 - (local.get $3) - ) (loop $label1 (if (i32.eqz - (i32.and - (local.get $2) - (i32.const 255) - ) + (local.get $2) ) (then (return @@ -313,14 +355,8 @@ ) ) (i32.sub - (i32.and - (local.get $2) - (i32.const 255) - ) - (i32.and - (local.get $3) - (i32.const 255) - ) + (local.get $2) + (local.get $3) ) ) (func $strncmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 7ee710e..701dd6b 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -25,6 +25,7 @@ var ( memset api.Function memcpy api.Function memcmp api.Function + strlen api.Function strcmp api.Function strncmp api.Function stack [8]uint64 @@ -49,6 +50,7 @@ func TestMain(m *testing.M) { memset = mod.ExportedFunction("memset") memcpy = mod.ExportedFunction("memcpy") memcmp = mod.ExportedFunction("memcmp") + strlen = mod.ExportedFunction("strlen") strcmp = mod.ExportedFunction("strcmp") strncmp = mod.ExportedFunction("strncmp") memory, _ = mod.Memory().Read(0, mod.Memory().Size()) @@ -114,6 +116,22 @@ func Benchmark_memcmp(b *testing.B) { } } +func Benchmark_strlen(b *testing.B) { + clear(memory) + call(memset, ptr1, 5, size-1) + + b.SetBytes(size) + b.ResetTimer() + for range b.N { + call(strlen, ptr1) + } + b.StopTimer() + + if got := int32(call(strlen, ptr1)); got != size-1 { + b.Fatal(got) + } +} + func Benchmark_strcmp(b *testing.B) { clear(memory) call(memset, ptr1, 7, size-1) diff --git a/sqlite3/strings.c b/sqlite3/strings.c index 89364cf..74aca2b 100644 --- a/sqlite3/strings.c +++ b/sqlite3/strings.c @@ -1,3 +1,4 @@ +#include <__macro_PAGESIZE.h> #include #include #include @@ -21,8 +22,6 @@ void *memmove(void *dest, const void *src, size_t n) { #ifdef __wasm_simd128__ -#define UNALIGNED(x) ((uintptr_t)x % sizeof(*x)) - int memcmp(const void *v1, const void *v2, size_t n) { const v128_t *w1 = v1; const v128_t *w2 = v2; @@ -44,20 +43,41 @@ int memcmp(const void *v1, const void *v2, size_t n) { return 0; } -int strcmp(const char *c1, const char *c2) { - const v128_t *w1 = (void *)c1; - const v128_t *w2 = (void *)c2; - if (!(UNALIGNED(w1) | UNALIGNED(w2))) { - while (true) { - if (wasm_v128_any_true(*w1 ^ *w2)) { - break; // *w1 != *w2 - } - if (!wasm_i8x16_all_true(*w1)) { - return 0; // *w1 == *w2 and have a NUL - } - w1++; - w2++; +size_t strlen(const char *s) { + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + + const v128_t *w = (void *)s; + while (w <= limit) { + if (!wasm_i8x16_all_true(wasm_v128_load(w))) { + break; // *w has a NUL } + w++; + } + + const char *ss = (void *)w; + while (true) { + if (*ss == 0) break; + ss++; + } + return ss - s; +} + +int strcmp(const char *s1, const char *s2) { + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + + const v128_t *w1 = (void *)s1; + const v128_t *w2 = (void *)s2; + while (w1 <= limit && w2 <= limit) { + if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { + break; // *w1 != *w2 + } + if (!wasm_i8x16_all_true(wasm_v128_load(w1))) { + return 0; // *w1 == *w2 and have a NUL + } + w1++; + w2++; } const uint8_t *u1 = (void *)w1; @@ -71,9 +91,9 @@ int strcmp(const char *c1, const char *c2) { return 0; } -int strncmp(const char *c1, const char *c2, size_t n) { - const v128_t *w1 = (void *)c1; - const v128_t *w2 = (void *)c2; +int strncmp(const char *s1, const char *s2, size_t n) { + const v128_t *w1 = (void *)s1; + const v128_t *w2 = (void *)s2; for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { break; // *w1 != *w2 @@ -96,6 +116,4 @@ int strncmp(const char *c1, const char *c2, size_t n) { return 0; } -#undef UNALIGNED - #endif