diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index 7d8443a..d2a6883 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -21,10 +21,13 @@ trap 'rm -f libc.tmp' EXIT -Wl,--initial-memory=16777216 \ -Wl,--export=memset \ -Wl,--export=memcpy \ + -Wl,--export=memchr \ -Wl,--export=memcmp \ -Wl,--export=strlen \ + -Wl,--export=strchr \ -Wl,--export=strcmp \ - -Wl,--export=strncmp + -Wl,--export=strncmp \ + -Wl,--export=strchrnul "$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp "$BINARYEN/wasm-opt" -g --strip --strip-producers -c -O3 \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 1ed0412..e0a8d4e 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index bd223c9..4bd601a 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -1,16 +1,19 @@ (module $libc.wasm (type $0 (func (param i32 i32 i32) (result i32))) - (type $1 (func (param i32) (result i32))) - (type $2 (func (param i32 i32) (result i32))) + (type $1 (func (param i32 i32) (result i32))) + (type $2 (func (param i32) (result i32))) (memory $0 256) (data $0 (i32.const 1024) "\01") (export "memory" (memory $0)) (export "memset" (func $memset)) (export "memcpy" (func $memcpy)) (export "memcmp" (func $memcmp)) + (export "memchr" (func $memchr)) (export "strlen" (func $strlen)) (export "strcmp" (func $strcmp)) (export "strncmp" (func $strncmp)) + (export "strchrnul" (func $strchrnul)) + (export "strchr" (func $strchr)) (func $memset (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (memory.fill (local.get $0) @@ -130,6 +133,96 @@ ) (i32.const 0) ) + (func $memchr (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 v128) + (block $block2 + (block $block1 + (block $block + (if + (i32.ge_u + (local.get $2) + (i32.const 16) + ) + (then + (local.set $3 + (i8x16.splat + (local.get $1) + ) + ) + (loop $label + (br_if $block + (v128.any_true + (i8x16.eq + (v128.load align=1 + (local.get $0) + ) + (local.get $3) + ) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (br_if $label + (i32.gt_u + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 16) + ) + ) + (i32.const 15) + ) + ) + ) + ) + ) + (br_if $block1 + (i32.eqz + (local.get $2) + ) + ) + ) + (local.set $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (loop $label1 + (br_if $block2 + (i32.eq + (i32.load8_u + (local.get $0) + ) + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br_if $label1 + (local.tee $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + (local.get $0) + ) (func $strlen (param $0 i32) (result i32) (local $1 i32) (local $2 i32) @@ -490,6 +583,195 @@ ) (i32.const 0) ) + (func $strchrnul (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 v128) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (i32.const 16) + ) + ) + (local.get $0) + ) + ) + (local.set $3 + (i8x16.splat + (local.get $1) + ) + ) + (loop $label + (br_if $block + (i32.eqz + (i8x16.all_true + (local.tee $4 + (v128.load align=1 + (local.get $0) + ) + ) + ) + ) + ) + (br_if $block + (v128.any_true + (i8x16.eq + (local.get $4) + (local.get $3) + ) + ) + ) + (br_if $label + (i32.le_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (local.get $2) + ) + ) + ) + ) + (local.set $1 + (i32.extend8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.sub + (local.get $0) + (i32.const 1) + ) + ) + (loop $label1 + (br_if $label1 + (select + (local.tee $2 + (i32.load8_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + ) + (i32.const 0) + (i32.ne + (local.get $1) + (local.get $2) + ) + ) + ) + ) + (local.get $0) + ) + (func $strchr (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 v128) + (local $4 v128) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (i32.const 16) + ) + ) + (local.get $0) + ) + ) + (local.set $3 + (i8x16.splat + (local.get $1) + ) + ) + (loop $label + (br_if $block + (i32.eqz + (i8x16.all_true + (local.tee $4 + (v128.load align=1 + (local.get $0) + ) + ) + ) + ) + ) + (br_if $block + (v128.any_true + (i8x16.eq + (local.get $4) + (local.get $3) + ) + ) + ) + (br_if $label + (i32.le_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (local.get $2) + ) + ) + ) + ) + (local.set $1 + (i32.extend8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.sub + (local.get $0) + (i32.const 1) + ) + ) + (loop $label1 + (br_if $label1 + (select + (local.tee $2 + (i32.load8_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + ) + (i32.const 0) + (i32.ne + (local.get $1) + (local.get $2) + ) + ) + ) + ) + (select + (local.get $0) + (i32.const 0) + (i32.eq + (local.get $1) + (local.get $2) + ) + ) + ) ;; features section: mutable-globals, nontrapping-float-to-int, simd, bulk-memory, sign-ext, reference-types, multivalue, bulk-memory-opt ) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 701dd6b..469c86e 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -25,8 +25,10 @@ var ( memset api.Function memcpy api.Function memcmp api.Function + memchr api.Function strlen api.Function strcmp api.Function + strchr api.Function strncmp api.Function stack [8]uint64 ) @@ -49,8 +51,10 @@ func TestMain(m *testing.M) { module = mod memset = mod.ExportedFunction("memset") memcpy = mod.ExportedFunction("memcpy") + memchr = mod.ExportedFunction("memchr") memcmp = mod.ExportedFunction("memcmp") strlen = mod.ExportedFunction("strlen") + strchr = mod.ExportedFunction("strchr") strcmp = mod.ExportedFunction("strcmp") strncmp = mod.ExportedFunction("strncmp") memory, _ = mod.Memory().Read(0, mod.Memory().Size()) @@ -93,6 +97,23 @@ func Benchmark_memcpy(b *testing.B) { } } +func Benchmark_memchr(b *testing.B) { + clear(memory) + call(memset, ptr1, 7, size) + call(memset, ptr1+size/2, 5, size) + + b.SetBytes(size / 2) + b.ResetTimer() + for range b.N { + call(memchr, ptr1, 5, size) + } + b.StopTimer() + + if got := call(memchr, ptr1, 5, size); got != ptr1+size/2 { + b.Fatal(got) + } +} + func Benchmark_memcmp(b *testing.B) { clear(memory) call(memset, ptr1, 7, size) @@ -132,6 +153,23 @@ func Benchmark_strlen(b *testing.B) { } } +func Benchmark_strchr(b *testing.B) { + clear(memory) + call(memset, ptr1, 7, size) + call(memset, ptr1+size/2, 5, size) + + b.SetBytes(size / 2) + b.ResetTimer() + for range b.N { + call(strchr, ptr1, 5) + } + b.StopTimer() + + if got := call(strchr, ptr1, 5); got != ptr1+size/2 { + b.Fatal(got) + } +} + func Benchmark_strcmp(b *testing.B) { clear(memory) call(memset, ptr1, 7, size-1) diff --git a/sqlite3/strings.c b/sqlite3/strings.c index 74aca2b..ea4c2fd 100644 --- a/sqlite3/strings.c +++ b/sqlite3/strings.c @@ -43,6 +43,26 @@ int memcmp(const void *v1, const void *v2, size_t n) { return 0; } +void *memchr(const void *v, int c, size_t n) { + c = (uint8_t)c; + + const v128_t wc = wasm_i8x16_splat(c); + const v128_t *w = (void *)v; + for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + if (wasm_v128_any_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) { + break; // *w has a c + } + w++; + } + + const uint8_t *u = (void *)w; + while (n--) { + if (*u == c) return (void *)u; + u++; + } + return 0; +} + size_t strlen(const char *s) { const v128_t *const limit = (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; @@ -116,4 +136,35 @@ int strncmp(const char *s1, const char *s2, size_t n) { return 0; } +char *strchrnul(const char *s, int c) { + c = (char)c; + + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + + const v128_t wc = wasm_i8x16_splat(c); + const v128_t *w = (void *)s; + while (w <= limit) { + if (!wasm_i8x16_all_true(wasm_v128_load(w))) { + break; // *w has a NUL + } + if (wasm_v128_any_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) { + break; // *w has a c + } + w++; + } + + s = (void *)w; + while (true) { + if (*s == 0 || *s == c) break; + s++; + } + return (void *)s; +} + +char *strchr(const char *s, int c) { + char *r = strchrnul(s, c); + return *(char *)r == (char)c ? r : 0; +} + #endif