diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 2b7a27d..5589d94 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -142,38 +142,42 @@ (local $3 i32) (local $4 i32) (local $5 i32) - (local $6 i32) + (local $6 v128) (local $7 v128) - (local $8 v128) - (local $scratch i32) - (block $block - (br_if $block - (i32.eqz - (local.get $2) + (block $block2 + (block $block + (br_if $block + (i32.eqz + (local.get $2) + ) ) - ) - (local.set $4 - (i32.and - (local.get $0) - (i32.const 15) + (local.set $3 + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + (local.set $4 + (i32.sub + (local.get $2) + (i32.const 1) + ) ) - ) - (block $block2 (block $block1 (br_if $block1 (i32.eqz (v128.any_true - (local.tee $7 + (local.tee $6 (i8x16.eq (v128.load - (local.tee $3 + (local.tee $2 (i32.and (local.get $0) (i32.const -16) ) ) ) - (local.tee $8 + (local.tee $7 (i8x16.splat (local.get $1) ) @@ -188,123 +192,109 @@ (local.tee $5 (i32.and (i8x16.bitmask - (local.get $7) + (local.get $6) ) (i32.shl (i32.const -1) - (local.get $4) + (local.get $3) ) ) ) ) ) (local.set $1 - (local.get $2) + (local.get $4) ) (br $block2) ) (br_if $block - (i32.lt_u - (local.get $2) + (i32.gt_u (local.tee $1 (i32.sub - (local.get $2) - (local.tee $3 - (i32.sub - (i32.const 16) - (local.get $4) - ) + (i32.add + (local.get $3) + (local.get $4) ) - ) - ) - ) - ) - (br_if $block - (i32.eqz - (local.get $1) - ) - ) - (local.set $3 - (i32.add - (local.get $0) - (local.get $3) - ) - ) - (block $block3 - (loop $label - (br_if $block3 - (v128.any_true - (local.tee $7 - (i8x16.eq - (v128.load - (local.get $3) - ) - (local.get $8) - ) - ) - ) - ) - (br_if $block - (i32.gt_u - (local.tee $0 - (i32.sub - (local.get $1) - (i32.const 16) - ) - ) - (local.get $1) - ) - ) - (local.set $3 - (i32.add - (local.get $3) (i32.const 16) ) ) - (br_if $label - (i32.eqz - (block (result i32) - (local.set $scratch - (i32.eq - (local.get $1) - (i32.const 16) - ) + (local.get $4) + ) + ) + (local.set $2 + (i32.add + (i32.sub + (local.get $0) + (local.get $3) + ) + (i32.const 16) + ) + ) + (loop $label + (if + (v128.any_true + (local.tee $6 + (i8x16.eq + (v128.load + (local.get $2) ) - (local.set $1 - (local.get $0) - ) - (local.get $scratch) + (local.get $7) + ) + ) + ) + (then + (local.set $5 + (i8x16.bitmask + (local.get $6) + ) + ) + (local.set $3 + (i32.const 0) + ) + (br $block2) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + (br_if $label + (i32.ge_u + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $1) + (i32.const 16) ) ) ) ) - (br $block) ) - (local.set $5 - (i8x16.bitmask - (local.get $7) + ) + (return + (i32.const 0) + ) + ) + (select + (i32.add + (local.get $2) + (local.tee $0 + (i32.ctz + (local.get $5) ) ) ) - (local.set $6 - (select - (i32.add - (local.get $3) - (local.tee $0 - (i32.ctz - (local.get $5) - ) - ) - ) - (i32.const 0) - (i32.lt_u - (local.get $0) - (local.get $1) - ) + (i32.const 0) + (i32.le_u + (local.get $0) + (i32.add + (local.get $1) + (local.get $3) ) ) ) - (local.get $6) ) (func $strlen (param $0 i32) (result i32) (local $1 i32) diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 140d701..2bdcebe 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -71,18 +71,22 @@ int memcmp(const void *v1, const void *v2, size_t n) { __attribute__((weak)) void *memchr(const void *v, int c, size_t n) { + if (n-- == 0) { + return NULL; + } + uintptr_t align = (uintptr_t)v % sizeof(v128_t); const v128_t *w = (void *)(v - align); const v128_t wc = wasm_i8x16_splat(c); - while (n) { + while (true) { const v128_t cmp = wasm_i8x16_eq(*w, wc); if (wasm_v128_any_true(cmp)) { int mask = wasm_i8x16_bitmask(cmp) >> align << align; __builtin_assume(mask || align); if (mask) { size_t ctz = __builtin_ctz(mask); - return ctz < n ? (void *)w + ctz : NULL; + return ctz <= n + align ? (void *)w + ctz : NULL; } } if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) { @@ -91,7 +95,6 @@ void *memchr(const void *v, int c, size_t n) { align = 0; w++; } - return NULL; } __attribute__((weak)) @@ -244,6 +247,7 @@ size_t strspn(const char *s, const char *c) { volatile v128_t *w = (void *)byteset; #pragma unroll for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){}; + while (*c && (byteset[*(uint8_t *)c] = 1)) c++; #pragma unroll 4 while (byteset[*(uint8_t *)s]) s++; @@ -263,6 +267,7 @@ size_t strcspn(const char *s, const char *c) { volatile v128_t *w = (void *)byteset; #pragma unroll for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){}; + while ((byteset[*(uint8_t *)c] = 1) && *c) c++; #pragma unroll 4 while (!byteset[*(uint8_t *)s]) s++;