From bb87a920f7abe873696d66f8360d76db9663f32d Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Tue, 22 Apr 2025 01:19:59 +0100 Subject: [PATCH] Fix `memchr`. --- sqlite3/libc/build.sh | 7 +- sqlite3/libc/libc.wasm | Bin 2767 -> 3582 bytes sqlite3/libc/libc.wat | 156 ++++++++++++++++++++++++-------------- sqlite3/libc/libc_test.go | 3 + sqlite3/libc/string.h | 38 ++++++---- 5 files changed, 130 insertions(+), 74 deletions(-) diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index cff8387..b9082d7 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -10,10 +10,13 @@ SRCS="${1:-libc.c}" "../tools.sh" trap 'rm -f libc.c libc.tmp' EXIT -echo '#include ' > libc.c -echo '#include ' >> libc.c +cat << EOF > libc.c +#include +#include +EOF "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ + -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ -o libc.wasm -I. "$SRCS" \ -mexec-model=reactor \ -msimd128 -mmutable-globals -mmultivalue \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 4358642cd04b0811aab12e95c397af6fdd6fee9a..0566afe30505c2867f73a48f7aa5fdd1d8e21c94 100755 GIT binary patch delta 1503 zcmZ{kzmC&D5XN_B*9nr7OC-1x=ycNBSBHW!LLyt_q|*t6bgw{hvKu1ku2}Qu0fJm1 zQRE70B3(m=MMFt?n)Ebz0R!J`LLwzu7l~(orQ&7K24kTB_}oO7uFg5dXz>SQX*S2He|?ljm}e* zr}DfljU3`f7?mZ825MlIwN`bQ+iu=ZQT?K=jIv$i@!FXY2_6-3i(+n-M6ppOx=KZ6 zu|XrxkCR4h8n?gYi8}ZZ5jlBnWKW2c%A?3M)CF&oxP^%Qkt9L2pdGD`$@AuQkc zeBTd|#NO2zca*U$-s-v~wyrwX!YlOP!vy8IlO1G0bR$Rf(D?2GbQ^T;K{3iYBQ%-uu$51}K( zauY+7JwclolkN#5S)C>8WegrGGEayVN0z|6pjj!rVnm)GN5sD(lK5EMvO5h{s^gCU z`eV`B;(h3qF5ZR6JV<7HiDNZZdl8dqkW7MP93;gqajU5=f}hGfNM=D&1<7=apg%4z ztjQMIka38>1D0lCk>WLl>G7x`4weGkXO-d;psTj_<6VaqKILVF=NS`+>|Ib5#J5~! z2jjXq)YFNf74N5#{WMkOx5nXJ=n%fD>(lxrMaw delta 709 zcmaKpJ#G|15QVF%du)sLvMXepzy#I3LM9=_B4RpNCzb^g;siJ_vk3zhiG%SC0BuC* zGx!3)1rU(~Cq4of7~oa!2rUFe)0*!0Q(yhu{kHw#vz*`g`1RBJdluKp+2R%NcI*8# zt@Y1+t!w>p?$n>QiANk%)HiuDPpon{VkPzSJZYkn+STfM@}z?nx2{|%D;LSr5rs_& zW*K4Sluw#i1+Bt4i*$S@wbV-ab$fYVHr?$nQvNDGBuZ}aq|z4%CB%%QM;!M=D9}&) zmouaxq_JK{hzYRi-YPdy@KCZHiVt4ljA{o~hC}}G%PlT8?z)@u-RXu0HcfW#^1U|W zrb(jmKqy0a`sR^P@rsyWaD$mr6sup#&QOyg|KkD&g(tT&faNF#1HrbwY5K0uhNfG1t`ywFB|k_gOtZDmMGAnl}RSi z7iOSg+C9_$(P4l9w8z!~Oivp08UK%#tkQDomb7Gr78bCiC52WhYsEOIvnn=7Vvuam TlDcH{m)uIxJX~Jg+H=1Fd0>a8 diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 9944612..2b7a27d 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -142,42 +142,53 @@ (local $3 i32) (local $4 i32) (local $5 i32) - (local $6 v128) + (local $6 i32) (local $7 v128) - (local.set $4 - (i32.and - (local.get $0) - (i32.const 15) + (local $8 v128) + (local $scratch i32) + (block $block + (br_if $block + (i32.eqz + (local.get $2) + ) ) - ) - (block $block1 - (block $block - (if - (v128.any_true - (local.tee $6 - (i8x16.eq - (v128.load - (local.tee $3 - (i32.and - (local.get $0) - (i32.const -16) - ) - ) - ) + (local.set $4 + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + (block $block2 + (block $block1 + (br_if $block1 + (i32.eqz + (v128.any_true (local.tee $7 - (i8x16.splat - (local.get $1) + (i8x16.eq + (v128.load + (local.tee $3 + (i32.and + (local.get $0) + (i32.const -16) + ) + ) + ) + (local.tee $8 + (i8x16.splat + (local.get $1) + ) + ) ) ) ) ) ) - (then - (br_if $block - (local.tee $1 + (br_if $block1 + (i32.eqz + (local.tee $5 (i32.and (i8x16.bitmask - (local.get $6) + (local.get $7) ) (i32.shl (i32.const -1) @@ -187,44 +198,63 @@ ) ) ) + (local.set $1 + (local.get $2) + ) + (br $block2) ) - (br_if $block1 - (i32.gt_u + (br_if $block + (i32.lt_u + (local.get $2) (local.tee $1 (i32.sub - (i32.add - (local.get $2) - (local.get $4) + (local.get $2) + (local.tee $3 + (i32.sub + (i32.const 16) + (local.get $4) + ) ) - (i32.const 16) ) ) - (local.get $2) + ) + ) + (br_if $block + (i32.eqz + (local.get $1) ) ) (local.set $3 (i32.add - (i32.sub - (local.get $0) - (local.get $4) - ) - (i32.const 16) + (local.get $0) + (local.get $3) ) ) - (block $block2 + (block $block3 (loop $label - (br_if $block2 + (br_if $block3 (v128.any_true - (local.tee $6 + (local.tee $7 (i8x16.eq (v128.load (local.get $3) ) - (local.get $7) + (local.get $8) ) ) ) ) + (br_if $block + (i32.gt_u + (local.tee $0 + (i32.sub + (local.get $1) + (i32.const 16) + ) + ) + (local.get $1) + ) + ) (local.set $3 (i32.add (local.get $3) @@ -232,35 +262,49 @@ ) ) (br_if $label - (i32.ge_u - (local.get $1) - (local.tee $1 - (i32.sub - (local.get $1) - (i32.const 16) + (i32.eqz + (block (result i32) + (local.set $scratch + (i32.eq + (local.get $1) + (i32.const 16) + ) ) + (local.set $1 + (local.get $0) + ) + (local.get $scratch) ) ) ) ) - (br $block1) + (br $block) ) - (local.set $1 + (local.set $5 (i8x16.bitmask - (local.get $6) + (local.get $7) ) ) ) - (local.set $5 - (i32.add - (local.get $3) - (i32.ctz + (local.set $6 + (select + (i32.add + (local.get $3) + (local.tee $0 + (i32.ctz + (local.get $5) + ) + ) + ) + (i32.const 0) + (i32.lt_u + (local.get $0) (local.get $1) ) ) ) ) - (local.get $5) + (local.get $6) ) (func $strlen (param $0 i32) (result i32) (local $1 i32) diff --git a/sqlite3/libc/libc_test.go b/sqlite3/libc/libc_test.go index 9fd4c3a..ba7bbca 100644 --- a/sqlite3/libc/libc_test.go +++ b/sqlite3/libc/libc_test.go @@ -116,6 +116,9 @@ func Benchmark_memchr(b *testing.B) { if got := call(memchr, ptr1, 5, size); got != ptr1+size/2 { b.Fatal(got) } + if got := call(memchr, ptr1, 5, size/2); got != 0 { + b.Fatal(got, ptr1+size/2) + } } func Benchmark_memcmp(b *testing.B) { diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index ea4d5bb..140d701 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -45,7 +45,7 @@ void *memmove(void *dest, const void *src, size_t n) { // aligned address less than memory size. // // These also assume unaligned access is not painfully slow, -// but that bitmask extraction is slow on AArch64. +// but that bitmask extraction is really slow on AArch64. __attribute__((weak)) int memcmp(const void *v1, const void *v2, size_t n) { @@ -75,13 +75,14 @@ void *memchr(const void *v, int c, size_t n) { const v128_t *w = (void *)(v - align); const v128_t wc = wasm_i8x16_splat(c); - while (true) { + while (n) { const v128_t cmp = wasm_i8x16_eq(*w, wc); if (wasm_v128_any_true(cmp)) { int mask = wasm_i8x16_bitmask(cmp) >> align << align; __builtin_assume(mask || align); if (mask) { - return (void *)w + __builtin_ctz(mask); + size_t ctz = __builtin_ctz(mask); + return ctz < n ? (void *)w + ctz : NULL; } } if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) { @@ -90,6 +91,7 @@ void *memchr(const void *v, int c, size_t n) { align = 0; w++; } + return NULL; } __attribute__((weak)) @@ -111,12 +113,7 @@ size_t strlen(const char *s) { } } -__attribute__((weak)) -int strcmp(const char *s1, const char *s2) { - if (__builtin_constant_p(__builtin_strlen(s2))) { - return strncmp(s1, s2, __builtin_strlen(s2)); - } - +static int __strcmp(const char *s1, const char *s2) { const v128_t *const limit = (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; @@ -144,6 +141,14 @@ int strcmp(const char *s1, const char *s2) { return 0; } +__attribute__((weak, always_inline)) +int strcmp(const char *s1, const char *s2) { + if (__builtin_constant_p(strlen(s2))) { + return strncmp(s1, s2, strlen(s2)); + } + return __strcmp(s1, s2); +} + __attribute__((weak)) int strncmp(const char *s1, const char *s2, size_t n) { const v128_t *const limit = @@ -173,12 +178,7 @@ int strncmp(const char *s1, const char *s2, size_t n) { return 0; } -__attribute__((always_inline)) static char *__strchrnul(const char *s, int c) { - if (__builtin_constant_p(c) && (char)c == 0) { - return (char *)s + strlen(s); - } - uintptr_t align = (uintptr_t)s % sizeof(v128_t); const v128_t *w = (void *)(s - align); const v128_t wc = wasm_i8x16_splat(c); @@ -197,13 +197,19 @@ static char *__strchrnul(const char *s, int c) { } } -__attribute__((weak)) +__attribute__((weak, always_inline)) char *strchrnul(const char *s, int c) { + if (__builtin_constant_p(c) && (char)c == 0) { + return (char *)s + strlen(s); + } return __strchrnul(s, c); } -__attribute__((weak)) +__attribute__((weak, always_inline)) char *strchr(const char *s, int c) { + if (__builtin_constant_p(c) && (char)c == 0) { + return (char *)s + strlen(s); + } char *r = __strchrnul(s, c); return *(char *)r == (char)c ? r : NULL; }