From 687e643d7aff524646d59a2581604256fbad1172 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Tue, 13 May 2025 16:20:45 +0100 Subject: [PATCH] Case insensitive compare. --- sqlite3/libc/build.sh | 5 +- sqlite3/libc/libc.wasm | Bin 5209 -> 6063 bytes sqlite3/libc/libc.wat | 1302 +++++++++++++++++++++++++------------ sqlite3/libc/libc_test.go | 60 +- sqlite3/libc/string.h | 52 +- sqlite3/libc/strings.h | 109 ++++ 6 files changed, 1060 insertions(+), 468 deletions(-) diff --git a/sqlite3/libc/build.sh b/sqlite3/libc/build.sh index e28dd43..89144dc 100755 --- a/sqlite3/libc/build.sh +++ b/sqlite3/libc/build.sh @@ -11,8 +11,9 @@ SRCS="${1:-libc.c}" trap 'rm -f libc.c libc.tmp' EXIT cat << EOF > libc.c -#include #include +#include +#include EOF "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \ @@ -40,11 +41,13 @@ EOF -Wl,--export=strchr \ -Wl,--export=strchrnul \ -Wl,--export=strcmp \ + -Wl,--export=strcasecmp \ -Wl,--export=strcpy \ -Wl,--export=strcspn \ -Wl,--export=strlen \ -Wl,--export=strncat \ -Wl,--export=strncmp \ + -Wl,--export=strncasecmp \ -Wl,--export=strncpy \ -Wl,--export=strrchr \ -Wl,--export=strspn \ diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 3e5583b918c76d1a3b77aa1b8ccc361ea22c6ea8..db4fc8e15eb53413c56657f795d9f3304a84dfc8 100755 GIT binary patch delta 1366 zcmb7E&2G~`5T3EuPGhHu+q7w#BG^Ox`UJJ1JtOq&L5t6_ zWYeZM*_CsM*1?_1=)fszqsMsDFijUbjxp++>jFjm$#StUI<_)M%MbfVTQX~g>vY^( zPsFCoxRFF@xZo5xO$nWvPJWb{vmCEEH#W=~QuR{{K!U`MT~wGf*IpT(oqOS4O7Dwr2iOlBJY5*xV^VnhR_zo8@I@AIYTttNU6 zuLV|ifA4wo-s{G|!xOBGg+a*y=|ZXj{xFpQ&lh7UFeXBvo}U{uma@A0#L~B?4(k#6Eyb1hQq1!%}wNBKCRyfSprxK7F2P6 b$nRe%K`D*!hf5c9pnOpVmMJ3sR($sttw&Xt delta 737 zcmYL`zmC&D5XNV0uN}vU&#}4ukr1~8MWR3)kANS5o-PKO$|omssZ%KDGnbufSl!p0?UZ42AeaTZ*>cfTh)5M zDM7H{D%T@XvhsYkwz~YVhk=Qy*XNjB7_wEV5(2hzRwKTFJ{X&<)^NdB<@`fYKR2sk zLZMD4RerS#HPq7*4a;nAOPO*e@XYFovtP)OJBfhk^gVo71P2c0yVz$J89Vtjl;=B8Rih|=76Kga@Y$Z^Dk==#U zY5NDAGAlMP^!GP!^XH$ks=!}3KNIX}siucjDEkD>IWse$h?r~Mu25e8V1*j|$K{)B ztB0I1tbh$7H1hHjtMLg{7_Qenw6)G|>g>W));c +#include #include +#include +#include <__macro_PAGESIZE.h> #include_next // the system strings.h @@ -16,6 +20,111 @@ int bcmp(const void *v1, const void *v2, size_t n) { return __memcmpeq(v1, v2, n); } +v128_t __tolower8x16(v128_t v) { + __i8x16 i; + i = v + wasm_i8x16_splat(INT8_MAX - ('Z')); + i = i > wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1)); + i = i & wasm_i8x16_splat('a' - 'A'); + return v | i; +} + +static int __strcasecmp_s(const char *s1, const char *s2) { + // Scalar algorithm. + const unsigned char *u1 = (unsigned char *)s1; + const unsigned char *u2 = (unsigned char *)s2; + for (;;) { + int c1 = tolower(*u1); + int c2 = tolower(*u2); + if (c1 != c2) return c1 - c2; + if (c1 == 0) break; + u1++; + u2++; + } + return 0; +} + +static int __strcasecmp(const char *s1, const char *s2) { + // How many bytes can be read before pointers go out of bounds. + size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // + (size_t)(s1 > s2 ? s1 : s2); + + // Unaligned loads handle the case where the strings + // have mismatching alignments. + const v128_t *w1 = (v128_t *)s1; + const v128_t *w2 = (v128_t *)s2; + for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) { + v128_t v1 = __tolower8x16(wasm_v128_load(w1)); + v128_t v2 = __tolower8x16(wasm_v128_load(w2)); + + // Find any single bit difference. + if (wasm_v128_any_true(v1 ^ v2)) { + // The terminator may come before the difference. + break; + } + // We know all characters are equal. + // If any is a terminator the strings are equal. + if (!wasm_i8x16_all_true(v1)) { + return 0; + } + w1++; + w2++; + } + + return __strcasecmp_s((char *)w1, (char *)w2); +} + +__attribute__((weak)) +int strcasecmp(const char *s1, const char *s2) { + // Skip the vector search when comparing against small literal strings. + if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) { + return __strcasecmp_s(s1, s2); + } + return __strcasecmp(s1, s2); +} + +__attribute__((weak)) +int strncasecmp(const char *s1, const char *s2, size_t n) { + // How many bytes can be read before pointers go out of bounds. + size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - // + (size_t)(s1 > s2 ? s1 : s2); + if (n > N) n = N; + + // Unaligned loads handle the case where the strings + // have mismatching alignments. + const v128_t *w1 = (v128_t *)s1; + const v128_t *w2 = (v128_t *)s2; + for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + v128_t v1 = __tolower8x16(wasm_v128_load(w1)); + v128_t v2 = __tolower8x16(wasm_v128_load(w2)); + + // Find any single bit difference. + if (wasm_v128_any_true(v1 ^ v2)) { + // The terminator may come before the difference. + break; + } + // We know all characters are equal. + // If any is a terminator the strings are equal. + if (!wasm_i8x16_all_true(v1)) { + return 0; + } + w1++; + w2++; + } + + // Scalar algorithm. + const unsigned char *u1 = (unsigned char *)w1; + const unsigned char *u2 = (unsigned char *)w2; + while (n--) { + int c1 = tolower(*u1); + int c2 = tolower(*u2); + if (c1 != c2) return c1 - c2; + if (c1 == 0) break; + u1++; + u2++; + } + return 0; +} + #endif // __wasm_simd128__ #ifdef __cplusplus