Case insensitive compare.

2026-01-12 05:59:14 +00:00 · 2025-05-13 16:20:45 +01:00
parent fc5ced209c
commit 687e643d7a
6 changed files with 1060 additions and 468 deletions
--- a/sqlite3/libc/build.sh
+++ b/sqlite3/libc/build.sh
@@ -11,8 +11,9 @@ SRCS="${1:-libc.c}"

 trap 'rm -f libc.c libc.tmp' EXIT
 cat << EOF > libc.c
-#include <string.h>
 #include <stdlib.h>
+#include <string.h>
+#include <strings.h>
 EOF

 "$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
@@ -40,11 +41,13 @@ EOF
 	-Wl,--export=strchr \
 	-Wl,--export=strchrnul \
 	-Wl,--export=strcmp \
+	-Wl,--export=strcasecmp \
 	-Wl,--export=strcpy \
 	-Wl,--export=strcspn \
 	-Wl,--export=strlen \
 	-Wl,--export=strncat \
 	-Wl,--export=strncmp \
+	-Wl,--export=strncasecmp \
 	-Wl,--export=strncpy \
 	-Wl,--export=strrchr \
 	-Wl,--export=strspn \
--- a/sqlite3/libc/libc.wasm
+++ b/sqlite3/libc/libc.wasm
--- a/sqlite3/libc/libc.wat
+++ b/sqlite3/libc/libc.wat
--- a/sqlite3/libc/libc_test.go
+++ b/sqlite3/libc/libc_test.go
@@ -22,21 +22,23 @@ const (
 )

 var (
-	memory  []byte
-	module  api.Module
-	memset  api.Function
-	memcpy  api.Function
-	memchr  api.Function
-	memcmp  api.Function
-	strlen  api.Function
-	strchr  api.Function
-	strcmp  api.Function
-	strstr  api.Function
-	strspn  api.Function
-	strrchr api.Function
-	strncmp api.Function
-	strcspn api.Function
-	stack   [8]uint64
+	memory      []byte
+	module      api.Module
+	memset      api.Function
+	memcpy      api.Function
+	memchr      api.Function
+	memcmp      api.Function
+	strlen      api.Function
+	strchr      api.Function
+	strcmp      api.Function
+	strstr      api.Function
+	strspn      api.Function
+	strrchr     api.Function
+	strncmp     api.Function
+	strcspn     api.Function
+	strcasecmp  api.Function
+	strncasecmp api.Function
+	stack       [8]uint64
 )

 func call(fn api.Function, arg ...uint64) uint64 {
@@ -70,6 +72,8 @@ func TestMain(m *testing.M) {
 	strrchr = mod.ExportedFunction("strrchr")
 	strncmp = mod.ExportedFunction("strncmp")
 	strcspn = mod.ExportedFunction("strcspn")
+	strcasecmp = mod.ExportedFunction("strcasecmp")
+	strncasecmp = mod.ExportedFunction("strncasecmp")
 	memory, _ = mod.Memory().Read(0, mod.Memory().Size())

 	os.Exit(m.Run())
@@ -182,6 +186,32 @@ func Benchmark_strncmp(b *testing.B) {
 	}
 }

+// Benchmark_strcasecmp measures strcasecmp on two strings laid out so that
+// they first differ at offset size/2.
+func Benchmark_strcasecmp(b *testing.B) {
+	clear(memory)
+	// Assuming fill sets every byte of the slice to the given value:
+	// ptr1 holds size-1 bytes of 7, ptr2 holds size/2 bytes of 7 followed
+	// by 5s. The zeros left by clear() after each run act as terminators.
+	fill(memory[ptr1:ptr1+size-1], 7)
+	fill(memory[ptr2:ptr2+size/2], 7)
+	fill(memory[ptr2+size/2:ptr2+size-1], 5)
+
+	// Bytes examined per call: the equal prefix plus the differing byte.
+	b.SetBytes(size/2 + 1)
+	b.ResetTimer()
+	for range b.N {
+		// strcasecmp takes exactly two pointers; it has no length parameter.
+		call(strcasecmp, ptr1, ptr2)
+	}
+}
+
+// Benchmark_strncasecmp measures strncasecmp, limited to size-1 bytes, on two
+// strings laid out so that they first differ at offset size/2.
+func Benchmark_strncasecmp(b *testing.B) {
+	clear(memory)
+	// Assuming fill sets every byte of the slice to the given value:
+	// ptr1 holds size-1 bytes of 7, ptr2 holds size/2 bytes of 7 followed
+	// by 5s. The zeros left by clear() after each run act as terminators.
+	fill(memory[ptr1:ptr1+size-1], 7)
+	fill(memory[ptr2:ptr2+size/2], 7)
+	fill(memory[ptr2+size/2:ptr2+size-1], 5)
+
+	// Bytes examined per call: the equal prefix plus the differing byte.
+	b.SetBytes(size/2 + 1)
+	b.ResetTimer()
+	for range b.N {
+		call(strncasecmp, ptr1, ptr2, size-1)
+	}
+}
+
 func Benchmark_strspn(b *testing.B) {
 	clear(memory)
 	fill(memory[ptr1:ptr1+size/2], 7)
--- a/sqlite3/libc/string.h
+++ b/sqlite3/libc/string.h
@@ -40,7 +40,7 @@ void *memmove(void *dest, const void *src, size_t n) {

 __attribute__((weak))
 int memcmp(const void *v1, const void *v2, size_t n) {
-  // Baseline algorithm.
+  // Scalar algorithm.
  if (n < sizeof(v128_t)) {
    const unsigned char *u1 = (unsigned char *)v1;
    const unsigned char *u2 = (unsigned char *)v2;
@@ -89,7 +89,7 @@ int memcmp(const void *v1, const void *v2, size_t n) {
 #else  // __OPTIMIZE_SIZE__

 static int __memcmpeq(const void *v1, const void *v2, size_t n) {
-  // Baseline algorithm.
+  // Scalar algorithm.
  if (n < sizeof(v128_t)) {
    const unsigned char *u1 = (unsigned char *)v1;
    const unsigned char *u2 = (unsigned char *)v2;
@@ -184,7 +184,7 @@ void *memrchr(const void *v, int c, size_t n) {
    }
  }

-  // Baseline algorithm.
+  // Scalar algorithm.
  const char *a = (char *)w;
  while (n--) {
    if (*(--a) == (char)c) return (char *)a;
@@ -219,6 +219,19 @@ size_t strlen(const char *s) {
  }
 }

+// Byte-at-a-time string comparison. Returns 0 when the strings are equal,
+// otherwise the difference between the first pair of mismatching bytes,
+// both read as unsigned char.
+static int __strcmp_s(const char *s1, const char *s2) {
+  // Scalar algorithm.
+  const unsigned char *p = (unsigned char *)s1;
+  const unsigned char *q = (unsigned char *)s2;
+  while (*p == *q) {
+    // Equal bytes: reaching the terminator means the strings match.
+    if (*p == 0) return 0;
+    p++;
+    q++;
+  }
+  return *p - *q;
+}
+
 static int __strcmp(const char *s1, const char *s2) {
  // How many bytes can be read before pointers go out of bounds.
  size_t N = __builtin_wasm_memory_size(0) * PAGESIZE -  //
@@ -243,28 +256,7 @@ static int __strcmp(const char *s1, const char *s2) {
    w2++;
  }

-  // Baseline algorithm.
-  const unsigned char *u1 = (unsigned char *)w1;
-  const unsigned char *u2 = (unsigned char *)w2;
-  for (;;) {
-    if (*u1 != *u2) return *u1 - *u2;
-    if (*u1 == 0) break;
-    u1++;
-    u2++;
-  }
-  return 0;
-}
-
-static int __strcmp_s(const char *s1, const char *s2) {
-  const unsigned char *u1 = (unsigned char *)s1;
-  const unsigned char *u2 = (unsigned char *)s2;
-  for (;;) {
-    if (*u1 != *u2) return *u1 - *u2;
-    if (*u1 == 0) break;
-    u1++;
-    u2++;
-  }
-  return 0;
+  return __strcmp_s((char *)w1, (char *)w2);
 }

 __attribute__((weak, always_inline))
@@ -302,7 +294,7 @@ int strncmp(const char *s1, const char *s2, size_t n) {
    w2++;
  }

-  // Baseline algorithm.
+  // Scalar algorithm.
  const unsigned char *u1 = (unsigned char *)w1;
  const unsigned char *u2 = (unsigned char *)w2;
  while (n--) {
@@ -446,7 +438,7 @@ size_t strspn(const char *s, const char *c) {
      w++;
    }

-    // Baseline algorithm.
+    // Scalar algorithm.
    for (s = (char *)w; *s == *c; s++);
    return s - a;
  }
@@ -469,7 +461,7 @@ size_t strspn(const char *s, const char *c) {
    w++;
  }

-  // Baseline algorithm.
+  // Scalar algorithm.
  for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++);
  return s - a;
 }
@@ -502,7 +494,7 @@ size_t strcspn(const char *s, const char *c) {
    w++;
  }

-  // Baseline algorithm.
+  // Scalar algorithm.
  for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++);
  return s - a;
 }
@@ -573,7 +565,7 @@ static const char *__memmem_raita(const char *haystk, size_t sh,
    haystk += skip;
  }

-  // Baseline algorithm.
+  // Scalar algorithm.
  for (size_t j = 0; j <= sh - sn; j++) {
    for (size_t i = 0;; i++) {
      if (sn == i) return haystk;
--- a/sqlite3/libc/strings.h
+++ b/sqlite3/libc/strings.h
@@ -1,7 +1,11 @@
 #ifndef _WASM_SIMD128_STRINGS_H
 #define _WASM_SIMD128_STRINGS_H

+#include <ctype.h>
+#include <stdint.h>
 #include <string.h>
+#include <wasm_simd128.h>
+#include <__macro_PAGESIZE.h>

 #include_next <strings.h>  // the system strings.h

@@ -16,6 +20,111 @@ int bcmp(const void *v1, const void *v2, size_t n) {
  return __memcmpeq(v1, v2, n);
 }

+// Lowercases the ASCII letters 'A'..'Z' in each of the 16 byte lanes of v.
+// Every other byte value is returned unchanged.
+// Declared static: this is a header-defined helper and must not emit an
+// external symbol in every translation unit that includes it.
+static v128_t __tolower8x16(v128_t v) {
+  // Bias each byte so that 'A'..'Z' lands at the very top of the signed
+  // range; bytes above 'Z' wrap around to negative values.
+  // The add has to be lane-wise on bytes: wasm_simd128.h declares v128_t as
+  // a vector of 32-bit integers, so a plain `+` on two v128_t values would
+  // let carries leak from one byte into its neighbour.
+  v128_t i = wasm_i8x16_add(v, wasm_i8x16_splat(INT8_MAX - ('Z')));
+  // Signed per-byte compare: only the biased uppercase letters exceed the
+  // threshold, producing an all-ones mask in those lanes.
+  i = wasm_i8x16_gt(i, wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1)));
+  // Reduce the lane mask to the ASCII case bit.
+  i = wasm_v128_and(i, wasm_i8x16_splat('a' - 'A'));
+  // Setting the case bit turns 'A'..'Z' into 'a'..'z'.
+  return wasm_v128_or(v, i);
+}
+
+// Byte-at-a-time case-insensitive string comparison. Each byte is read as
+// unsigned char and folded with tolower() before comparing. Returns 0 when
+// the folded strings are equal, otherwise the difference between the first
+// pair of mismatching folded bytes.
+static int __strcasecmp_s(const char *s1, const char *s2) {
+  // Scalar algorithm.
+  const unsigned char *p = (unsigned char *)s1;
+  const unsigned char *q = (unsigned char *)s2;
+  int a = tolower(*p);
+  int b = tolower(*q);
+  // Advance while the folded bytes agree and no terminator was reached.
+  while (a == b && a != 0) {
+    a = tolower(*++p);
+    b = tolower(*++q);
+  }
+  // Either a mismatch (non-zero difference) or both terminators (zero).
+  return a - b;
+}
+
+// Case-insensitive comparison of two NUL-terminated strings, processed
+// 16 bytes at a time. Each block is lowercased with __tolower8x16 before
+// comparing. Returns 0 directly when an equal block contains a terminator.
+// On the first differing block, or when fewer than 16 bytes may be read,
+// the rest is handed to __strcasecmp_s starting at that block.
+static int __strcasecmp(const char *s1, const char *s2) {
+  // How many bytes can be read before pointers go out of bounds.
+  // Measured from the higher of the two pointers, so while N >= 16 a
+  // 16-byte load from either string stays within bounds.
+  size_t N = __builtin_wasm_memory_size(0) * PAGESIZE -  //
+             (size_t)(s1 > s2 ? s1 : s2);
+
+  // Unaligned loads handle the case where the strings
+  // have mismatching alignments.
+  const v128_t *w1 = (v128_t *)s1;
+  const v128_t *w2 = (v128_t *)s2;
+  for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
+    v128_t v1 = __tolower8x16(wasm_v128_load(w1));
+    v128_t v2 = __tolower8x16(wasm_v128_load(w2));
+
+    // Find any single bit difference.
+    if (wasm_v128_any_true(v1 ^ v2)) {
+      // The terminator may come before the difference.
+      // Leave w1/w2 on this block so the scalar pass re-examines it.
+      break;
+    }
+    // We know all characters are equal.
+    // If any is a terminator the strings are equal.
+    if (!wasm_i8x16_all_true(v1)) {
+      return 0;
+    }
+    w1++;
+    w2++;
+  }
+
+  // Finish byte by byte from the first block not fully handled above.
+  return __strcasecmp_s((char *)w1, (char *)w2);
+}
+
+// Case-insensitive comparison of two NUL-terminated strings.
+// Returns 0 when the strings are equal ignoring case; otherwise the
+// non-zero result computed by __strcasecmp_s at the first mismatch.
+__attribute__((weak))
+int strcasecmp(const char *s1, const char *s2) {
+  // Skip the vector search when comparing against small literal strings.
+  // The check only fires when the compiler can evaluate strlen(s2) at
+  // compile time and the result is shorter than one 16-byte vector.
+  if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
+    return __strcasecmp_s(s1, s2);
+  }
+  return __strcasecmp(s1, s2);
+}
+
+// Case-insensitive comparison of at most n bytes of two strings.
+// Full 16-byte blocks are lowercased with __tolower8x16 and compared as
+// vectors; the remainder, or the first differing block, is compared
+// byte by byte with tolower(). Returns 0 when the compared bytes match,
+// otherwise the difference of the first mismatching lowercased bytes.
+__attribute__((weak))
+int strncasecmp(const char *s1, const char *s2, size_t n) {
+  // How many bytes can be read before pointers go out of bounds.
+  size_t N = __builtin_wasm_memory_size(0) * PAGESIZE -  //
+             (size_t)(s1 > s2 ? s1 : s2);
+  // Clamp the limit so neither loop below reads past that bound.
+  if (n > N) n = N;
+
+  // Unaligned loads handle the case where the strings
+  // have mismatching alignments.
+  const v128_t *w1 = (v128_t *)s1;
+  const v128_t *w2 = (v128_t *)s2;
+  for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
+    v128_t v1 = __tolower8x16(wasm_v128_load(w1));
+    v128_t v2 = __tolower8x16(wasm_v128_load(w2));
+
+    // Find any single bit difference.
+    if (wasm_v128_any_true(v1 ^ v2)) {
+      // The terminator may come before the difference.
+      // n has not been decremented for this block, so the scalar loop
+      // below re-examines it in full.
+      break;
+    }
+    // We know all characters are equal.
+    // If any is a terminator the strings are equal.
+    if (!wasm_i8x16_all_true(v1)) {
+      return 0;
+    }
+    w1++;
+    w2++;
+  }
+
+  // Scalar algorithm.
+  // Compares the remaining n bytes, stopping early at a terminator.
+  const unsigned char *u1 = (unsigned char *)w1;
+  const unsigned char *u2 = (unsigned char *)w2;
+  while (n--) {
+    int c1 = tolower(*u1);
+    int c2 = tolower(*u2);
+    if (c1 != c2) return c1 - c2;
+    if (c1 == 0) break;
+    u1++;
+    u2++;
+  }
+  return 0;
+}
+
 #endif  // __wasm_simd128__

 #ifdef __cplusplus