Case insensitive compare.

This commit is contained in:
Nuno Cruces
2025-05-13 16:20:45 +01:00
parent fc5ced209c
commit 687e643d7a
6 changed files with 1060 additions and 468 deletions

View File

@@ -11,8 +11,9 @@ SRCS="${1:-libc.c}"
trap 'rm -f libc.c libc.tmp' EXIT
cat << EOF > libc.c
#include <string.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
EOF
"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
@@ -40,11 +41,13 @@ EOF
-Wl,--export=strchr \
-Wl,--export=strchrnul \
-Wl,--export=strcmp \
-Wl,--export=strcasecmp \
-Wl,--export=strcpy \
-Wl,--export=strcspn \
-Wl,--export=strlen \
-Wl,--export=strncat \
-Wl,--export=strncmp \
-Wl,--export=strncasecmp \
-Wl,--export=strncpy \
-Wl,--export=strrchr \
-Wl,--export=strspn \

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -22,21 +22,23 @@ const (
)
var (
memory []byte
module api.Module
memset api.Function
memcpy api.Function
memchr api.Function
memcmp api.Function
strlen api.Function
strchr api.Function
strcmp api.Function
strstr api.Function
strspn api.Function
strrchr api.Function
strncmp api.Function
strcspn api.Function
stack [8]uint64
memory []byte
module api.Module
memset api.Function
memcpy api.Function
memchr api.Function
memcmp api.Function
strlen api.Function
strchr api.Function
strcmp api.Function
strstr api.Function
strspn api.Function
strrchr api.Function
strncmp api.Function
strcspn api.Function
strcasecmp api.Function
strncasecmp api.Function
stack [8]uint64
)
func call(fn api.Function, arg ...uint64) uint64 {
@@ -70,6 +72,8 @@ func TestMain(m *testing.M) {
strrchr = mod.ExportedFunction("strrchr")
strncmp = mod.ExportedFunction("strncmp")
strcspn = mod.ExportedFunction("strcspn")
strcasecmp = mod.ExportedFunction("strcasecmp")
strncasecmp = mod.ExportedFunction("strncasecmp")
memory, _ = mod.Memory().Read(0, mod.Memory().Size())
os.Exit(m.Run())
@@ -182,6 +186,32 @@ func Benchmark_strncmp(b *testing.B) {
}
}
func Benchmark_strcasecmp(b *testing.B) {
clear(memory)
fill(memory[ptr1:ptr1+size-1], 7)
fill(memory[ptr2:ptr2+size/2], 7)
fill(memory[ptr2+size/2:ptr2+size-1], 5)
b.SetBytes(size/2 + 1)
b.ResetTimer()
for range b.N {
call(strcasecmp, ptr1, ptr2, size)
}
}
func Benchmark_strncasecmp(b *testing.B) {
clear(memory)
fill(memory[ptr1:ptr1+size-1], 7)
fill(memory[ptr2:ptr2+size/2], 7)
fill(memory[ptr2+size/2:ptr2+size-1], 5)
b.SetBytes(size/2 + 1)
b.ResetTimer()
for range b.N {
call(strncasecmp, ptr1, ptr2, size-1)
}
}
func Benchmark_strspn(b *testing.B) {
clear(memory)
fill(memory[ptr1:ptr1+size/2], 7)

View File

@@ -40,7 +40,7 @@ void *memmove(void *dest, const void *src, size_t n) {
__attribute__((weak))
int memcmp(const void *v1, const void *v2, size_t n) {
// Baseline algorithm.
// Scalar algorithm.
if (n < sizeof(v128_t)) {
const unsigned char *u1 = (unsigned char *)v1;
const unsigned char *u2 = (unsigned char *)v2;
@@ -89,7 +89,7 @@ int memcmp(const void *v1, const void *v2, size_t n) {
#else // __OPTIMIZE_SIZE__
static int __memcmpeq(const void *v1, const void *v2, size_t n) {
// Baseline algorithm.
// Scalar algorithm.
if (n < sizeof(v128_t)) {
const unsigned char *u1 = (unsigned char *)v1;
const unsigned char *u2 = (unsigned char *)v2;
@@ -184,7 +184,7 @@ void *memrchr(const void *v, int c, size_t n) {
}
}
// Baseline algorithm.
// Scalar algorithm.
const char *a = (char *)w;
while (n--) {
if (*(--a) == (char)c) return (char *)a;
@@ -219,6 +219,19 @@ size_t strlen(const char *s) {
}
}
static int __strcmp_s(const char *s1, const char *s2) {
// Scalar algorithm.
const unsigned char *u1 = (unsigned char *)s1;
const unsigned char *u2 = (unsigned char *)s2;
for (;;) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
u1++;
u2++;
}
return 0;
}
static int __strcmp(const char *s1, const char *s2) {
// How many bytes can be read before pointers go out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
@@ -243,28 +256,7 @@ static int __strcmp(const char *s1, const char *s2) {
w2++;
}
// Baseline algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
for (;;) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
u1++;
u2++;
}
return 0;
}
static int __strcmp_s(const char *s1, const char *s2) {
const unsigned char *u1 = (unsigned char *)s1;
const unsigned char *u2 = (unsigned char *)s2;
for (;;) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
u1++;
u2++;
}
return 0;
return __strcmp_s((char *)w1, (char *)w2);
}
__attribute__((weak, always_inline))
@@ -302,7 +294,7 @@ int strncmp(const char *s1, const char *s2, size_t n) {
w2++;
}
// Baseline algorithm.
// Scalar algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
while (n--) {
@@ -446,7 +438,7 @@ size_t strspn(const char *s, const char *c) {
w++;
}
// Baseline algorithm.
// Scalar algorithm.
for (s = (char *)w; *s == *c; s++);
return s - a;
}
@@ -469,7 +461,7 @@ size_t strspn(const char *s, const char *c) {
w++;
}
// Baseline algorithm.
// Scalar algorithm.
for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++);
return s - a;
}
@@ -502,7 +494,7 @@ size_t strcspn(const char *s, const char *c) {
w++;
}
// Baseline algorithm.
// Scalar algorithm.
for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++);
return s - a;
}
@@ -573,7 +565,7 @@ static const char *__memmem_raita(const char *haystk, size_t sh,
haystk += skip;
}
// Baseline algorithm.
// Scalar algorithm.
for (size_t j = 0; j <= sh - sn; j++) {
for (size_t i = 0;; i++) {
if (sn == i) return haystk;

View File

@@ -1,7 +1,11 @@
#ifndef _WASM_SIMD128_STRINGS_H
#define _WASM_SIMD128_STRINGS_H
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <wasm_simd128.h>
#include <__macro_PAGESIZE.h>
#include_next <strings.h> // the system strings.h
@@ -16,6 +20,111 @@ int bcmp(const void *v1, const void *v2, size_t n) {
return __memcmpeq(v1, v2, n);
}
v128_t __tolower8x16(v128_t v) {
__i8x16 i;
i = v + wasm_i8x16_splat(INT8_MAX - ('Z'));
i = i > wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1));
i = i & wasm_i8x16_splat('a' - 'A');
return v | i;
}
static int __strcasecmp_s(const char *s1, const char *s2) {
// Scalar algorithm.
const unsigned char *u1 = (unsigned char *)s1;
const unsigned char *u2 = (unsigned char *)s2;
for (;;) {
int c1 = tolower(*u1);
int c2 = tolower(*u2);
if (c1 != c2) return c1 - c2;
if (c1 == 0) break;
u1++;
u2++;
}
return 0;
}
static int __strcasecmp(const char *s1, const char *s2) {
// How many bytes can be read before pointers go out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
(size_t)(s1 > s2 ? s1 : s2);
// Unaligned loads handle the case where the strings
// have mismatching alignments.
const v128_t *w1 = (v128_t *)s1;
const v128_t *w2 = (v128_t *)s2;
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
v128_t v1 = __tolower8x16(wasm_v128_load(w1));
v128_t v2 = __tolower8x16(wasm_v128_load(w2));
// Find any single bit difference.
if (wasm_v128_any_true(v1 ^ v2)) {
// The terminator may come before the difference.
break;
}
// We know all characters are equal.
// If any is a terminator the strings are equal.
if (!wasm_i8x16_all_true(v1)) {
return 0;
}
w1++;
w2++;
}
return __strcasecmp_s((char *)w1, (char *)w2);
}
__attribute__((weak))
int strcasecmp(const char *s1, const char *s2) {
// Skip the vector search when comparing against small literal strings.
if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
return __strcasecmp_s(s1, s2);
}
return __strcasecmp(s1, s2);
}
__attribute__((weak))
int strncasecmp(const char *s1, const char *s2, size_t n) {
// How many bytes can be read before pointers go out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
(size_t)(s1 > s2 ? s1 : s2);
if (n > N) n = N;
// Unaligned loads handle the case where the strings
// have mismatching alignments.
const v128_t *w1 = (v128_t *)s1;
const v128_t *w2 = (v128_t *)s2;
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
v128_t v1 = __tolower8x16(wasm_v128_load(w1));
v128_t v2 = __tolower8x16(wasm_v128_load(w2));
// Find any single bit difference.
if (wasm_v128_any_true(v1 ^ v2)) {
// The terminator may come before the difference.
break;
}
// We know all characters are equal.
// If any is a terminator the strings are equal.
if (!wasm_i8x16_all_true(v1)) {
return 0;
}
w1++;
w2++;
}
// Scalar algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
while (n--) {
int c1 = tolower(*u1);
int c2 = tolower(*u2);
if (c1 != c2) return c1 - c2;
if (c1 == 0) break;
u1++;
u2++;
}
return 0;
}
#endif // __wasm_simd128__
#ifdef __cplusplus