More memcmp.

This commit is contained in:
Nuno Cruces
2025-05-06 10:22:02 +01:00
parent 320b68e74f
commit d3973b23e3
12 changed files with 662 additions and 597 deletions

Binary file not shown.

View File

@@ -43,8 +43,8 @@ cd ~-
"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
-o bcw2.wasm "build/main.c" \
-I"build" \
-o bcw2.wasm build/main.c \
-I"$ROOT/sqlite3/libc" -I"build" \
-mexec-model=reactor \
-msimd128 -mmutable-globals -mmultivalue \
-mbulk-memory -mreference-types \

Binary file not shown.

8
go.mod
View File

@@ -8,16 +8,16 @@ require (
github.com/ncruces/julianday v1.0.0
github.com/ncruces/sort v0.1.5
github.com/tetratelabs/wazero v1.9.0
golang.org/x/crypto v0.37.0
golang.org/x/sys v0.32.0
golang.org/x/crypto v0.38.0
golang.org/x/sys v0.33.0
)
require (
github.com/dchest/siphash v1.2.3 // ext/bloom
github.com/google/uuid v1.6.0 // ext/uuid
github.com/psanford/httpreadat v0.1.0 // example
golang.org/x/sync v0.13.0 // test
golang.org/x/text v0.24.0 // ext/unicode
golang.org/x/sync v0.14.0 // test
golang.org/x/text v0.25.0 // ext/unicode
lukechampine.com/adiantum v1.1.1 // vfs/adiantum
)

16
go.sum
View File

@@ -10,13 +10,13 @@ github.com/psanford/httpreadat v0.1.0 h1:VleW1HS2zO7/4c7c7zNl33fO6oYACSagjJIyMIw
github.com/psanford/httpreadat v0.1.0/go.mod h1:Zg7P+TlBm3bYbyHTKv/EdtSJZn3qwbPwpfZ/I9GKCRE=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
lukechampine.com/adiantum v1.1.1 h1:4fp6gTxWCqpEbLy40ExiYDDED3oUNWx5cTqBCtPdZqA=
lukechampine.com/adiantum v1.1.1/go.mod h1:LrAYVnTYLnUtE/yMp5bQr0HstAf060YUF8nM0B6+rUw=

View File

@@ -27,16 +27,24 @@ EOF
-Wl,--stack-first \
-Wl,--import-undefined \
-Wl,--initial-memory=16777216 \
-Wl,--export=memccpy \
-Wl,--export=memchr \
-Wl,--export=memcmp \
-Wl,--export=memcpy \
-Wl,--export=memmove \
-Wl,--export=memrchr \
-Wl,--export=memset \
-Wl,--export=stpcpy \
-Wl,--export=stpncpy \
-Wl,--export=strchr \
-Wl,--export=strchrnul \
-Wl,--export=strcmp \
-Wl,--export=strcpy \
-Wl,--export=strcspn \
-Wl,--export=strlen \
-Wl,--export=strncat \
-Wl,--export=strncmp \
-Wl,--export=strncpy \
-Wl,--export=strrchr \
-Wl,--export=strspn \
-Wl,--export=qsort

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -42,13 +42,25 @@ void *memmove(void *dest, const void *src, size_t n) {
__attribute__((weak))
int memcmp(const void *v1, const void *v2, size_t n) {
// Baseline algorithm.
if (n < sizeof(v128_t)) {
const unsigned char *u1 = (unsigned char *)v1;
const unsigned char *u2 = (unsigned char *)v2;
while (n--) {
if (*u1 != *u2) return *u1 - *u2;
u1++;
u2++;
}
return 0;
}
// memcmp is allowed to read up to n bytes from each object.
// Find the first different character in the objects.
// Unaligned loads handle the case where the objects
// have mismatching alignments.
const v128_t *w1 = (v128_t *)v1;
const v128_t *w2 = (v128_t *)v2;
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
while (n) {
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(cmp)) {
@@ -60,17 +72,12 @@ int memcmp(const void *v1, const void *v2, size_t n) {
__builtin_assume(*u1 - *u2 != 0);
return *u1 - *u2;
}
w1++;
w2++;
}
// Baseline algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
while (n--) {
if (*u1 != *u2) return *u1 - *u2;
u1++;
u2++;
// This makes n a multiple of sizeof(v128_t)
// for every iteration except the first.
size_t align = (n - 1) % sizeof(v128_t) + 1;
w1 = (v128_t *)((char *)w1 + align);
w2 = (v128_t *)((char *)w2 + align);
n -= align;
}
return 0;
}
@@ -359,29 +366,13 @@ size_t strspn(const char *s, const char *c) {
return s - a;
}
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
// Unoptimized version.
memset(byteset, 0, sizeof(byteset));
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
while (byteset[*(unsigned char *)s]) s++;
#else // __OPTIMIZE__
// This is faster than memset.
// Going backward helps bounds check elimination.
volatile v128_t *w = (v128_t *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
// Keeping byteset[0] = 0 avoids the next loop needing that check.
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
#pragma unroll 4
#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__
#pragma unroll 4
#endif
while (byteset[*(unsigned char *)s]) s++;
#endif // __OPTIMIZE__
return s - a;
}
@@ -395,29 +386,13 @@ size_t strcspn(const char *s, const char *c) {
if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
// Unoptimized version.
memset(byteset, 0, sizeof(byteset));
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
while (!byteset[*(unsigned char *)s]) s++;
#else // __OPTIMIZE__
// This is faster than memset.
// Going backward helps bounds check elimination.
volatile v128_t *w = (v128_t *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
// Setting byteset[0] = 1 avoids the next loop needing that check.
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
#pragma unroll 4
#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__
#pragma unroll 4
#endif
while (!byteset[*(unsigned char *)s]) s++;
#endif // __OPTIMIZE__
return s - a;
}
@@ -435,8 +410,9 @@ size_t strcspn(const char *s, const char *c) {
// - strsep
// - strtok
__attribute__((weak, always_inline))
__attribute__((weak))
void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n) {
void *memchr(const void *v, int c, size_t n);
const void *m = memchr(src, c, n);
if (m != NULL) {
n = (char *)m - (char *)src + 1;
@@ -446,15 +422,23 @@ void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n
return (void *)m;
}
__attribute__((weak, always_inline))
char *stpcpy(char *__restrict dest, const char *__restrict src) {
// Appends at most n bytes of src to the end of the string in dest and
// then writes a terminating NUL right after the appended bytes.
// Returns dest.
__attribute__((weak))
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
  size_t strnlen(const char *s, size_t n);

  // Appending starts where dest's current NUL terminator sits.
  char *tail = dest + strlen(dest);
  // strnlen caps the count at n, or at src's NUL if that comes first.
  const size_t count = strnlen(src, n);
  memcpy(tail, src, count);
  tail[count] = 0;
  return dest;
}
// Copies the string at src, terminating NUL included, into dest.
// Returns a pointer to the NUL that was written into dest.
static char *__stpcpy(char *__restrict dest, const char *__restrict src) {
  const size_t len = strlen(src);
  char *end = dest + len;
  // len + 1 so the terminator is copied along with the characters.
  memcpy(dest, src, len + 1);
  return end;
}
__attribute__((weak, always_inline))
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
size_t strnlen(const char *s, size_t n);
size_t slen = strnlen(src, n);
memcpy(dest, src, slen);
@@ -463,24 +447,23 @@ char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
}
// Public stpcpy entry point: forwards both arguments to __stpcpy
// and hands back its result unchanged.
__attribute__((weak, always_inline))
char *stpcpy(char *__restrict dest, const char *__restrict src) {
  char *end = __stpcpy(dest, src);
  return end;
}
char *strcpy(char *__restrict dest, const char *__restrict src) {
stpcpy(dest, src);
__stpcpy(dest, src);
return dest;
}
// Public stpncpy entry point: forwards all three arguments to __stpncpy
// and hands back its result unchanged.
__attribute__((weak, always_inline))
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
  char *result = __stpncpy(dest, src, n);
  return result;
}
__attribute__((weak, always_inline))
char *strncpy(char *__restrict dest, const char *__restrict src, size_t n) {
stpncpy(dest, src, n);
return dest;
}
__attribute__((weak, always_inline))
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
size_t strnlen(const char *s, size_t n);
size_t dlen = strlen(dest);
size_t slen = strnlen(src, n);
memcpy(dest + dlen, src, slen);
dest[dlen + slen] = 0;
__stpncpy(dest, src, n);
return dest;
}

View File

@@ -16,24 +16,34 @@ __attribute__((weak))
int bcmp(const void *v1, const void *v2, size_t n) {
// bcmp is the same as memcmp but only compares for equality.
// Baseline algorithm.
if (n < sizeof(v128_t)) {
const unsigned char *u1 = (unsigned char *)v1;
const unsigned char *u2 = (unsigned char *)v2;
while (n--) {
if (*u1 != *u2) return 1;
u1++;
u2++;
}
return 0;
}
// bcmp is allowed to read up to n bytes from each object.
// Unaligned loads handle the case where the objects
// have mismatching alignments.
const v128_t *w1 = (v128_t *)v1;
const v128_t *w2 = (v128_t *)v2;
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
while (n) {
// Find any single bit difference.
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
return 1;
}
w1++;
w2++;
}
// Continue byte-by-byte.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
while (n--) {
if (*u1 != *u2) return 1;
u1++;
u2++;
// This makes n a multiple of sizeof(v128_t)
// for every iteration except the first.
size_t align = (n - 1) % sizeof(v128_t) + 1;
w1 = (v128_t *)((char *)w1 + align);
w2 = (v128_t *)((char *)w2 + align);
n -= align;
}
return 0;
}

Binary file not shown.