mirror of
https://github.com/ncruces/go-sqlite3.git
synced 2026-01-11 21:49:13 +00:00
More memcmp.
This commit is contained in:
Binary file not shown.
@@ -43,8 +43,8 @@ cd ~-
|
||||
|
||||
"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
|
||||
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
|
||||
-o bcw2.wasm "build/main.c" \
|
||||
-I"build" \
|
||||
-o bcw2.wasm build/main.c \
|
||||
-I"$ROOT/sqlite3/libc" -I"build" \
|
||||
-mexec-model=reactor \
|
||||
-msimd128 -mmutable-globals -mmultivalue \
|
||||
-mbulk-memory -mreference-types \
|
||||
|
||||
Binary file not shown.
8
go.mod
8
go.mod
@@ -8,16 +8,16 @@ require (
|
||||
github.com/ncruces/julianday v1.0.0
|
||||
github.com/ncruces/sort v0.1.5
|
||||
github.com/tetratelabs/wazero v1.9.0
|
||||
golang.org/x/crypto v0.37.0
|
||||
golang.org/x/sys v0.32.0
|
||||
golang.org/x/crypto v0.38.0
|
||||
golang.org/x/sys v0.33.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/dchest/siphash v1.2.3 // ext/bloom
|
||||
github.com/google/uuid v1.6.0 // ext/uuid
|
||||
github.com/psanford/httpreadat v0.1.0 // example
|
||||
golang.org/x/sync v0.13.0 // test
|
||||
golang.org/x/text v0.24.0 // ext/unicode
|
||||
golang.org/x/sync v0.14.0 // test
|
||||
golang.org/x/text v0.25.0 // ext/unicode
|
||||
lukechampine.com/adiantum v1.1.1 // vfs/adiantum
|
||||
)
|
||||
|
||||
|
||||
16
go.sum
16
go.sum
@@ -10,13 +10,13 @@ github.com/psanford/httpreadat v0.1.0 h1:VleW1HS2zO7/4c7c7zNl33fO6oYACSagjJIyMIw
|
||||
github.com/psanford/httpreadat v0.1.0/go.mod h1:Zg7P+TlBm3bYbyHTKv/EdtSJZn3qwbPwpfZ/I9GKCRE=
|
||||
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
|
||||
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
|
||||
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
|
||||
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
|
||||
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
|
||||
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
|
||||
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
|
||||
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
|
||||
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
|
||||
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
|
||||
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
|
||||
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
|
||||
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
|
||||
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
|
||||
lukechampine.com/adiantum v1.1.1 h1:4fp6gTxWCqpEbLy40ExiYDDED3oUNWx5cTqBCtPdZqA=
|
||||
lukechampine.com/adiantum v1.1.1/go.mod h1:LrAYVnTYLnUtE/yMp5bQr0HstAf060YUF8nM0B6+rUw=
|
||||
|
||||
@@ -27,16 +27,24 @@ EOF
|
||||
-Wl,--stack-first \
|
||||
-Wl,--import-undefined \
|
||||
-Wl,--initial-memory=16777216 \
|
||||
-Wl,--export=memccpy \
|
||||
-Wl,--export=memchr \
|
||||
-Wl,--export=memcmp \
|
||||
-Wl,--export=memcpy \
|
||||
-Wl,--export=memmove \
|
||||
-Wl,--export=memrchr \
|
||||
-Wl,--export=memset \
|
||||
-Wl,--export=stpcpy \
|
||||
-Wl,--export=stpncpy \
|
||||
-Wl,--export=strchr \
|
||||
-Wl,--export=strchrnul \
|
||||
-Wl,--export=strcmp \
|
||||
-Wl,--export=strcpy \
|
||||
-Wl,--export=strcspn \
|
||||
-Wl,--export=strlen \
|
||||
-Wl,--export=strncat \
|
||||
-Wl,--export=strncmp \
|
||||
-Wl,--export=strncpy \
|
||||
-Wl,--export=strrchr \
|
||||
-Wl,--export=strspn \
|
||||
-Wl,--export=qsort
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -42,13 +42,25 @@ void *memmove(void *dest, const void *src, size_t n) {
|
||||
|
||||
__attribute__((weak))
|
||||
int memcmp(const void *v1, const void *v2, size_t n) {
|
||||
// Baseline algorithm.
|
||||
if (n < sizeof(v128_t)) {
|
||||
const unsigned char *u1 = (unsigned char *)v1;
|
||||
const unsigned char *u2 = (unsigned char *)v2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// memcmp is allowed to read up to n bytes from each object.
|
||||
// Find the first different character in the objects.
|
||||
// Unaligned loads handle the case where the objects
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)v1;
|
||||
const v128_t *w2 = (v128_t *)v2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
while (n) {
|
||||
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
|
||||
// Bitmask is slow on AArch64, all_true is much faster.
|
||||
if (!wasm_i8x16_all_true(cmp)) {
|
||||
@@ -60,17 +72,12 @@ int memcmp(const void *v1, const void *v2, size_t n) {
|
||||
__builtin_assume(*u1 - *u2 != 0);
|
||||
return *u1 - *u2;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Baseline algorithm.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
u1++;
|
||||
u2++;
|
||||
// This makes n a multiple of sizeof(v128_t)
|
||||
// for every iteration except the first.
|
||||
size_t align = (n - 1) % sizeof(v128_t) + 1;
|
||||
w1 = (v128_t *)((char *)w1 + align);
|
||||
w2 = (v128_t *)((char *)w2 + align);
|
||||
n -= align;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -359,29 +366,13 @@ size_t strspn(const char *s, const char *c) {
|
||||
return s - a;
|
||||
}
|
||||
|
||||
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
|
||||
|
||||
// Unoptimized version.
|
||||
memset(byteset, 0, sizeof(byteset));
|
||||
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
|
||||
while (byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#else // __OPTIMIZE__
|
||||
|
||||
// This is faster than memset.
|
||||
// Going backward helps bounds check elimination.
|
||||
volatile v128_t *w = (v128_t *)byteset;
|
||||
#pragma unroll
|
||||
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
|
||||
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
|
||||
|
||||
// Keeping byteset[0] = 0 avoids the next loop needing that check.
|
||||
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
|
||||
#pragma unroll 4
|
||||
#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__
|
||||
#pragma unroll 4
|
||||
#endif
|
||||
while (byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#endif // __OPTIMIZE__
|
||||
|
||||
return s - a;
|
||||
}
|
||||
|
||||
@@ -395,29 +386,13 @@ size_t strcspn(const char *s, const char *c) {
|
||||
|
||||
if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
|
||||
|
||||
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
|
||||
|
||||
// Unoptimized version.
|
||||
memset(byteset, 0, sizeof(byteset));
|
||||
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
|
||||
while (!byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#else // __OPTIMIZE__
|
||||
|
||||
// This is faster than memset.
|
||||
// Going backward helps bounds check elimination.
|
||||
volatile v128_t *w = (v128_t *)byteset;
|
||||
#pragma unroll
|
||||
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
|
||||
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
|
||||
|
||||
// Setting byteset[0] = 1 avoids the next loop needing that check.
|
||||
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
|
||||
#pragma unroll 4
|
||||
#if __OPTIMIZE__ && !__OPTIMIZE_SIZE__
|
||||
#pragma unroll 4
|
||||
#endif
|
||||
while (!byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#endif // __OPTIMIZE__
|
||||
|
||||
return s - a;
|
||||
}
|
||||
|
||||
@@ -435,8 +410,9 @@ size_t strcspn(const char *s, const char *c) {
|
||||
// - strsep
|
||||
// - strtok
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
__attribute__((weak))
|
||||
void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n) {
|
||||
void *memchr(const void *v, int c, size_t n);
|
||||
const void *m = memchr(src, c, n);
|
||||
if (m != NULL) {
|
||||
n = (char *)m - (char *)src + 1;
|
||||
@@ -446,15 +422,23 @@ void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n
|
||||
return (void *)m;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpcpy(char *__restrict dest, const char *__restrict src) {
|
||||
__attribute__((weak))
|
||||
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
size_t strnlen(const char *s, size_t n);
|
||||
size_t dlen = strlen(dest);
|
||||
size_t slen = strnlen(src, n);
|
||||
memcpy(dest + dlen, src, slen);
|
||||
dest[dlen + slen] = 0;
|
||||
return dest;
|
||||
}
|
||||
|
||||
static char *__stpcpy(char *__restrict dest, const char *__restrict src) {
|
||||
size_t slen = strlen(src);
|
||||
memcpy(dest, src, slen + 1);
|
||||
return dest + slen;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
size_t strnlen(const char *s, size_t n);
|
||||
size_t slen = strnlen(src, n);
|
||||
memcpy(dest, src, slen);
|
||||
@@ -463,24 +447,23 @@ char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpcpy(char *__restrict dest, const char *__restrict src) {
|
||||
return __stpcpy(dest, src);
|
||||
}
|
||||
|
||||
char *strcpy(char *__restrict dest, const char *__restrict src) {
|
||||
stpcpy(dest, src);
|
||||
__stpcpy(dest, src);
|
||||
return dest;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
return __stpncpy(dest, src, n);
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *strncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
stpncpy(dest, src, n);
|
||||
return dest;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
size_t strnlen(const char *s, size_t n);
|
||||
size_t dlen = strlen(dest);
|
||||
size_t slen = strnlen(src, n);
|
||||
memcpy(dest + dlen, src, slen);
|
||||
dest[dlen + slen] = 0;
|
||||
__stpncpy(dest, src, n);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,24 +16,34 @@ __attribute__((weak))
|
||||
int bcmp(const void *v1, const void *v2, size_t n) {
|
||||
// bcmp is the same as memcmp but only compares for equality.
|
||||
|
||||
// Baseline algorithm.
|
||||
if (n < sizeof(v128_t)) {
|
||||
const unsigned char *u1 = (unsigned char *)v1;
|
||||
const unsigned char *u2 = (unsigned char *)v2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return 1;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// bcmp is allowed to read up to n bytes from each object.
|
||||
// Unaligned loads handle the case where the objects
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)v1;
|
||||
const v128_t *w2 = (v128_t *)v2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
while (n) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
return 1;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Continue byte-by-byte.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return 1;
|
||||
u1++;
|
||||
u2++;
|
||||
// This makes n a multiple of sizeof(v128_t)
|
||||
// for every iteration except the first.
|
||||
size_t align = (n - 1) % sizeof(v128_t) + 1;
|
||||
w1 = (v128_t *)((char *)w1 + align);
|
||||
w2 = (v128_t *)((char *)w2 + align);
|
||||
n -= align;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user