diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat
index 9373667..441da66 100644
--- a/sqlite3/libc/libc.wat
+++ b/sqlite3/libc/libc.wat
@@ -964,11 +964,11 @@
       )
     )
     (v128.store
-      (i32.const 65520)
+      (i32.const 65568)
       (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
     )
     (v128.store
-      (i32.const 65504)
+      (i32.const 65552)
       (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
     )
     (local.set $1
@@ -988,7 +988,7 @@
            )
            (i32.const 28)
          )
-         (i32.const 65504)
+         (i32.const 65552)
        )
      )
      (i32.or
@@ -1046,7 +1046,7 @@
            )
            (i32.const 28)
          )
-         (i32.const 65504)
+         (i32.const 65552)
        )
      )
      (local.get $3)
@@ -1206,11 +1206,11 @@
       )
     )
     (v128.store
-      (i32.const 65520)
+      (i32.const 65600)
       (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
     )
     (v128.store
-      (i32.const 65504)
+      (i32.const 65584)
       (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
     )
     (local.set $1
@@ -1230,7 +1230,7 @@
            )
            (i32.const 28)
          )
-         (i32.const 65504)
+         (i32.const 65584)
        )
      )
      (i32.or
@@ -1280,7 +1280,7 @@
            )
            (i32.const 28)
          )
-         (i32.const 65504)
+         (i32.const 65584)
        )
      )
      (local.get $2)
diff --git a/sqlite3/libc/stdlib.h b/sqlite3/libc/stdlib.h
index ba91da6..ba465ba 100644
--- a/sqlite3/libc/stdlib.h
+++ b/sqlite3/libc/stdlib.h
@@ -12,8 +12,9 @@ extern "C" {
 
 // Shellsort with Gonnet & Baeza-Yates gap sequence.
 // Simple, no recursion, doesn't use the C stack.
-// Clang auto-vectorizes the inner loop.
+// Clang auto-vectorizes the inner loop.
 
+__attribute__((weak))
 void qsort(void *base, size_t nel, size_t width,
            int (*comp)(const void *, const void *)) {
   if (width == 0) return;
diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h
index a785887..f224094 100644
--- a/sqlite3/libc/string.h
+++ b/sqlite3/libc/string.h
@@ -6,7 +6,7 @@
 #include 
 #include <__macro_PAGESIZE.h>
 
-#include_next <string.h> // the system string.h
+#include_next <string.h> // the system string.h
 
 #ifdef __cplusplus
 extern "C" {
@@ -18,14 +18,17 @@ extern "C" {
 // Clang will intrinsify using SIMD for small, constant N.
 // For everything else, this helps inlining.
 
+__attribute__((weak))
 void *memset(void *dest, int c, size_t n) {
   return __builtin_memset(dest, c, n);
 }
 
+__attribute__((weak))
 void *memcpy(void *restrict dest, const void *restrict src, size_t n) {
   return __builtin_memcpy(dest, src, n);
 }
 
+__attribute__((weak))
 void *memmove(void *dest, const void *src, size_t n) {
   return __builtin_memmove(dest, src, n);
 }
@@ -43,6 +46,7 @@ void *memmove(void *dest, const void *src, size_t n) {
 // These also assume unaligned access is not painfully slow,
 // but that bitmask extraction is slow on AArch64.
 
+__attribute__((weak))
 int memcmp(const void *v1, const void *v2, size_t n) {
   const v128_t *w1 = v1;
   const v128_t *w2 = v2;
@@ -64,6 +68,7 @@ int memcmp(const void *v1, const void *v2, size_t n) {
   return 0;
 }
 
+__attribute__((weak))
 void *memchr(const void *v, int c, size_t n) {
   uintptr_t align = (uintptr_t)v % sizeof(v128_t);
   const v128_t *w = (void *)(v - align);
@@ -86,6 +91,7 @@ void *memchr(const void *v, int c, size_t n) {
   }
 }
 
+__attribute__((weak))
 size_t strlen(const char *s) {
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
   const v128_t *w = (void *)(s - align);
@@ -104,6 +110,7 @@ size_t strlen(const char *s) {
   }
 }
 
+__attribute__((weak))
 int strcmp(const char *s1, const char *s2) {
   const v128_t *const limit =
       (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
@@ -132,6 +139,7 @@ int strcmp(const char *s1, const char *s2) {
   return 0;
 }
 
+__attribute__((weak))
 int strncmp(const char *s1, const char *s2, size_t n) {
   const v128_t *const limit =
       (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
@@ -160,7 +168,8 @@ int strncmp(const char *s1, const char *s2, size_t n) {
   return 0;
 }
 
-char *strchrnul(const char *s, int c) {
+__attribute__((always_inline))
+static char *__strchrnul(const char *s, int c) {
   if (__builtin_constant_p(c) && (char)c == 0) {
     return (char *)s + strlen(s);
   }
@@ -183,18 +192,34 @@ char *strchrnul(const char *s, int c) {
   }
 }
 
+__attribute__((weak))
+char *strchrnul(const char *s, int c) {
+  return __strchrnul(s, c);
+}
+
+__attribute__((weak))
 char *strchr(const char *s, int c) {
-  char *r = strchrnul(s, c);
+  char *r = __strchrnul(s, c);
   return *(char *)r == (char)c ? r : NULL;
 }
 
+#pragma push_macro("STATIC")
 #pragma push_macro("BITOP")
 
+// Avoid using the C stack.
+#ifndef _REENTRANT
+#define STATIC static
+#else
+#define STATIC
+#endif
+
 #define BITOP(a, b, op) \
   ((a)[(b) / (8 * sizeof(size_t))] op((size_t)1) \
    << ((b) % (8 * sizeof(size_t))))
 
+__attribute__((weak))
 size_t strspn(const char *s, const char *c) {
+  STATIC size_t byteset[32 / sizeof(size_t)];
   const char *const a = s;
   if (!c[0]) return 0;
 
@@ -216,25 +241,27 @@ size_t strspn(const char *s, const char *c) {
 
     return s - a;
   }
 
-  size_t byteset[32 / sizeof(size_t)] = {0};
-
+  memset(byteset, 0, sizeof(byteset));
   for (; *c && BITOP(byteset, *(uint8_t *)c, |=); c++);
   for (; *s && BITOP(byteset, *(uint8_t *)s, &); s++);
   return s - a;
 }
 
+__attribute__((weak))
 size_t strcspn(const char *s, const char *c) {
-  if (!c[0] || !c[1]) return strchrnul(s, *c) - s;
-
+  STATIC size_t byteset[32 / sizeof(size_t)];
   const char *const a = s;
-  size_t byteset[32 / sizeof(size_t)] = {0};
+  if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
+
+  memset(byteset, 0, sizeof(byteset));
   for (; *c && BITOP(byteset, *(uint8_t *)c, |=); c++);
   for (; *s && !BITOP(byteset, *(uint8_t *)s, &); s++);
   return s - a;
 }
 
 #pragma pop_macro("BITOP")
+#pragma pop_macro("STATIC")
 
 #endif // __wasm_simd128__
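
Note on the __attribute__((weak)) additions: marking these definitions weak lets
a strong definition of the same symbol, linked in from elsewhere, take precedence
at link time instead of causing a duplicate-symbol error, so users can still
supply their own memcpy, qsort, etc. A minimal sketch of the mechanism, with a
hypothetical function name (not from this patch), assuming a Clang/GCC toolchain:

#include <stdio.h>

// Weak fallback: kept only if no strong definition of get_value() is linked.
__attribute__((weak)) int get_value(void) { return 42; }

int main(void) {
  // Prints 42 here; linking an object that defines a strong get_value()
  // would silently replace the weak fallback above.
  printf("%d\n", get_value());
  return 0;
}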
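Note on the STATIC/_REENTRANT change in strspn/strcspn: when STATIC expands to
static, the 32-byte byteset moves out of the function's C-stack frame into static
storage (matching the patch's "Avoid using the C stack" comment). That is also why
the "= {0}" initializer becomes an explicit memset: a static object is zeroed once
at startup, not on each call, so stale bits from a previous call must be cleared.
A sketch of the same pattern on a hypothetical helper, assuming a single-threaded
(non-_REENTRANT) build:

#include <string.h>

#ifndef _REENTRANT
#define STATIC static  // scratch buffer lives in static storage, off the C stack
#else
#define STATIC         // automatic: concurrent callers get separate copies
#endif

// Hypothetical example: counts distinct byte values in s.
size_t count_distinct_bytes(const char *s) {
  STATIC unsigned char seen[256];
  memset(seen, 0, sizeof(seen));  // re-zero every call; a static "= {0}"
                                  // would be zeroed only once, at startup
  size_t n = 0;
  for (; *s; s++) {
    unsigned char b = (unsigned char)*s;
    if (!seen[b]) { seen[b] = 1; n++; }
  }
  return n;
}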