diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index 68687cd..f376156 100755 Binary files a/sqlite3/libc/libc.wasm and b/sqlite3/libc/libc.wasm differ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 4354504..65e52cc 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -136,94 +136,128 @@ (i32.const 0) ) (func $memchr (param $0 i32) (param $1 i32) (param $2 i32) (result i32) - (local $3 v128) - (block $block2 - (block $block1 - (block $block - (if - (i32.ge_u - (local.get $2) - (i32.const 16) - ) - (then - (local.set $3 - (i8x16.splat - (local.get $1) - ) - ) - (loop $label - (br_if $block - (v128.any_true - (i8x16.eq - (v128.load align=1 - (local.get $0) - ) - (local.get $3) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 v128) + (local $7 v128) + (local.set $4 + (i32.and + (local.get $0) + (i32.const 15) + ) + ) + (block $block1 + (block $block + (if + (v128.any_true + (local.tee $6 + (i8x16.eq + (v128.load + (local.tee $3 + (i32.and + (local.get $0) + (i32.const -16) ) ) ) - (local.set $0 - (i32.add - (local.get $0) + (local.tee $7 + (i8x16.splat + (local.get $1) + ) + ) + ) + ) + ) + (then + (br_if $block + (local.tee $1 + (i32.and + (i8x16.bitmask + (local.get $6) + ) + (i32.shl + (i32.const -1) + (local.get $4) + ) + ) + ) + ) + ) + ) + (br_if $block1 + (i32.gt_u + (local.tee $1 + (i32.sub + (i32.add + (local.get $2) + (local.get $4) + ) + (i32.const 16) + ) + ) + (local.get $2) + ) + ) + (local.set $3 + (i32.add + (i32.sub + (local.get $0) + (local.get $4) + ) + (i32.const 16) + ) + ) + (block $block2 + (loop $label + (br_if $block2 + (v128.any_true + (local.tee $6 + (i8x16.eq + (v128.load + (local.get $3) + ) + (local.get $7) + ) + ) + ) + ) + (local.set $3 + (i32.add + (local.get $3) + (i32.const 16) + ) + ) + (br_if $label + (i32.ge_u + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $1) (i32.const 16) ) ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) - ) - (i32.const 15) - ) - ) ) ) ) - (br_if $block1 - (i32.eqz - (local.get $2) - ) - ) + (br $block1) ) (local.set $1 - (i32.and - (local.get $1) - (i32.const 255) - ) - ) - (loop $label1 - (br_if $block2 - (i32.eq - (i32.load8_u - (local.get $0) - ) - (local.get $1) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 1) - ) - ) - (br_if $label1 - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 1) - ) - ) + (i8x16.bitmask + (local.get $6) ) ) ) - (local.set $0 - (i32.const 0) + (local.set $5 + (i32.add + (local.get $3) + (i32.ctz + (local.get $1) + ) + ) ) ) - (local.get $0) + (local.get $5) ) (func $strlen (param $0 i32) (result i32) (local $1 i32) @@ -309,123 +343,90 @@ (local $2 i32) (local $3 i32) (local $4 v128) - (local $5 v128) - (local.set $3 - (block $block (result i32) - (if - (i32.and - (i32.or - (local.get $0) - (local.get $1) - ) - (i32.const 15) - ) - (then - (local.set $2 - (i32.load8_u - (local.get $0) - ) - ) - (br $block - (i32.load8_u - (local.get $1) + (block $block + (br_if $block + (i32.lt_u + (local.tee $2 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) ) + (i32.const 16) ) ) + (local.get $0) ) - (if + ) + (br_if $block + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (loop $label + (br_if $block (v128.any_true (v128.xor - (local.tee $5 - (v128.load - (local.get $1) - ) + (v128.load align=1 + (local.get $1) ) (local.tee $4 - (v128.load + (v128.load align=1 (local.get $0) ) ) ) ) + ) + (if + (i32.eqz + (i8x16.all_true + (local.get $4) + ) + ) (then - (local.set $2 - (i8x16.extract_lane_u 0 - (local.get $4) - ) - ) - (br $block - (i8x16.extract_lane_u 0 - (local.get $5) - ) + (return + (i32.const 0) ) ) ) - (loop $label - (if - (i32.eqz - (i8x16.all_true - (local.get $4) - ) - ) - (then - (return - (i32.const 0) - ) - ) - ) - (local.set $4 - (v128.load offset=16 - (local.get $0) - ) - ) - (local.set $5 - (v128.load offset=16 - (local.get $1) - ) - ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) - ) - ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) - ) - (br_if $label - (i32.eqz - (v128.any_true - (v128.xor - (local.get $5) - (local.get $4) - ) - ) - ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) ) ) - (local.set $2 - (i8x16.extract_lane_u 0 - (local.get $4) + (br_if $block + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (local.get $2) ) ) - (i8x16.extract_lane_u 0 - (local.get $5) + (br_if $label + (i32.le_u + (local.get $1) + (local.get $2) + ) ) ) ) (if (i32.eq - (i32.and - (local.get $2) - (i32.const 255) + (local.tee $2 + (i32.load8_u + (local.get $0) + ) ) - (i32.and - (local.get $3) - (i32.const 255) + (local.tee $3 + (i32.load8_u + (local.get $1) + ) ) ) (then @@ -441,16 +442,10 @@ (i32.const 1) ) ) - (local.set $2 - (local.get $3) - ) (loop $label1 (if (i32.eqz - (i32.and - (local.get $2) - (i32.const 255) - ) + (local.get $2) ) (then (return @@ -490,81 +485,95 @@ ) ) (i32.sub - (i32.and - (local.get $2) - (i32.const 255) - ) - (i32.and - (local.get $3) - (i32.const 255) - ) + (local.get $2) + (local.get $3) ) ) (func $strncmp (param $0 i32) (param $1 i32) (param $2 i32) (result i32) (local $3 i32) (local $4 i32) (local $5 v128) - (block $block - (if - (i32.ge_u - (local.get $2) - (i32.const 16) + (block $block1 + (block $block + (br_if $block + (i32.lt_u + (local.tee $3 + (i32.sub + (i32.shl + (memory.size) + (i32.const 16) + ) + (i32.const 16) + ) + ) + (local.get $0) + ) ) - (then - (loop $label - (br_if $block - (v128.any_true - (v128.xor + (loop $label + (br_if $block + (i32.gt_u + (local.get $1) + (local.get $3) + ) + ) + (br_if $block + (i32.lt_u + (local.get $2) + (i32.const 16) + ) + ) + (br_if $block1 + (v128.any_true + (v128.xor + (v128.load align=1 + (local.get $1) + ) + (local.tee $5 (v128.load align=1 - (local.get $1) - ) - (local.tee $5 - (v128.load align=1 - (local.get $0) - ) + (local.get $0) ) ) ) ) - (if - (i32.eqz - (i8x16.all_true - (local.get $5) - ) - ) - (then - (return - (i32.const 0) - ) + ) + (if + (i32.eqz + (i8x16.all_true + (local.get $5) ) ) - (local.set $1 - (i32.add - (local.get $1) - (i32.const 16) + (then + (return + (i32.const 0) ) ) - (local.set $0 - (i32.add - (local.get $0) - (i32.const 16) - ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 16) ) - (br_if $label - (i32.gt_u - (local.tee $2 - (i32.sub - (local.get $2) - (i32.const 16) - ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 16) + ) + ) + (br_if $label + (i32.le_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 16) ) - (i32.const 15) ) + (local.get $3) ) ) ) ) - (br_if $block + (br_if $block1 (local.get $2) ) (return diff --git a/sqlite3/sqlite_opt.h b/sqlite3/sqlite_opt.h index 0db37c3..73b1480 100644 --- a/sqlite3/sqlite_opt.h +++ b/sqlite3/sqlite_opt.h @@ -4,15 +4,15 @@ #define SQLITE_THREADSAFE 0 #define SQLITE_DEFAULT_WAL_SYNCHRONOUS 1 #define SQLITE_LIKE_DOESNT_MATCH_BLOBS -#define SQLITE_MAX_EXPR_DEPTH 0 #define SQLITE_STRICT_SUBTYPE 1 -#define SQLITE_USE_ALLOCA #define SQLITE_OMIT_DEPRECATED #define SQLITE_OMIT_SHARED_CACHE #define SQLITE_OMIT_AUTOINIT // We need these: // #define SQLITE_DEFAULT_MEMSTATUS 0 +// #define SQLITE_MAX_EXPR_DEPTH 0 +// #define SQLITE_USE_ALLOCA // #define SQLITE_OMIT_DECLTYPE // #define SQLITE_OMIT_PROGRESS_CALLBACK diff --git a/sqlite3/strings.c b/sqlite3/strings.c index 24cb9d7..cdf5389 100644 --- a/sqlite3/strings.c +++ b/sqlite3/strings.c @@ -1,6 +1,7 @@ #include #include #include +#include <__macro_PAGESIZE.h> #ifdef __wasm_bulk_memory__ @@ -42,23 +43,25 @@ int memcmp(const void *v1, const void *v2, size_t n) { } void *memchr(const void *v, int c, size_t n) { - c = (uint8_t)c; - + uintptr_t align = (uintptr_t)v % sizeof(v128_t); + const v128_t *w = (void *)(v - align); const v128_t wc = wasm_i8x16_splat(c); - const v128_t *w = (void *)v; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { - if (wasm_v128_any_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) { - break; + + while (true) { + const v128_t cmp = wasm_i8x16_eq(*w, wc); + if (wasm_v128_any_true(cmp)) { + int mask = wasm_i8x16_bitmask(cmp) >> align << align; + __builtin_assume(mask || align); + if (mask) { + return (void *)w + __builtin_ctz(mask); + } } + if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) { + return NULL; + } + align = 0; w++; } - - const uint8_t *u = (void *)w; - while (n--) { - if (*u == c) return (void *)u; - u++; - } - return 0; } size_t strlen(const char *s) { @@ -80,32 +83,40 @@ size_t strlen(const char *s) { } int strcmp(const char *s1, const char *s2) { + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + const v128_t *w1 = (void *)s1; const v128_t *w2 = (void *)s2; - if (((uintptr_t)s1 | (uintptr_t)s2) % sizeof(v128_t) == 0) { - while (!wasm_v128_any_true(*w1 ^ *w2)) { - if (!wasm_i8x16_all_true(*w1)) { - return 0; - } - w1++; - w2++; + while (w1 <= limit && w2 <= limit) { + if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { + break; } + if (!wasm_i8x16_all_true(wasm_v128_load(w1))) { + return 0; + } + w1++; + w2++; } const uint8_t *u1 = (void *)w1; const uint8_t *u2 = (void *)w2; while (true) { if (*u1 != *u2) return *u1 - *u2; - if (*u1 == 0) return 0; + if (*u1 == 0) break; u1++; u2++; } + return 0; } int strncmp(const char *s1, const char *s2, size_t n) { + const v128_t *const limit = + (v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1; + const v128_t *w1 = (void *)s1; const v128_t *w2 = (void *)s2; - for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) { + for (; w1 <= limit && w2 <= limit && n >= sizeof(v128_t); n -= sizeof(v128_t)) { if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) { break; } @@ -128,9 +139,7 @@ int strncmp(const char *s1, const char *s2, size_t n) { } char *strchrnul(const char *s, int c) { - c = (char)c; - - if (__builtin_constant_p(c) && c == 0) { + if (__builtin_constant_p(c) && (char)c == 0) { return (char *)s + strlen(s); } @@ -154,7 +163,7 @@ char *strchrnul(const char *s, int c) { char *strchr(const char *s, int c) { char *r = strchrnul(s, c); - return *(char *)r == (char)c ? r : 0; + return *(char *)r == (char)c ? r : NULL; } #endif diff --git a/sqlite3/vfs.c b/sqlite3/vfs.c index e69396e..e659f3b 100644 --- a/sqlite3/vfs.c +++ b/sqlite3/vfs.c @@ -137,9 +137,10 @@ sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName) { // Create a new C wrapper. sqlite3_vfs *head = go_vfs_list; - go_vfs_list = malloc(sizeof(sqlite3_vfs) + strlen(zVfsName) + 1); + size_t vfsNameLen = strlen(zVfsName); + go_vfs_list = malloc(sizeof(sqlite3_vfs) + vfsNameLen + 1); char *name = (char *)(go_vfs_list + 1); - strcpy(name, zVfsName); + memcpy(name, zVfsName, vfsNameLen + 1); *go_vfs_list = (sqlite3_vfs){ .iVersion = 2, .szOsFile = sizeof(struct go_file),