From f7658826700a88017b4c5330bff26d0a8053dcad Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Sat, 17 May 2025 13:20:11 +0100 Subject: [PATCH] Sunday's Quick Search. --- sqlite3/libc/libc.wasm | Bin 6398 -> 6394 bytes sqlite3/libc/libc.wat | 165 ++++++++++++++++++++--------------------- sqlite3/libc/string.h | 23 +++--- 3 files changed, 95 insertions(+), 93 deletions(-) diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index d3ea5f869fe6c8187abd3d51290cc6093327d27e..e9962648748b65a3557011bb65de7a59f477ea15 100755 GIT binary patch delta 333 zcmYk1J!%6%427kUc5E|jcDHt;9o(jHo60Kk#~OTyRGF!aQwBC&TEku>#Fxnx0y#kl zPK4QjtC2pv_oO*4zn5=2#LvSMK3w3}@KMFS5K_G1l@wKw;(9DViY%DoMCiT*u_tTQ znY7+)PZ~qcTBw^U=w7rJvi68wgt@-Bp8#%U;1{!kz&v`PjRleTXo$9vZ4+wGJE1hZ z){CAMbL0|25EEy8?n*E(=5{93g1FX`S;VKvmjJdj3CJwPsTr>T!~)X)V5)9p?u^mM n=#24+d0lX7y~Traj&OUrqZog=m%3>=2oHD7BUXp^W;8Qv? delta 337 zcmYk1OG*Pl5QeMzX=pN!bwFwww=S}EH3dG13OR%;yH_D@WpaeT+(U$1#!CpUMS?Zs zfLV3-$KT&y^}ReU-?xaLhZlS*@T>nQV`oS+-kO$78OYpB7a)@fQ#>QRnghv^xqOgl zwcd;wK}w!b)n(v4aW`mk^yK-7_YoGnQ$GPzD}1Li1HmF9`L)+i-1%(UsAs=9H3Tz_3ZV{v$^AN?;a8abr^ diff --git a/sqlite3/libc/libc.wat b/sqlite3/libc/libc.wat index 3df9e10..97d625a 100644 --- a/sqlite3/libc/libc.wat +++ b/sqlite3/libc/libc.wat @@ -2769,7 +2769,7 @@ (local.get $3) ) ) - (local.set $4 + (local.set $5 (call $memchr (local.get $0) (i32.load8_s @@ -2785,27 +2785,27 @@ ) (then (return - (local.get $4) + (local.get $5) ) ) ) (if (i32.eqz - (local.get $4) + (local.get $5) ) (then (return - (local.get $4) + (local.get $5) ) ) ) (br_if $block (i32.lt_u - (local.tee $8 + (local.tee $7 (i32.add (i32.sub (local.get $0) - (local.get $4) + (local.get $5) ) (local.get $1) ) @@ -2818,7 +2818,7 @@ (i32.and (i32.gt_u (i32.sub - (local.get $8) + (local.get $7) (local.get $3) ) (i32.const 15) @@ -2832,8 +2832,8 @@ (then (return (call $__memmem - (local.get $4) - (local.get $8) + (local.get $5) + (local.get $7) (local.get $2) (local.get $3) (i32.const 0) @@ -2845,15 +2845,10 @@ 
(i32.const 4112) (select (i32.const -1) - (local.tee $7 - (i32.sub - (local.get $3) - (i32.const 1) - ) - ) + (local.get $3) (local.tee $0 - (i32.gt_s - (local.get $7) + (i32.gt_u + (local.get $3) (i32.const 254) ) ) @@ -2863,31 +2858,33 @@ (block $block1 (br_if $block1 (i32.ge_u - (local.tee $1 + (local.tee $6 (select (i32.sub (local.get $3) - (i32.const 256) + (i32.const 255) ) (i32.const 0) (local.get $0) ) ) - (local.get $7) - ) - ) - (local.set $5 - (i32.sub - (i32.sub - (local.get $3) - (local.get $1) - ) - (i32.const 2) + (local.get $3) ) ) (if - (local.tee $6 + (local.tee $4 (i32.and + (i32.sub + (local.get $3) + (local.tee $1 + (local.get $6) + ) + ) + (i32.const 3) + ) + ) + (then + (local.set $0 (i32.add (i32.xor (local.get $1) @@ -2895,12 +2892,6 @@ ) (local.get $3) ) - (i32.const 3) - ) - ) - (then - (local.set $0 - (local.get $5) ) (loop $label (i32.store8 @@ -2928,9 +2919,9 @@ ) ) (br_if $label - (local.tee $6 + (local.tee $4 (i32.sub - (local.get $6) + (local.get $4) (i32.const 1) ) ) @@ -2939,35 +2930,61 @@ ) ) (br_if $block1 - (i32.lt_u - (local.get $5) - (i32.const 3) + (i32.gt_u + (i32.sub + (local.get $6) + (local.get $3) + ) + (i32.const -4) ) ) - (local.set $0 + (local.set $9 (i32.sub - (local.get $3) + (i32.const 0) (local.get $1) ) ) - (local.set $5 + (local.set $0 + (local.get $3) + ) + (local.set $6 (local.get $2) ) (loop $label1 (i32.store8 (i32.add (i32.load8_u - (local.tee $9 + (local.tee $4 (i32.add (local.get $1) - (local.get $5) + (local.get $6) ) ) ) (i32.const 4112) ) (i32.sub - (local.get $0) + (local.tee $8 + (i32.add + (local.get $0) + (local.get $9) + ) + ) + (i32.const 1) + ) + ) + (i32.store8 + (i32.add + (i32.load8_u + (i32.add + (local.get $4) + (i32.const 1) + ) + ) + (i32.const 4112) + ) + (i32.sub + (local.get $8) (i32.const 2) ) ) @@ -2975,14 +2992,14 @@ (i32.add (i32.load8_u (i32.add - (local.get $9) - (i32.const 1) + (local.get $4) + (i32.const 2) ) ) (i32.const 4112) ) (i32.sub - (local.get $0) + (local.get $8) 
(i32.const 3) ) ) @@ -2990,49 +3007,29 @@ (i32.add (i32.load8_u (i32.add - (local.get $9) - (i32.const 2) - ) - ) - (i32.const 4112) - ) - (local.tee $6 - (i32.sub - (local.get $0) - (i32.const 4) - ) - ) - ) - (i32.store8 - (i32.add - (i32.load8_u - (i32.add - (local.get $9) + (local.get $4) (i32.const 3) ) ) (i32.const 4112) ) (i32.sub - (local.get $0) - (i32.const 5) - ) - ) - (local.set $5 - (i32.add - (local.get $5) + (local.get $8) (i32.const 4) ) ) - (local.set $0 - (local.get $6) + (local.set $6 + (i32.add + (local.get $6) + (i32.const 4) + ) ) (br_if $label1 (i32.ne (local.get $1) - (local.tee $7 + (local.tee $0 (i32.sub - (local.get $7) + (local.get $0) (i32.const 4) ) ) @@ -3040,17 +3037,17 @@ ) ) ) - (local.set $6 + (local.set $4 (call $__memmem - (local.get $4) - (local.get $8) + (local.get $5) + (local.get $7) (local.get $2) (local.get $3) (i32.const 4112) ) ) ) - (local.get $6) + (local.get $4) ) (func $__memmem (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) (local $5 i32) @@ -3150,7 +3147,7 @@ (local.set $15 (i32.add (local.get $3) - (i32.const 14) + (i32.const 15) ) ) (local.set $16 diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 55cc61d..98ce011 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -460,9 +460,15 @@ size_t strcspn(const char *s, const char *c) { // For haystacks of known length and large enough needles, // Boyer-Moore's bad-character rule may be useful, -// as proposed by Horspool and Raita. +// as proposed by Horspool, Sunday and Raita. +// +// We augment the SIMD algorithm with Quick Search's +// bad-character shift. This does NOT depend on the order +// in which the window matched. 
+// // https://www-igm.univ-mlv.fr/~lecroq/string/node14.html // https://www-igm.univ-mlv.fr/~lecroq/string/node18.html +// https://www-igm.univ-mlv.fr/~lecroq/string/node19.html // https://www-igm.univ-mlv.fr/~lecroq/string/node22.html static const char *__memmem(const char *haystk, size_t sh, @@ -511,9 +517,9 @@ static const char *__memmem(const char *haystk, size_t sh, // Have we reached the end of the haystack? if (!wasm_i8x16_all_true(blk_fst)) return NULL; } else { - // Apply the bad-character rule to the rightmost - // character of the window. - if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + 15]]; + // Apply the bad-character rule to the character to the right + // of the rightmost character of the search window. + if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + sizeof(v128_t)]]; // Have we reached the end of the haystack? if (__builtin_sub_overflow(sh, skip, &sh)) return NULL; // Is the needle longer than the haystack? @@ -560,12 +566,11 @@ void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { // Compute Boyer-Moore's bad-character shift function. // Only the last 255 characters of the needle matter for shifts up to 255, // which is good enough for most needles. - size_t n = sn - 1; + size_t c = sn; size_t i = 0; - int c = n; if (c >= 255) { + i = sn - 255; c = 255; - i = n - 255; } #ifndef _REENTRANT @@ -573,10 +578,10 @@ void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { #endif uint8_t bmbc[256]; memset(bmbc, c, sizeof(bmbc)); - for (; i < n; i++) { + for (; i < sn; i++) { // One less than the usual offset because // we advance at least one vector at a time. - size_t t = n - i - 1; + size_t t = sn - i - 1; bmbc[(unsigned char)needle[i]] = t; }