Mirror of https://github.com/ncruces/go-sqlite3.git (synced 2026-01-20 01:24:15 +00:00)

Compare commits: 10 commits
| Author | SHA1 | Date |
|---|---|---|
| | c780ef16e2 | |
| | b609930142 | |
| | fd165ce724 | |
| | d3973b23e3 | |
| | 320b68e74f | |
| | 2c3850e5d1 | |
| | db7aacff9f | |
| | d748d98e39 | |
| | 13b8642384 | |
| | 29c5c816cb | |
@@ -1,6 +1,6 @@
# Embeddable Wasm build of SQLite

This folder includes an embeddable Wasm build of SQLite 3.49.1 for use with
This folder includes an embeddable Wasm build of SQLite 3.49.2 for use with
[`github.com/ncruces/go-sqlite3`](https://pkg.go.dev/github.com/ncruces/go-sqlite3).

The following optional features are compiled in:
Binary file not shown.
@@ -13,8 +13,8 @@ mkdir -p build/ext/
cp "$ROOT"/sqlite3/*.[ch] build/
cp "$ROOT"/sqlite3/*.patch build/

# https://sqlite.org/src/info/3215186aa9204149
curl -# https://sqlite.org/src/tarball/sqlite.tar.gz?r=3215186a | tar xz
# https://sqlite.org/src/info/9d6517e7cc8bf175
curl -# https://sqlite.org/src/tarball/sqlite.tar.gz?r=9d6517e7 | tar xz

cd sqlite
if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then

@@ -43,8 +43,8 @@ cd ~-

"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
-o bcw2.wasm "build/main.c" \
-I"build" \
-o bcw2.wasm build/main.c \
-I"$ROOT/sqlite3/libc" -I"build" \
-mexec-model=reactor \
-msimd128 -mmutable-globals -mmultivalue \
-mbulk-memory -mreference-types \
@@ -19,7 +19,7 @@ func Test_init(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if version != "3.49.1" {
if version != "3.49.2" {
t.Error(version)
}
}
Binary file not shown.
go.mod (8 lines changed)
@@ -8,16 +8,16 @@ require (
github.com/ncruces/julianday v1.0.0
github.com/ncruces/sort v0.1.5
github.com/tetratelabs/wazero v1.9.0
golang.org/x/crypto v0.37.0
golang.org/x/sys v0.32.0
golang.org/x/crypto v0.38.0
golang.org/x/sys v0.33.0
)

require (
github.com/dchest/siphash v1.2.3 // ext/bloom
github.com/google/uuid v1.6.0 // ext/uuid
github.com/psanford/httpreadat v0.1.0 // example
golang.org/x/sync v0.13.0 // test
golang.org/x/text v0.24.0 // ext/unicode
golang.org/x/sync v0.14.0 // test
golang.org/x/text v0.25.0 // ext/unicode
lukechampine.com/adiantum v1.1.1 // vfs/adiantum
)
go.sum (16 lines changed)
@@ -10,13 +10,13 @@ github.com/psanford/httpreadat v0.1.0 h1:VleW1HS2zO7/4c7c7zNl33fO6oYACSagjJIyMIw
github.com/psanford/httpreadat v0.1.0/go.mod h1:Zg7P+TlBm3bYbyHTKv/EdtSJZn3qwbPwpfZ/I9GKCRE=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
lukechampine.com/adiantum v1.1.1 h1:4fp6gTxWCqpEbLy40ExiYDDED3oUNWx5cTqBCtPdZqA=
lukechampine.com/adiantum v1.1.1/go.mod h1:LrAYVnTYLnUtE/yMp5bQr0HstAf060YUF8nM0B6+rUw=
@@ -3,7 +3,7 @@ set -euo pipefail

cd -P -- "$(dirname -- "$0")"

curl -#OL "https://sqlite.org/2025/sqlite-amalgamation-3490100.zip"
curl -#OL "https://sqlite.org/2025/sqlite-amalgamation-3490200.zip"
unzip -d . sqlite-amalgamation-*.zip
mv sqlite-amalgamation-*/sqlite3.c .
mv sqlite-amalgamation-*/sqlite3.h .

@@ -19,30 +19,30 @@ rm -rf sqlite-amalgamation-*

mkdir -p ext/
cd ext/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/anycollseq.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/base64.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/decimal.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/ieee754.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/regexp.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/series.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/spellfix.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/uint.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/anycollseq.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/base64.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/decimal.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/ieee754.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/regexp.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/series.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/spellfix.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/uint.c"
cd ~-

cd ../vfs/tests/mptest/testdata/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/config01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/config02.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/crash01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/crash02.subtest"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/multiwrite01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/config01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/config02.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/crash01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/crash02.subtest"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/multiwrite01.test"
cd ~-

cd ../vfs/tests/mptest/wasm/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/mptest.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/mptest.c"
cd ~-

cd ../vfs/tests/speedtest1/wasm/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/test/speedtest1.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/test/speedtest1.c"
cd ~-

cat *.patch | patch -p0 --no-backup-if-mismatch
@@ -27,16 +27,25 @@ EOF
-Wl,--stack-first \
-Wl,--import-undefined \
-Wl,--initial-memory=16777216 \
-Wl,--export=memccpy \
-Wl,--export=memchr \
-Wl,--export=memcmp \
-Wl,--export=memcpy \
-Wl,--export=memmove \
-Wl,--export=memrchr \
-Wl,--export=memset \
-Wl,--export=stpcpy \
-Wl,--export=stpncpy \
-Wl,--export=strchr \
-Wl,--export=strchrnul \
-Wl,--export=strcmp \
-Wl,--export=strcpy \
-Wl,--export=strcspn \
-Wl,--export=strlen \
-Wl,--export=strncat \
-Wl,--export=strncmp \
-Wl,--export=strncpy \
-Wl,--export=strrchr \
-Wl,--export=strspn \
-Wl,--export=qsort
Binary file not shown.
File diff suppressed because it is too large
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/tetratelabs/wazero"
|
||||
@@ -31,6 +32,7 @@ var (
|
||||
strchr api.Function
|
||||
strcmp api.Function
|
||||
strspn api.Function
|
||||
strrchr api.Function
|
||||
strncmp api.Function
|
||||
strcspn api.Function
|
||||
stack [8]uint64
|
||||
@@ -63,6 +65,7 @@ func TestMain(m *testing.M) {
|
||||
strchr = mod.ExportedFunction("strchr")
|
||||
strcmp = mod.ExportedFunction("strcmp")
|
||||
strspn = mod.ExportedFunction("strspn")
|
||||
strrchr = mod.ExportedFunction("strrchr")
|
||||
strncmp = mod.ExportedFunction("strncmp")
|
||||
strcspn = mod.ExportedFunction("strcspn")
|
||||
memory, _ = mod.Memory().Read(0, mod.Memory().Size())
|
||||
@@ -139,6 +142,18 @@ func Benchmark_strchr(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strrchr(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size/2], 5)
|
||||
fill(memory[ptr1+size/2:ptr1+size-1], 7)
|
||||
|
||||
b.SetBytes(size/2 + 1)
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strrchr, ptr1, 5)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strcmp(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size-1], 7)
|
||||
@@ -195,43 +210,117 @@ func Benchmark_strcspn(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func Test_memchr(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
clear(memory[:2*page])
|
||||
func Test_memcmp(t *testing.T) {
|
||||
const s1 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x7f\xf3\x93\x01\x00\x01"
|
||||
const s2 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x80\xf3\x93\x01\x00\x02"
|
||||
|
||||
ptr := (page - 8) + alignment
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
p1 := ptr1
|
||||
p2 := len(memory) - len(s2)
|
||||
|
||||
want := 0
|
||||
if pos < length {
|
||||
want = ptr + pos
|
||||
}
|
||||
clear(memory)
|
||||
copy(memory[p1:], s1)
|
||||
copy(memory[p2:], s2)
|
||||
|
||||
got := call(memchr, uint64(ptr), 7, uint64(length))
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
|
||||
ptr, 7, uint64(length), uint32(got), uint32(want))
|
||||
}
|
||||
for i := range len(s1) + 1 {
|
||||
for j := range len(s1) - i {
|
||||
want := strings.Compare(s1[i:i+j], s2[i:i+j])
|
||||
got := call(memcmp, uint64(p1+i), uint64(p2+i), uint64(j))
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strcmp(%d, %d, %d) = %d, want %d",
|
||||
ptr1+i, ptr2+i, j, int32(got), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
ptr := len(memory) - length
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
func Test_strcmp(t *testing.T) {
|
||||
const s1 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x7f\xf3\x93\x01\x00\x01"
|
||||
const s2 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x80\xf3\x93\x01\x00\x02"
|
||||
|
||||
want := len(memory) - 1
|
||||
if length == 0 {
|
||||
want = 0
|
||||
p1 := ptr1
|
||||
p2 := len(memory) - len(s2) - 1
|
||||
|
||||
clear(memory)
|
||||
copy(memory[p1:], s1)
|
||||
copy(memory[p2:], s2)
|
||||
|
||||
for i := range len(s1) + 1 {
|
||||
want := strings.Compare(term(s1[i:]), term(s2[i:]))
|
||||
got := call(strcmp, uint64(p1+i), uint64(p2+i))
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strcmp(%d, %d) = %d, want %d",
|
||||
p1+i, ptr2+i, int32(got), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
got := call(memchr, uint64(ptr), 7, uint64(length))
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
|
||||
ptr, 7, uint64(length), uint32(got), uint32(want))
|
||||
func Test_strncmp(t *testing.T) {
|
||||
const s1 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x7f\xf3\x93\x01\x00\x01"
|
||||
const s2 string = "" +
|
||||
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
|
||||
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
|
||||
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
|
||||
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
|
||||
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
|
||||
"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
|
||||
"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
|
||||
"\x80\xf3\x93\x01\x00\x02"
|
||||
|
||||
p1 := ptr1
|
||||
p2 := len(memory) - len(s2) - 1
|
||||
|
||||
clear(memory)
|
||||
copy(memory[p1:], s1)
|
||||
copy(memory[p2:], s2)
|
||||
|
||||
for i := range len(s1) + 1 {
|
||||
for j := range len(s1) - i + 1 {
|
||||
want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j]))
|
||||
got := call(strncmp, uint64(p1+i), uint64(p2+i), uint64(j))
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strncmp(%d, %d, %d) = %d, want %d",
|
||||
ptr1+i, ptr2+i, j, int32(got), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -239,9 +328,9 @@ func Test_memchr(t *testing.T) {
|
||||
func Test_strlen(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for alignment := range 24 {
|
||||
clear(memory[:2*page])
|
||||
|
||||
ptr := (page - 8) + alignment
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
|
||||
got := call(strlen, uint64(ptr))
|
||||
@@ -270,22 +359,62 @@ func Test_strlen(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strchr(t *testing.T) {
|
||||
func Test_memchr(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
clear(memory[:2*page])
|
||||
|
||||
ptr := (page - 8) + alignment
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
memory[ptr+length] = 0
|
||||
|
||||
want := 0
|
||||
if pos < length {
|
||||
want = ptr + pos
|
||||
}
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
|
||||
got := call(memchr, uint64(ptr), 7, uint64(length))
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
|
||||
ptr, 7, uint64(length), uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
ptr := len(memory) - length
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
|
||||
want := len(memory) - 1
|
||||
if length == 0 {
|
||||
want = 0
|
||||
}
|
||||
|
||||
got := call(memchr, uint64(ptr), 7, uint64(length))
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
|
||||
ptr, 7, uint64(length), uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strchr(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
ptr := (page - 8) + alignment
|
||||
want := 0
|
||||
if pos < length {
|
||||
want = ptr + pos
|
||||
}
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
memory[ptr+pos+1] = 7
|
||||
memory[ptr+length] = 0
|
||||
|
||||
got := call(strchr, uint64(ptr), 7)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strchr(%d, %d) = %d, want %d",
|
||||
@@ -312,21 +441,66 @@ func Test_strchr(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strrchr(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
ptr := (page - 8) + alignment
|
||||
want := 0
|
||||
if pos < length {
|
||||
want = ptr + pos
|
||||
} else if length > 0 {
|
||||
want = ptr
|
||||
}
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr] = 7
|
||||
memory[ptr+pos] = 7
|
||||
memory[ptr+length] = 0
|
||||
|
||||
got := call(strrchr, uint64(ptr), 7)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strrchr(%d, %d) = %d, want %d",
|
||||
ptr, 7, uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ptr := len(memory) - length
|
||||
want := len(memory) - 2
|
||||
if length <= 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[ptr] = 7
|
||||
memory[len(memory)-2] = 7
|
||||
memory[len(memory)-1] = 0
|
||||
|
||||
got := call(strrchr, uint64(ptr), 7)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strrchr(%d, %d) = %d, want %d",
|
||||
ptr, 7, uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strspn(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
clear(memory[:2*page])
|
||||
|
||||
ptr := (page - 8) + alignment
|
||||
want := min(pos, length)
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
memory[ptr+length] = 0
|
||||
memory[128] = 3
|
||||
memory[129] = 5
|
||||
|
||||
want := min(pos, length)
|
||||
|
||||
got := call(strspn, uint64(ptr), 129)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strspn(%d, %d) = %d, want %d",
|
||||
@@ -341,18 +515,18 @@ func Test_strspn(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
ptr := len(memory) - length
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
memory[128] = 3
|
||||
memory[129] = 5
|
||||
|
||||
want := length - 1
|
||||
if length == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
memory[128] = 3
|
||||
memory[129] = 5
|
||||
|
||||
got := call(strspn, uint64(ptr), 129)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strspn(%d, %d) = %d, want %d",
|
||||
@@ -371,17 +545,16 @@ func Test_strcspn(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
for alignment := range 24 {
|
||||
clear(memory[:2*page])
|
||||
|
||||
ptr := (page - 8) + alignment
|
||||
want := min(pos, length)
|
||||
|
||||
clear(memory[:2*page])
|
||||
fill(memory[ptr:ptr+max(pos, length)], 5)
|
||||
memory[ptr+pos] = 7
|
||||
memory[ptr+length] = 0
|
||||
memory[128] = 3
|
||||
memory[129] = 7
|
||||
|
||||
want := min(pos, length)
|
||||
|
||||
got := call(strcspn, uint64(ptr), 129)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strcspn(%d, %d) = %d, want %d",
|
||||
@@ -396,18 +569,18 @@ func Test_strcspn(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
ptr := len(memory) - length
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
memory[128] = 3
|
||||
memory[129] = 7
|
||||
|
||||
want := length - 1
|
||||
if length == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
clear(memory)
|
||||
fill(memory[ptr:ptr+length], 5)
|
||||
memory[len(memory)-1] = 7
|
||||
memory[128] = 3
|
||||
memory[129] = 7
|
||||
|
||||
got := call(strcspn, uint64(ptr), 129)
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strcspn(%d, %d) = %d, want %d",
|
||||
@@ -427,3 +600,21 @@ func fill(s []byte, v byte) {
|
||||
s[i] = v
|
||||
}
|
||||
}
|
||||
|
||||
func sign(x int32) int {
|
||||
switch {
|
||||
case x > 0:
|
||||
return +1
|
||||
case x < 0:
|
||||
return -1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func term(s string) string {
|
||||
if i := strings.IndexByte(s, 0); i >= 0 {
|
||||
return s[:i]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
sqlite3/libc/math.h (new file, 36 lines)
@@ -0,0 +1,36 @@
#ifndef _WASM_SIMD128_MATH_H
#define _WASM_SIMD128_MATH_H

#include <wasm_simd128.h>

#include_next <math.h> // the system math.h

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __wasm_relaxed_simd__

// This header assumes "relaxed fused multiply-add"
// is both faster and more precise.

#define FP_FAST_FMA 1

__attribute__((weak))
double fma(double x, double y, double z) {
  // If we get a software implementation from the host,
  // this is enough to short circuit it on the 2nd lane.
  const v128_t wx = wasm_f64x2_replace_lane((v128_t){}, 0, x);
  const v128_t wy = wasm_f64x2_splat(y);
  const v128_t wz = wasm_f64x2_splat(z);
  const v128_t wr = wasm_f64x2_relaxed_madd(wx, wy, wz);
  return wasm_f64x2_extract_lane(wr, 0);
}

#endif // __wasm_relaxed_simd__

#ifdef __cplusplus
} // extern "C"
#endif

#endif // _WASM_SIMD128_MATH_H
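A minimal usage sketch for the fma override above (not part of the patch): with this header ahead of the system math.h on the include path, as the build script's -I"$ROOT/sqlite3/libc" flag arranges, every fma call routes through the relaxed-SIMD implementation. The polyeval helper is hypothetical, only there to exercise the call; note also that the snippet's original, undefined `b` operand is read here as a zero vector, which keeps the second lane of the madd trivial.

```c
#include <math.h>
#include <stdio.h>

// Horner evaluation of a polynomial: each step is one fused multiply-add.
static double polyeval(const double *c, int n, double x) {
  double r = c[n - 1];
  for (int i = n - 2; i >= 0; i--) r = fma(r, x, c[i]);
  return r;
}

int main(void) {
  const double c[] = {1.0, 0.5, 0.25};        // 1 + x/2 + x^2/4
  printf("p(2) = %g\n", polyeval(c, 3, 2.0));  // prints 3
#ifdef FP_FAST_FMA
  puts("FP_FAST_FMA is defined: fma is at least as fast as x*y + z");
#endif
  return 0;
}
```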
@@ -17,22 +17,31 @@ extern "C" {
__attribute__((weak))
void qsort(void *base, size_t nel, size_t width,
int (*comp)(const void *, const void *)) {
if (width == 0) return;

// If nel is zero, we're required to do nothing.
// If it's one, the array is already sorted.
size_t wnel = width * nel;
size_t gap = nel;
while (gap > 1) {
// Use 64-bit unsigned arithmetic to avoid intermediate overflow.
// Absent overflow, gap will be strictly less than its previous value.
// Once it is one or zero, set it to one: do a final pass, and stop.
gap = (5ull * gap - 1) / 11;
if (gap == 0) gap = 1;

// It'd be undefined behavior for wnel to overflow a size_t;
// or if width is zero: the base pointer would be invalid.
// Since gap is strictly less than nel, we can assume
// wgap is strictly less than wnel.
size_t wgap = width * gap;
__builtin_assume(wgap < wnel);
for (size_t i = wgap; i < wnel; i += width) {
// Even without overflow flags, the overflow builtin helps the compiler.
for (size_t j = i; !__builtin_sub_overflow(j, wgap, &j);) {
char *a = j + (char *)base;
char *b = a + wgap;
if (comp(a, b) <= 0) break;

// This well known loop is automatically vectorized.
size_t s = width;
do {
char tmp = *a;
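For intuition about the gap sequence in the shell sort above, here is a small standalone sketch (mine, not part of the patch) that prints the gaps produced by gap = (5*gap - 1)/11 for a given element count; forcing the final value to 1 guarantees the last pass is a plain insertion sort, which is what lets the outer loop terminate.

```c
#include <stddef.h>
#include <stdio.h>

// Print the shrink sequence the qsort above would use for nel elements.
// Each gap is strictly smaller than the previous one, and the run ends at 1.
int main(void) {
  size_t nel = 1000;
  size_t gap = nel;
  while (gap > 1) {
    gap = (5ull * gap - 1) / 11;
    if (gap == 0) gap = 1;
    printf("%zu ", gap);   // 454 206 93 42 19 8 3 1 for nel = 1000
  }
  putchar('\n');
  return 0;
}
```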
@@ -25,7 +25,7 @@ void *memset(void *dest, int c, size_t n) {
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
void *memcpy(void *restrict dest, const void *restrict src, size_t n) {
|
||||
void *memcpy(void *__restrict dest, const void *__restrict src, size_t n) {
|
||||
return __builtin_memcpy(dest, src, n);
|
||||
}
|
||||
|
||||
@@ -38,38 +38,46 @@ void *memmove(void *dest, const void *src, size_t n) {
|
||||
|
||||
#ifdef __wasm_simd128__
|
||||
|
||||
// SIMD versions of some string.h functions.
|
||||
//
|
||||
// These assume aligned v128_t loads can't fail,
|
||||
// and so can't unaligned loads up to the last
|
||||
// aligned address less than memory size.
|
||||
//
|
||||
// These also assume unaligned access is not painfully slow,
|
||||
// but that bitmask extraction is really slow on AArch64.
|
||||
// SIMD implementations of string.h functions.
|
||||
|
||||
__attribute__((weak))
|
||||
int memcmp(const void *v1, const void *v2, size_t n) {
|
||||
// memcmp can read up to n bytes from each object.
|
||||
// Use unaligned loads to handle the case where
|
||||
// the objects have mismatching alignments.
|
||||
const v128_t *w1 = v1;
|
||||
const v128_t *w2 = v2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
break;
|
||||
// Baseline algorithm.
|
||||
if (n < sizeof(v128_t)) {
|
||||
const unsigned char *u1 = (unsigned char *)v1;
|
||||
const unsigned char *u2 = (unsigned char *)v2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Continue byte-by-byte.
|
||||
const unsigned char *u1 = (void *)w1;
|
||||
const unsigned char *u2 = (void *)w2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
u1++;
|
||||
u2++;
|
||||
// memcmp is allowed to read up to n bytes from each object.
|
||||
// Find the first different character in the objects.
|
||||
// Unaligned loads handle the case where the objects
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)v1;
|
||||
const v128_t *w2 = (v128_t *)v2;
|
||||
while (n) {
|
||||
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
|
||||
// Bitmask is slow on AArch64, all_true is much faster.
|
||||
if (!wasm_i8x16_all_true(cmp)) {
|
||||
// Find the offset of the first zero bit (little-endian).
|
||||
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
|
||||
const unsigned char *u1 = (unsigned char *)w1 + ctz;
|
||||
const unsigned char *u2 = (unsigned char *)w2 + ctz;
|
||||
// This may help the compiler if the function is inlined.
|
||||
__builtin_assume(*u1 - *u2 != 0);
|
||||
return *u1 - *u2;
|
||||
}
|
||||
// This makes n a multiple of sizeof(v128_t)
|
||||
// for every iteration except the first.
|
||||
size_t align = (n - 1) % sizeof(v128_t) + 1;
|
||||
w1 = (v128_t *)((char *)w1 + align);
|
||||
w2 = (v128_t *)((char *)w2 + align);
|
||||
n -= align;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -77,7 +85,7 @@ int memcmp(const void *v1, const void *v2, size_t n) {
|
||||
__attribute__((weak))
|
||||
void *memchr(const void *v, int c, size_t n) {
|
||||
// When n is zero, a function that locates a character finds no occurrence.
|
||||
// Otherwise, decrement n to ensure __builtin_sub_overflow "overflows"
|
||||
// Otherwise, decrement n to ensure sub_overflow overflows
|
||||
// when n would go equal-to-or-below zero.
|
||||
if (n-- == 0) {
|
||||
return NULL;
|
||||
@@ -85,20 +93,20 @@ void *memchr(const void *v, int c, size_t n) {
|
||||
|
||||
// memchr must behave as if it reads characters sequentially
|
||||
// and stops as soon as a match is found.
|
||||
// Aligning ensures loads can't fail.
|
||||
// Aligning ensures loads beyond the first match don't fail.
|
||||
uintptr_t align = (uintptr_t)v % sizeof(v128_t);
|
||||
const v128_t *w = (void *)(v - align);
|
||||
const v128_t *w = (v128_t *)((char *)v - align);
|
||||
const v128_t wc = wasm_i8x16_splat(c);
|
||||
|
||||
while (true) {
|
||||
for (;;) {
|
||||
const v128_t cmp = wasm_i8x16_eq(*w, wc);
|
||||
// Bitmask is slow on AArch64, any_true is much faster.
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
// Clear the bits corresponding to alignment
|
||||
// Clear the bits corresponding to alignment (little-endian)
|
||||
// so we can count trailing zeros.
|
||||
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
|
||||
// At least one bit will be set, unless we cleared them.
|
||||
// Knowing this helps the compiler.
|
||||
// Knowing this helps the compiler.
|
||||
__builtin_assume(mask || align);
|
||||
// If the mask is zero because of alignment,
|
||||
// it's as if we didn't find anything.
|
||||
@@ -106,10 +114,10 @@ void *memchr(const void *v, int c, size_t n) {
|
||||
// We found a match, unless it is beyond the end of the object.
|
||||
// Recall that we decremented n, so less-than-or-equal-to is correct.
|
||||
size_t ctz = __builtin_ctz(mask);
|
||||
return ctz <= n + align ? (void *)w + ctz : NULL;
|
||||
return ctz <= n + align ? (char *)w + ctz : NULL;
|
||||
}
|
||||
}
|
||||
// Decrement n; if it "overflows" we're done.
|
||||
// Decrement n; if it overflows we're done.
|
||||
if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -118,22 +126,45 @@ void *memchr(const void *v, int c, size_t n) {
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
void *memrchr(const void *v, int c, size_t n) {
|
||||
// memrchr is allowed to read up to n bytes from the object.
|
||||
// Search backward for the last matching character.
|
||||
const v128_t *w = (v128_t *)((char *)v + n);
|
||||
const v128_t wc = wasm_i8x16_splat(c);
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--w), wc);
|
||||
// Bitmask is slow on AArch64, any_true is much faster.
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 15;
|
||||
return (char *)(w + 1) - clz;
|
||||
}
|
||||
}
|
||||
|
||||
// Baseline algorithm.
|
||||
const char *a = (char *)w;
|
||||
while (n--) {
|
||||
if (*(--a) == (char)c) return (char *)a;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
size_t strlen(const char *s) {
|
||||
// strlen must stop as soon as it finds the terminator.
|
||||
// Aligning ensures loads can't fail.
|
||||
// Aligning ensures loads beyond the terminator don't fail.
|
||||
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
|
||||
const v128_t *w = (void *)(s - align);
|
||||
const v128_t *w = (v128_t *)(s - align);
|
||||
|
||||
while (true) {
|
||||
for (;;) {
|
||||
// Bitmask is slow on AArch64, all_true is much faster.
|
||||
if (!wasm_i8x16_all_true(*w)) {
|
||||
const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){});
|
||||
// Clear the bits corresponding to alignment
|
||||
// Clear the bits corresponding to alignment (little-endian)
|
||||
// so we can count trailing zeros.
|
||||
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
|
||||
// At least one bit will be set, unless we cleared them.
|
||||
// Knowing this helps the compiler.
|
||||
// Knowing this helps the compiler.
|
||||
__builtin_assume(mask || align);
|
||||
if (mask) {
|
||||
return (char *)w - s + __builtin_ctz(mask);
|
||||
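The memchr, strlen, and strchrnul loops above all rely on the same trick: round the pointer down to a 16-byte boundary so the vector load cannot cross the end of linear memory, then drop the match bits that fall before the real start of the object with `bitmask >> align << align`. A scalar sketch of that masking step (my own illustration, with an assumed address):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uintptr_t s = 0x1003;          // unaligned start address
  uintptr_t align = s % 16;      // 3 bytes of "pre-roll"
  uintptr_t base = s - align;    // aligned load address, 0x1000

  // Pretend the i8x16 comparison matched lanes 1 and 7 of that load.
  unsigned mask = (1u << 1) | (1u << 7);
  mask = mask >> align << align; // lane 1 is before s, so it is dropped

  if (mask)
    printf("first match at address %#lx\n",
           (unsigned long)(base + __builtin_ctz(mask)));  // 0x1007
  return 0;
}
```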
@@ -145,23 +176,23 @@ size_t strlen(const char *s) {
|
||||
}
|
||||
|
||||
static int __strcmp(const char *s1, const char *s2) {
|
||||
// Set limit to the largest possible valid v128_t pointer.
|
||||
// Unsigned modular arithmetic gives the correct result
|
||||
// unless memory size is zero, in which case all pointers are invalid.
|
||||
const v128_t *const limit =
|
||||
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
|
||||
// Use unaligned loads to handle the case where
|
||||
// the strings have mismatching alignments.
|
||||
const v128_t *w1 = (void *)s1;
|
||||
const v128_t *w2 = (void *)s2;
|
||||
while (w1 <= limit && w2 <= limit) {
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
// The strings may still be equal,
|
||||
// if the terminator is found before that difference.
|
||||
break;
|
||||
}
|
||||
// All bytes are equal.
|
||||
// If any byte is zero (on both strings) the strings are equal.
|
||||
// All characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
|
||||
return 0;
|
||||
}
|
||||
@@ -169,10 +200,22 @@ static int __strcmp(const char *s1, const char *s2) {
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Continue byte-by-byte.
|
||||
const unsigned char *u1 = (void *)w1;
|
||||
const unsigned char *u2 = (void *)w2;
|
||||
while (true) {
|
||||
// Baseline algorithm.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
for (;;) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
if (*u1 == 0) break;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __strcmp_s(const char *s1, const char *s2) {
|
||||
const unsigned char *u1 = (unsigned char *)s1;
|
||||
const unsigned char *u2 = (unsigned char *)s2;
|
||||
for (;;) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
if (*u1 == 0) break;
|
||||
u1++;
|
||||
@@ -183,33 +226,33 @@ static int __strcmp(const char *s1, const char *s2) {
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
int strcmp(const char *s1, const char *s2) {
|
||||
// Use strncmp when comparing against literal strings.
|
||||
// If the literal is small, the vector search will be skipped.
|
||||
if (__builtin_constant_p(strlen(s2))) {
|
||||
return strncmp(s1, s2, strlen(s2));
|
||||
// Skip the vector search when comparing against small literal strings.
|
||||
if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
|
||||
return __strcmp_s(s1, s2);
|
||||
}
|
||||
return __strcmp(s1, s2);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
int strncmp(const char *s1, const char *s2, size_t n) {
|
||||
// Set limit to the largest possible valid v128_t pointer.
|
||||
// Unsigned modular arithmetic gives the correct result
|
||||
// unless memory size is zero, in which case all pointers are invalid.
|
||||
const v128_t *const limit =
|
||||
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
if (n > N) n = N;
|
||||
|
||||
// Use unaligned loads to handle the case where
|
||||
// the strings have mismatching alignments.
|
||||
const v128_t *w1 = (void *)s1;
|
||||
const v128_t *w2 = (void *)s2;
|
||||
for (; w1 <= limit && w2 <= limit && n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
// The strings may still be equal,
|
||||
// if the terminator is found before that difference.
|
||||
break;
|
||||
}
|
||||
// All bytes are equal.
|
||||
// If any byte is zero (on both strings) the strings are equal.
|
||||
// All characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
|
||||
return 0;
|
||||
}
|
||||
@@ -217,9 +260,9 @@ int strncmp(const char *s1, const char *s2, size_t n) {
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Continue byte-by-byte.
|
||||
const unsigned char *u1 = (void *)w1;
|
||||
const unsigned char *u2 = (void *)w2;
|
||||
// Baseline algorithm.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
if (*u1 == 0) break;
|
||||
@@ -231,20 +274,20 @@ int strncmp(const char *s1, const char *s2, size_t n) {
|
||||
|
||||
static char *__strchrnul(const char *s, int c) {
|
||||
// strchrnul must stop as soon as a match is found.
|
||||
// Aligning ensures loads can't fail.
|
||||
// Aligning ensures loads beyond the first match don't fail.
|
||||
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
|
||||
const v128_t *w = (void *)(s - align);
|
||||
const v128_t *w = (v128_t *)(s - align);
|
||||
const v128_t wc = wasm_i8x16_splat(c);
|
||||
|
||||
while (true) {
|
||||
for (;;) {
|
||||
const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc);
|
||||
// Bitmask is slow on AArch64, any_true is much faster.
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
// Clear the bits corresponding to alignment
|
||||
// Clear the bits corresponding to alignment (little-endian)
|
||||
// so we can count trailing zeros.
|
||||
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
|
||||
// At least one bit will be set, unless we cleared them.
|
||||
// Knowing this helps the compiler.
|
||||
// Knowing this helps the compiler.
|
||||
__builtin_assume(mask || align);
|
||||
if (mask) {
|
||||
return (char *)w + __builtin_ctz(mask);
|
||||
@@ -271,99 +314,221 @@ char *strchr(const char *s, int c) {
|
||||
return (char *)s + strlen(s);
|
||||
}
|
||||
char *r = __strchrnul(s, c);
|
||||
return *(char *)r == (char)c ? r : NULL;
|
||||
return *r == (char)c ? r : NULL;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *strrchr(const char *s, int c) {
|
||||
// For finding the terminator, strlen is faster.
|
||||
if (__builtin_constant_p(c) && (char)c == 0) {
|
||||
return (char *)s + strlen(s);
|
||||
}
|
||||
// This could also be implemented in a single pass using strchr,
|
||||
// advancing to the next match until no more matches are found.
|
||||
// That would be suboptimal with lots of consecutive matches.
|
||||
return (char *)memrchr(s, c, strlen(s) + 1);
|
||||
}
|
||||
|
||||
// http://0x80.pl/notesen/2018-10-18-simd-byte-lookup.html
|
||||
|
||||
#define _WASM_SIMD128_BITMAP256_T \
|
||||
struct { \
|
||||
uint8_t l __attribute__((__vector_size__(16), __aligned__(16))); \
|
||||
uint8_t h __attribute__((__vector_size__(16), __aligned__(16))); \
|
||||
}
|
||||
|
||||
#define _WASM_SIMD128_SETBIT(bitmap, i) \
|
||||
({ \
|
||||
uint8_t _c = (uint8_t)(i); \
|
||||
uint8_t _hi_nibble = _c >> 4; \
|
||||
uint8_t _lo_nibble = _c & 0xf; \
|
||||
bitmap.l[_lo_nibble] |= 1 << (_hi_nibble - 0); \
|
||||
bitmap.h[_lo_nibble] |= 1 << (_hi_nibble - 8); \
|
||||
})
|
||||
|
||||
#define _WASM_SIMD128_CHKBIT(bitmap, i) \
|
||||
({ \
|
||||
uint8_t _c = (uint8_t)(i); \
|
||||
uint8_t _hi_nibble = _c >> 4; \
|
||||
uint8_t _lo_nibble = _c & 0xf; \
|
||||
uint8_t _bitmask = 1 << (_hi_nibble & 0x7); \
|
||||
uint8_t _bitset = (_hi_nibble < 8 ? bitmap.l : bitmap.h)[_lo_nibble]; \
|
||||
_bitmask & _bitset; \
|
||||
})
|
||||
|
||||
#define _WASM_SIMD128_CHKBITS(bitmap, v) \
|
||||
({ \
|
||||
v128_t _w = v; \
|
||||
v128_t _hi_nibbles = wasm_u8x16_shr(_w, 4); \
|
||||
v128_t _lo_nibbles = _w & wasm_u8x16_const_splat(0xf); \
|
||||
\
|
||||
v128_t _bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, \
|
||||
1, 2, 4, 8, 16, 32, 64, 128); \
|
||||
\
|
||||
v128_t _bitmask = wasm_i8x16_swizzle(_bitmask_lookup, _hi_nibbles); \
|
||||
v128_t _bitsets = wasm_v128_bitselect( \
|
||||
wasm_i8x16_swizzle(bitmap.l, _lo_nibbles), \
|
||||
wasm_i8x16_swizzle(bitmap.h, _lo_nibbles), \
|
||||
wasm_i8x16_lt(_hi_nibbles, wasm_u8x16_const_splat(8))); \
|
||||
\
|
||||
wasm_i8x16_eq(_bitsets & _bitmask, _bitmask); \
|
||||
})
|
||||
|
||||
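The _WASM_SIMD128_BITMAP256_T macros above implement the byte-set lookup from the linked 0x80.pl note: a 256-entry membership set is stored as two 16-byte tables indexed by a character's low nibble, and the high nibble selects a bit within the table entry. A scalar model of SETBIT/CHKBIT (mine, not part of the patch) showing the same layout:

```c
#include <stdint.h>
#include <stdio.h>

// Scalar model of _WASM_SIMD128_SETBIT/_CHKBIT: the low half of the byte
// range lives in l[], the high half in h[], both indexed by the low nibble,
// with the high nibble choosing the bit inside the entry.
typedef struct { uint8_t l[16], h[16]; } bitmap256;

static void setbit(bitmap256 *b, uint8_t c) {
  uint8_t hi = c >> 4, lo = c & 0xf;
  if (hi < 8) b->l[lo] |= (uint8_t)(1 << hi);
  else        b->h[lo] |= (uint8_t)(1 << (hi - 8));
}

static int chkbit(const bitmap256 *b, uint8_t c) {
  uint8_t hi = c >> 4, lo = c & 0xf;
  uint8_t set = hi < 8 ? b->l[lo] : b->h[lo];
  return set & (1 << (hi & 0x7));
}

int main(void) {
  bitmap256 b = {{0}, {0}};
  for (const char *c = "abc"; *c; c++) setbit(&b, (uint8_t)*c);
  printf("'b' in set: %d, 'z' in set: %d\n",
         !!chkbit(&b, 'b'), !!chkbit(&b, 'z'));  // 1, 0
  return 0;
}
```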
__attribute__((weak))
|
||||
size_t strspn(const char *s, const char *c) {
|
||||
#ifndef _REENTRANT
|
||||
static // Avoid the stack for builds without threads.
|
||||
#endif
|
||||
char byteset[UCHAR_MAX + 1];
|
||||
// How many bytes can be read before the pointer goes out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - (size_t)s;
|
||||
const v128_t *w = (v128_t *)s;
|
||||
const char *const a = s;
|
||||
|
||||
if (!c[0]) return 0;
|
||||
if (!c[1]) {
|
||||
// Set limit to the largest possible valid v128_t pointer.
|
||||
// Unsigned modular arithmetic gives the correct result
|
||||
// unless memory size is zero, in which case all pointers are invalid.
|
||||
const v128_t *const limit =
|
||||
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
|
||||
|
||||
const v128_t *w = (void *)s;
|
||||
const v128_t wc = wasm_i8x16_splat(*c);
|
||||
while (w <= limit) {
|
||||
if (!wasm_i8x16_all_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) {
|
||||
break;
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w), wc);
|
||||
// Bitmask is slow on AArch64, all_true is much faster.
|
||||
if (!wasm_i8x16_all_true(cmp)) {
|
||||
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
|
||||
return (char *)w + ctz - s;
|
||||
}
|
||||
w++;
|
||||
}
|
||||
|
||||
s = (void *)w;
|
||||
while (*s == *c) s++;
|
||||
// Baseline algorithm.
|
||||
for (s = (char *)w; *s == *c; s++);
|
||||
return s - a;
|
||||
}
|
||||
|
||||
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
|
||||
_WASM_SIMD128_BITMAP256_T bitmap = {};
|
||||
|
||||
// Unoptimized version.
|
||||
memset(byteset, 0, sizeof(byteset));
|
||||
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
|
||||
while (byteset[*(unsigned char *)s]) s++;
|
||||
for (; *c; c++) {
|
||||
_WASM_SIMD128_SETBIT(bitmap, *c);
|
||||
// Terminator IS NOT on the bitmap.
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// This is faster than memset.
|
||||
volatile v128_t *w = (void *)byteset;
|
||||
#pragma unroll
|
||||
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
|
||||
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
|
||||
|
||||
// Keeping byteset[0] = 0 avoids the other loop having to test for it.
|
||||
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
|
||||
#pragma unroll 4
|
||||
while (byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#endif
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w));
|
||||
// Bitmask is slow on AArch64, all_true is much faster.
|
||||
if (!wasm_i8x16_all_true(cmp)) {
|
||||
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
|
||||
return (char *)w + ctz - s;
|
||||
}
|
||||
w++;
|
||||
}
|
||||
|
||||
// Baseline algorithm.
|
||||
for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++);
|
||||
return s - a;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
size_t strcspn(const char *s, const char *c) {
|
||||
#ifndef _REENTRANT
|
||||
static // Avoid the stack for builds without threads.
|
||||
#endif
|
||||
char byteset[UCHAR_MAX + 1];
|
||||
const char *const a = s;
|
||||
|
||||
if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
|
||||
|
||||
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
|
||||
// How many bytes can be read before the pointer goes out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - (size_t)s;
|
||||
const v128_t *w = (v128_t *)s;
|
||||
const char *const a = s;
|
||||
|
||||
// Unoptimized version.
|
||||
memset(byteset, 0, sizeof(byteset));
|
||||
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
|
||||
while (!byteset[*(unsigned char *)s]) s++;
|
||||
_WASM_SIMD128_BITMAP256_T bitmap = {};
|
||||
|
||||
#else
|
||||
for (;;) {
|
||||
_WASM_SIMD128_SETBIT(bitmap, *c);
|
||||
// Terminator IS on the bitmap.
|
||||
if (!*c++) break;
|
||||
}
|
||||
|
||||
// This is faster than memset.
|
||||
volatile v128_t *w = (void *)byteset;
|
||||
#pragma unroll
|
||||
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
|
||||
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
|
||||
|
||||
// Setting byteset[0] = 1 avoids the other loop having to test for it.
|
||||
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
|
||||
#pragma unroll 4
|
||||
while (!byteset[*(unsigned char *)s]) s++;
|
||||
|
||||
#endif
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w));
|
||||
// Bitmask is slow on AArch64, any_true is much faster.
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
size_t ctz = __builtin_ctz(wasm_i8x16_bitmask(cmp));
|
||||
return (char *)w + ctz - s;
|
||||
}
|
||||
w++;
|
||||
}
|
||||
|
||||
// Baseline algorithm.
|
||||
for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++);
|
||||
return s - a;
|
||||
}
|
||||
|
||||
#undef _WASM_SIMD128_SETBIT
|
||||
#undef _WASM_SIMD128_CHKBIT
|
||||
#undef _WASM_SIMD128_CHKBITS
|
||||
#undef _WASM_SIMD128_BITMAP256_T
|
||||
|
||||
// Given the above SIMD implementations,
|
||||
// these are best implemented as
|
||||
// small wrappers over those functions.
|
||||
|
||||
// Simple wrappers already in musl:
|
||||
// - mempcpy
|
||||
// - strcat
|
||||
// - strdup
|
||||
// - strndup
|
||||
// - strnlen
|
||||
// - strpbrk
|
||||
// - strsep
|
||||
// - strtok
|
||||
|
||||
__attribute__((weak))
|
||||
void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n) {
|
||||
void *memchr(const void *v, int c, size_t n);
|
||||
const void *m = memchr(src, c, n);
|
||||
if (m != NULL) {
|
||||
n = (char *)m - (char *)src + 1;
|
||||
m = (char *)dest + n;
|
||||
}
|
||||
memcpy(dest, src, n);
|
||||
return (void *)m;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
size_t strnlen(const char *s, size_t n);
|
||||
size_t dlen = strlen(dest);
|
||||
size_t slen = strnlen(src, n);
|
||||
memcpy(dest + dlen, src, slen);
|
||||
dest[dlen + slen] = 0;
|
||||
return dest;
|
||||
}
|
||||
|
||||
static char *__stpcpy(char *__restrict dest, const char *__restrict src) {
|
||||
size_t slen = strlen(src);
|
||||
memcpy(dest, src, slen + 1);
|
||||
return dest + slen;
|
||||
}
|
||||
|
||||
static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
size_t strnlen(const char *s, size_t n);
|
||||
size_t slen = strnlen(src, n);
|
||||
memcpy(dest, src, slen);
|
||||
memset(dest + slen, 0, n - slen);
|
||||
return dest + slen;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpcpy(char *__restrict dest, const char *__restrict src) {
|
||||
return __stpcpy(dest, src);
|
||||
}
|
||||
|
||||
char *strcpy(char *__restrict dest, const char *__restrict src) {
|
||||
__stpcpy(dest, src);
|
||||
return dest;
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
return __stpncpy(dest, src, n);
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
char *strncpy(char *__restrict dest, const char *__restrict src, size_t n) {
|
||||
__stpncpy(dest, src, n);
|
||||
return dest;
|
||||
}
|
||||
|
||||
#endif // __wasm_simd128__
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
sqlite3/libc/strings.h (new file, 57 lines)
@@ -0,0 +1,57 @@
#ifndef _WASM_SIMD128_STRINGS_H
#define _WASM_SIMD128_STRINGS_H

#include <stddef.h>
#include <wasm_simd128.h>

#include_next <strings.h> // the system strings.h

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __wasm_simd128__

__attribute__((weak))
int bcmp(const void *v1, const void *v2, size_t n) {
  // bcmp is the same as memcmp but only compares for equality.

  // Baseline algorithm.
  if (n < sizeof(v128_t)) {
    const unsigned char *u1 = (unsigned char *)v1;
    const unsigned char *u2 = (unsigned char *)v2;
    while (n--) {
      if (*u1 != *u2) return 1;
      u1++;
      u2++;
    }
    return 0;
  }

  // bcmp is allowed to read up to n bytes from each object.
  // Unaligned loads handle the case where the objects
  // have mismatching alignments.
  const v128_t *w1 = (v128_t *)v1;
  const v128_t *w2 = (v128_t *)v2;
  while (n) {
    // Find any single bit difference.
    if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
      return 1;
    }
    // This makes n a multiple of sizeof(v128_t)
    // for every iteration except the first.
    size_t align = (n - 1) % sizeof(v128_t) + 1;
    w1 = (v128_t *)((char *)w1 + align);
    w2 = (v128_t *)((char *)w2 + align);
    n -= align;
  }
  return 0;
}

#endif // __wasm_simd128__

#ifdef __cplusplus
} // extern "C"
#endif

#endif // _WASM_SIMD128_STRINGS_H
Binary file not shown.
Binary file not shown.