Compare commits

..

10 Commits

Author SHA1 Message Date
Nuno Cruces
c780ef16e2 SQLite 3.49.2. 2025-05-07 14:08:18 +01:00
Nuno Cruces
b609930142 Refactor #274. 2025-05-07 12:46:13 +01:00
Nuno Cruces
fd165ce724 Issue #274. 2025-05-07 01:37:52 +01:00
Nuno Cruces
d3973b23e3 More memcmp. 2025-05-06 15:48:58 +01:00
Nuno Cruces
320b68e74f More tests. 2025-05-05 14:47:43 +01:00
Nuno Cruces
2c3850e5d1 Reuse fast funcs. 2025-05-03 01:18:10 +01:00
Nuno Cruces
db7aacff9f Add strrchr. 2025-05-02 14:35:14 +01:00
Nuno Cruces
d748d98e39 Fix. 2025-05-01 12:49:38 +01:00
Nuno Cruces
13b8642384 Compile as C++. 2025-04-29 14:03:59 +01:00
Nuno Cruces
29c5c816cb More libc. 2025-04-27 23:35:13 +01:00
18 changed files with 2227 additions and 1255 deletions

View File

@@ -1,6 +1,6 @@
# Embeddable Wasm build of SQLite
This folder includes an embeddable Wasm build of SQLite 3.49.1 for use with
This folder includes an embeddable Wasm build of SQLite 3.49.2 for use with
[`github.com/ncruces/go-sqlite3`](https://pkg.go.dev/github.com/ncruces/go-sqlite3).
The following optional features are compiled in:

Binary file not shown.

View File

@@ -13,8 +13,8 @@ mkdir -p build/ext/
cp "$ROOT"/sqlite3/*.[ch] build/
cp "$ROOT"/sqlite3/*.patch build/
# https://sqlite.org/src/info/3215186aa9204149
curl -# https://sqlite.org/src/tarball/sqlite.tar.gz?r=3215186a | tar xz
# https://sqlite.org/src/info/9d6517e7cc8bf175
curl -# https://sqlite.org/src/tarball/sqlite.tar.gz?r=9d6517e7 | tar xz
cd sqlite
if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
@@ -43,8 +43,8 @@ cd ~-
"$WASI_SDK/clang" --target=wasm32-wasi -std=c23 -g0 -O2 \
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
-o bcw2.wasm "build/main.c" \
-I"build" \
-o bcw2.wasm build/main.c \
-I"$ROOT/sqlite3/libc" -I"build" \
-mexec-model=reactor \
-msimd128 -mmutable-globals -mmultivalue \
-mbulk-memory -mreference-types \

View File

@@ -19,7 +19,7 @@ func Test_init(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if version != "3.49.1" {
if version != "3.49.2" {
t.Error(version)
}
}

Binary file not shown.

8
go.mod
View File

@@ -8,16 +8,16 @@ require (
github.com/ncruces/julianday v1.0.0
github.com/ncruces/sort v0.1.5
github.com/tetratelabs/wazero v1.9.0
golang.org/x/crypto v0.37.0
golang.org/x/sys v0.32.0
golang.org/x/crypto v0.38.0
golang.org/x/sys v0.33.0
)
require (
github.com/dchest/siphash v1.2.3 // ext/bloom
github.com/google/uuid v1.6.0 // ext/uuid
github.com/psanford/httpreadat v0.1.0 // example
golang.org/x/sync v0.13.0 // test
golang.org/x/text v0.24.0 // ext/unicode
golang.org/x/sync v0.14.0 // test
golang.org/x/text v0.25.0 // ext/unicode
lukechampine.com/adiantum v1.1.1 // vfs/adiantum
)

16
go.sum
View File

@@ -10,13 +10,13 @@ github.com/psanford/httpreadat v0.1.0 h1:VleW1HS2zO7/4c7c7zNl33fO6oYACSagjJIyMIw
github.com/psanford/httpreadat v0.1.0/go.mod h1:Zg7P+TlBm3bYbyHTKv/EdtSJZn3qwbPwpfZ/I9GKCRE=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
lukechampine.com/adiantum v1.1.1 h1:4fp6gTxWCqpEbLy40ExiYDDED3oUNWx5cTqBCtPdZqA=
lukechampine.com/adiantum v1.1.1/go.mod h1:LrAYVnTYLnUtE/yMp5bQr0HstAf060YUF8nM0B6+rUw=

View File

@@ -3,7 +3,7 @@ set -euo pipefail
cd -P -- "$(dirname -- "$0")"
curl -#OL "https://sqlite.org/2025/sqlite-amalgamation-3490100.zip"
curl -#OL "https://sqlite.org/2025/sqlite-amalgamation-3490200.zip"
unzip -d . sqlite-amalgamation-*.zip
mv sqlite-amalgamation-*/sqlite3.c .
mv sqlite-amalgamation-*/sqlite3.h .
@@ -19,30 +19,30 @@ rm -rf sqlite-amalgamation-*
mkdir -p ext/
cd ext/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/anycollseq.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/base64.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/decimal.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/ieee754.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/regexp.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/series.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/spellfix.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/ext/misc/uint.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/anycollseq.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/base64.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/decimal.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/ieee754.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/regexp.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/series.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/spellfix.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/ext/misc/uint.c"
cd ~-
cd ../vfs/tests/mptest/testdata/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/config01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/config02.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/crash01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/crash02.subtest"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/multiwrite01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/config01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/config02.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/crash01.test"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/crash02.subtest"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/multiwrite01.test"
cd ~-
cd ../vfs/tests/mptest/wasm/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/mptest/mptest.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/mptest/mptest.c"
cd ~-
cd ../vfs/tests/speedtest1/wasm/
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.1/test/speedtest1.c"
curl -#OL "https://github.com/sqlite/sqlite/raw/version-3.49.2/test/speedtest1.c"
cd ~-
cat *.patch | patch -p0 --no-backup-if-mismatch

View File

@@ -27,16 +27,25 @@ EOF
-Wl,--stack-first \
-Wl,--import-undefined \
-Wl,--initial-memory=16777216 \
-Wl,--export=memccpy \
-Wl,--export=memchr \
-Wl,--export=memcmp \
-Wl,--export=memcpy \
-Wl,--export=memmove \
-Wl,--export=memrchr \
-Wl,--export=memset \
-Wl,--export=stpcpy \
-Wl,--export=stpncpy \
-Wl,--export=strchr \
-Wl,--export=strchrnul \
-Wl,--export=strcmp \
-Wl,--export=strcpy \
-Wl,--export=strcspn \
-Wl,--export=strlen \
-Wl,--export=strncat \
-Wl,--export=strncmp \
-Wl,--export=strncpy \
-Wl,--export=strrchr \
-Wl,--export=strspn \
-Wl,--export=qsort

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,7 @@ import (
"context"
_ "embed"
"os"
"strings"
"testing"
"github.com/tetratelabs/wazero"
@@ -31,6 +32,7 @@ var (
strchr api.Function
strcmp api.Function
strspn api.Function
strrchr api.Function
strncmp api.Function
strcspn api.Function
stack [8]uint64
@@ -63,6 +65,7 @@ func TestMain(m *testing.M) {
strchr = mod.ExportedFunction("strchr")
strcmp = mod.ExportedFunction("strcmp")
strspn = mod.ExportedFunction("strspn")
strrchr = mod.ExportedFunction("strrchr")
strncmp = mod.ExportedFunction("strncmp")
strcspn = mod.ExportedFunction("strcspn")
memory, _ = mod.Memory().Read(0, mod.Memory().Size())
@@ -139,6 +142,18 @@ func Benchmark_strchr(b *testing.B) {
}
}
// Benchmark_strrchr measures strrchr looking for byte 5 in a string
// whose first half is all 5s and whose second half is all 7s,
// so the last match sits right before the midpoint.
func Benchmark_strrchr(b *testing.B) {
	clear(memory)

	// The byte at ptr1+size-1 stays zero and terminates the string.
	head := memory[ptr1 : ptr1+size/2]
	tail := memory[ptr1+size/2 : ptr1+size-1]
	fill(head, 5)
	fill(tail, 7)

	b.SetBytes(size/2 + 1)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		call(strrchr, ptr1, 5)
	}
}
func Benchmark_strcmp(b *testing.B) {
clear(memory)
fill(memory[ptr1:ptr1+size-1], 7)
@@ -195,43 +210,117 @@ func Benchmark_strcspn(b *testing.B) {
}
}
func Test_memchr(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
clear(memory[:2*page])
func Test_memcmp(t *testing.T) {
const s1 string = "" +
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
"\x7f\xf3\x93\x01\x00\x01"
const s2 string = "" +
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
"\x80\xf3\x93\x01\x00\x02"
ptr := (page - 8) + alignment
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
p1 := ptr1
p2 := len(memory) - len(s2)
want := 0
if pos < length {
want = ptr + pos
}
clear(memory)
copy(memory[p1:], s1)
copy(memory[p2:], s2)
got := call(memchr, uint64(ptr), 7, uint64(length))
if uint32(got) != uint32(want) {
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
ptr, 7, uint64(length), uint32(got), uint32(want))
}
for i := range len(s1) + 1 {
for j := range len(s1) - i {
want := strings.Compare(s1[i:i+j], s2[i:i+j])
got := call(memcmp, uint64(p1+i), uint64(p2+i), uint64(j))
if sign(int32(got)) != want {
t.Errorf("strcmp(%d, %d, %d) = %d, want %d",
ptr1+i, ptr2+i, j, int32(got), want)
}
}
}
}
clear(memory)
ptr := len(memory) - length
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
func Test_strcmp(t *testing.T) {
const s1 string = "" +
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
"\x7f\xf3\x93\x01\x00\x01"
const s2 string = "" +
"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
"\x80\xf3\x93\x01\x00\x02"
want := len(memory) - 1
if length == 0 {
want = 0
p1 := ptr1
p2 := len(memory) - len(s2) - 1
clear(memory)
copy(memory[p1:], s1)
copy(memory[p2:], s2)
for i := range len(s1) + 1 {
want := strings.Compare(term(s1[i:]), term(s2[i:]))
got := call(strcmp, uint64(p1+i), uint64(p2+i))
if sign(int32(got)) != want {
t.Errorf("strcmp(%d, %d) = %d, want %d",
p1+i, ptr2+i, int32(got), want)
}
}
}
got := call(memchr, uint64(ptr), 7, uint64(length))
if uint32(got) != uint32(want) {
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
ptr, 7, uint64(length), uint32(got), uint32(want))
// Test_strncmp compares every suffix of two nearly identical byte strings,
// for every length limit, and checks the sign of the result against
// strings.Compare applied to the NUL-truncated prefixes.
//
// The first string is placed at ptr1; the second is placed so that it ends
// one byte before the end of memory, leaving a single zero byte after it.
func Test_strncmp(t *testing.T) {
	const s1 string = "" +
		"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
		"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
		"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
		"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
		"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
		"\x14\xf4\x93\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
		"\x94\xf3\x93\x01\x74\x80\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
		"\x7f\xf3\x93\x01\x00\x01"
	const s2 string = "" +
		"\x94\x63\x8f\x01\x74\x63\x8f\x01\x54\x63\x8f\x01\x34\x63\x8f\x01" +
		"\xb4\xf2\x93\x01\x94\xf2\x93\x01\x54\xf1\x93\x01\x34\xf1\x93\x01" +
		"\x14\xf1\x93\x01\x14\xf2\x93\x01\x34\xf2\x93\x01\x54\xf2\x93\x01" +
		"\x74\xf2\x93\x01\x74\xf1\x93\x01\xd4\xf2\x93\x01\x94\xf1\x93\x01" +
		"\xb4\xf1\x93\x01\xd4\xf1\x93\x01\xf4\xf1\x93\x01\xf4\xf2\x93\x01" +
		"\xbc\x40\x96\x01\xf4\xf3\x93\x01\xd4\xf3\x93\x01\xb4\xf3\x93\x01" +
		"\x94\xf3\x93\x01\x74\x7f\x93\x01\x54\xf3\x93\x01\x34\xf3\x93\x01" +
		"\x80\xf3\x93\x01\x00\x02"

	p1 := ptr1
	p2 := len(memory) - len(s2) - 1

	clear(memory)
	copy(memory[p1:], s1)
	copy(memory[p2:], s2)

	for i := range len(s1) + 1 {
		for j := range len(s1) - i + 1 {
			want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j]))
			got := call(strncmp, uint64(p1+i), uint64(p2+i), uint64(j))
			if sign(int32(got)) != want {
				// Report the addresses that were actually passed to strncmp.
				t.Errorf("strncmp(%d, %d, %d) = %d, want %d",
					p1+i, p2+i, j, int32(got), want)
			}
		}
	}
}
@@ -239,9 +328,9 @@ func Test_memchr(t *testing.T) {
func Test_strlen(t *testing.T) {
for length := range 64 {
for alignment := range 24 {
clear(memory[:2*page])
ptr := (page - 8) + alignment
clear(memory[:2*page])
fill(memory[ptr:ptr+length], 5)
got := call(strlen, uint64(ptr))
@@ -270,22 +359,62 @@ func Test_strlen(t *testing.T) {
}
}
func Test_strchr(t *testing.T) {
func Test_memchr(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
clear(memory[:2*page])
ptr := (page - 8) + alignment
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
memory[ptr+length] = 0
want := 0
if pos < length {
want = ptr + pos
}
clear(memory[:2*page])
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
got := call(memchr, uint64(ptr), 7, uint64(length))
if uint32(got) != uint32(want) {
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
ptr, 7, uint64(length), uint32(got), uint32(want))
}
}
}
clear(memory)
ptr := len(memory) - length
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
want := len(memory) - 1
if length == 0 {
want = 0
}
got := call(memchr, uint64(ptr), 7, uint64(length))
if uint32(got) != uint32(want) {
t.Errorf("memchr(%d, %d, %d) = %d, want %d",
ptr, 7, uint64(length), uint32(got), uint32(want))
}
}
}
func Test_strchr(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
ptr := (page - 8) + alignment
want := 0
if pos < length {
want = ptr + pos
}
clear(memory[:2*page])
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
memory[ptr+pos+1] = 7
memory[ptr+length] = 0
got := call(strchr, uint64(ptr), 7)
if uint32(got) != uint32(want) {
t.Errorf("strchr(%d, %d) = %d, want %d",
@@ -312,21 +441,66 @@ func Test_strchr(t *testing.T) {
}
}
// Test_strrchr checks the exported strrchr for every string length below 64.
// The searched byte is 7; the filler byte is 5.
// The first part starts the string at 24 consecutive offsets around page-8
// (page is defined elsewhere; presumably the Wasm page size — confirm).
// The second part places the string so that it ends at the very end of memory.
func Test_strrchr(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
ptr := (page - 8) + alignment
// A 7 is always written at ptr and at ptr+pos (see below).
// If pos is inside the string, that is the last match;
// otherwise only the one at ptr counts, and an empty string has none.
want := 0
if pos < length {
want = ptr + pos
} else if length > 0 {
want = ptr
}
clear(memory[:2*page])
fill(memory[ptr:ptr+max(pos, length)], 5)
// The order of these writes matters: the terminator is written last,
// so it overrides a 7 at ptr (length == 0) or at ptr+pos (pos == length).
memory[ptr] = 7
memory[ptr+pos] = 7
memory[ptr+length] = 0
got := call(strrchr, uint64(ptr), 7)
if uint32(got) != uint32(want) {
t.Errorf("strrchr(%d, %d) = %d, want %d",
ptr, 7, uint32(got), uint32(want))
}
}
}
// String occupying the last length bytes of memory:
// a 7 at the start, a 7 right before the terminator, and the terminator
// in the very last byte. The expected match is the second 7.
ptr := len(memory) - length
want := len(memory) - 2
// With length <= 1 there is no room for both a 7 and the terminator.
if length <= 1 {
continue
}
clear(memory)
fill(memory[ptr:ptr+length], 5)
memory[ptr] = 7
memory[len(memory)-2] = 7
memory[len(memory)-1] = 0
got := call(strrchr, uint64(ptr), 7)
if uint32(got) != uint32(want) {
t.Errorf("strrchr(%d, %d) = %d, want %d",
ptr, 7, uint32(got), uint32(want))
}
}
}
func Test_strspn(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
clear(memory[:2*page])
ptr := (page - 8) + alignment
want := min(pos, length)
clear(memory[:2*page])
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
memory[ptr+length] = 0
memory[128] = 3
memory[129] = 5
want := min(pos, length)
got := call(strspn, uint64(ptr), 129)
if uint32(got) != uint32(want) {
t.Errorf("strspn(%d, %d) = %d, want %d",
@@ -341,18 +515,18 @@ func Test_strspn(t *testing.T) {
}
}
clear(memory)
ptr := len(memory) - length
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
memory[128] = 3
memory[129] = 5
want := length - 1
if length == 0 {
continue
}
clear(memory)
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
memory[128] = 3
memory[129] = 5
got := call(strspn, uint64(ptr), 129)
if uint32(got) != uint32(want) {
t.Errorf("strspn(%d, %d) = %d, want %d",
@@ -371,17 +545,16 @@ func Test_strcspn(t *testing.T) {
for length := range 64 {
for pos := range length + 2 {
for alignment := range 24 {
clear(memory[:2*page])
ptr := (page - 8) + alignment
want := min(pos, length)
clear(memory[:2*page])
fill(memory[ptr:ptr+max(pos, length)], 5)
memory[ptr+pos] = 7
memory[ptr+length] = 0
memory[128] = 3
memory[129] = 7
want := min(pos, length)
got := call(strcspn, uint64(ptr), 129)
if uint32(got) != uint32(want) {
t.Errorf("strcspn(%d, %d) = %d, want %d",
@@ -396,18 +569,18 @@ func Test_strcspn(t *testing.T) {
}
}
clear(memory)
ptr := len(memory) - length
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
memory[128] = 3
memory[129] = 7
want := length - 1
if length == 0 {
continue
}
clear(memory)
fill(memory[ptr:ptr+length], 5)
memory[len(memory)-1] = 7
memory[128] = 3
memory[129] = 7
got := call(strcspn, uint64(ptr), 129)
if uint32(got) != uint32(want) {
t.Errorf("strcspn(%d, %d) = %d, want %d",
@@ -427,3 +600,21 @@ func fill(s []byte, v byte) {
s[i] = v
}
}
// sign reduces x to -1, 0 or +1 according to whether it is
// negative, zero or positive.
func sign(x int32) int {
	if x > 0 {
		return 1
	}
	if x < 0 {
		return -1
	}
	return 0
}
// term returns the part of s that precedes its first NUL byte,
// or all of s when it contains none, the way a C string would be read.
func term(s string) string {
	head, _, _ := strings.Cut(s, "\x00")
	return head
}

36
sqlite3/libc/math.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef _WASM_SIMD128_MATH_H
#define _WASM_SIMD128_MATH_H
#include <wasm_simd128.h>
#include_next <math.h> // the system math.h
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __wasm_relaxed_simd__
// This header assumes "relaxed fused multiply-add"
// is both faster and more precise.
#define FP_FAST_FMA 1
// Fused multiply-add, x * y + z, computed with the relaxed SIMD
// multiply-add instruction on lane 0 of a two-lane vector.
__attribute__((weak))
double fma(double x, double y, double z) {
  // Only lane 0 carries the real operands; lane 1 is never read back.
  // If we get a software implementation from the host,
  // a zero multiplicand is enough to short circuit it on the 2nd lane.
  const v128_t wx = wasm_f64x2_make(x, 0.0);
  const v128_t wy = wasm_f64x2_splat(y);
  const v128_t wz = wasm_f64x2_splat(z);
  const v128_t wr = wasm_f64x2_relaxed_madd(wx, wy, wz);
  return wasm_f64x2_extract_lane(wr, 0);
}
#endif // __wasm_relaxed_simd__
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _WASM_SIMD128_MATH_H

View File

@@ -17,22 +17,31 @@ extern "C" {
__attribute__((weak))
void qsort(void *base, size_t nel, size_t width,
int (*comp)(const void *, const void *)) {
if (width == 0) return;
// If nel is zero, we're required to do nothing.
// If it's one, the array is already sorted.
size_t wnel = width * nel;
size_t gap = nel;
while (gap > 1) {
// Use 64-bit unsigned arithmetic to avoid intermediate overflow.
// Absent overflow, gap will be strictly less than its previous value.
// Once it is one or zero, set it to one: do a final pass, and stop.
gap = (5ull * gap - 1) / 11;
if (gap == 0) gap = 1;
// It'd be undefined behavior for wnel to overflow a size_t;
// or if width is zero: the base pointer would be invalid.
// Since gap is strictly less than nel, we can assume
// wgap is strictly less than wnel.
size_t wgap = width * gap;
__builtin_assume(wgap < wnel);
for (size_t i = wgap; i < wnel; i += width) {
// Even without overflow flags, the overflow builtin helps the compiler.
for (size_t j = i; !__builtin_sub_overflow(j, wgap, &j);) {
char *a = j + (char *)base;
char *b = a + wgap;
if (comp(a, b) <= 0) break;
// This well known loop is automatically vectorized.
size_t s = width;
do {
char tmp = *a;

View File

@@ -25,7 +25,7 @@ void *memset(void *dest, int c, size_t n) {
}
__attribute__((weak))
void *memcpy(void *restrict dest, const void *restrict src, size_t n) {
void *memcpy(void *__restrict dest, const void *__restrict src, size_t n) {
return __builtin_memcpy(dest, src, n);
}
@@ -38,38 +38,46 @@ void *memmove(void *dest, const void *src, size_t n) {
#ifdef __wasm_simd128__
// SIMD versions of some string.h functions.
//
// These assume aligned v128_t loads can't fail,
// and so can't unaligned loads up to the last
// aligned address less than memory size.
//
// These also assume unaligned access is not painfully slow,
// but that bitmask extraction is really slow on AArch64.
// SIMD implementations of string.h functions.
__attribute__((weak))
int memcmp(const void *v1, const void *v2, size_t n) {
// memcmp can read up to n bytes from each object.
// Use unaligned loads to handle the case where
// the objects have mismatching alignments.
const v128_t *w1 = v1;
const v128_t *w2 = v2;
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
// Find any single bit difference.
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
break;
// Baseline algorithm.
if (n < sizeof(v128_t)) {
const unsigned char *u1 = (unsigned char *)v1;
const unsigned char *u2 = (unsigned char *)v2;
while (n--) {
if (*u1 != *u2) return *u1 - *u2;
u1++;
u2++;
}
w1++;
w2++;
return 0;
}
// Continue byte-by-byte.
const unsigned char *u1 = (void *)w1;
const unsigned char *u2 = (void *)w2;
while (n--) {
if (*u1 != *u2) return *u1 - *u2;
u1++;
u2++;
// memcmp is allowed to read up to n bytes from each object.
// Find the first different character in the objects.
// Unaligned loads handle the case where the objects
// have mismatching alignments.
const v128_t *w1 = (v128_t *)v1;
const v128_t *w2 = (v128_t *)v2;
while (n) {
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(cmp)) {
// Find the offset of the first zero bit (little-endian).
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
const unsigned char *u1 = (unsigned char *)w1 + ctz;
const unsigned char *u2 = (unsigned char *)w2 + ctz;
// This may help the compiler if the function is inlined.
__builtin_assume(*u1 - *u2 != 0);
return *u1 - *u2;
}
// This makes n a multiple of sizeof(v128_t)
// for every iteration except the first.
size_t align = (n - 1) % sizeof(v128_t) + 1;
w1 = (v128_t *)((char *)w1 + align);
w2 = (v128_t *)((char *)w2 + align);
n -= align;
}
return 0;
}
@@ -77,7 +85,7 @@ int memcmp(const void *v1, const void *v2, size_t n) {
__attribute__((weak))
void *memchr(const void *v, int c, size_t n) {
// When n is zero, a function that locates a character finds no occurrence.
// Otherwise, decrement n to ensure __builtin_sub_overflow "overflows"
// Otherwise, decrement n to ensure sub_overflow overflows
// when n would go equal-to-or-below zero.
if (n-- == 0) {
return NULL;
@@ -85,20 +93,20 @@ void *memchr(const void *v, int c, size_t n) {
// memchr must behave as if it reads characters sequentially
// and stops as soon as a match is found.
// Aligning ensures loads can't fail.
// Aligning ensures loads beyond the first match don't fail.
uintptr_t align = (uintptr_t)v % sizeof(v128_t);
const v128_t *w = (void *)(v - align);
const v128_t *w = (v128_t *)((char *)v - align);
const v128_t wc = wasm_i8x16_splat(c);
while (true) {
for (;;) {
const v128_t cmp = wasm_i8x16_eq(*w, wc);
// Bitmask is slow on AArch64, any_true is much faster.
if (wasm_v128_any_true(cmp)) {
// Clear the bits corresponding to alignment
// Clear the bits corresponding to alignment (little-endian)
// so we can count trailing zeros.
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
// At least one bit will be set, unless we cleared them.
// Knowing this helps the compiler.
// Knowing this helps the compiler.
__builtin_assume(mask || align);
// If the mask is zero because of alignment,
// it's as if we didn't find anything.
@@ -106,10 +114,10 @@ void *memchr(const void *v, int c, size_t n) {
// We found a match, unless it is beyond the end of the object.
// Recall that we decremented n, so less-than-or-equal-to is correct.
size_t ctz = __builtin_ctz(mask);
return ctz <= n + align ? (void *)w + ctz : NULL;
return ctz <= n + align ? (char *)w + ctz : NULL;
}
}
// Decrement n; if it "overflows" we're done.
// Decrement n; if it overflows we're done.
if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
return NULL;
}
@@ -118,22 +126,45 @@ void *memchr(const void *v, int c, size_t n) {
}
}
// Finds the last occurrence of (char)c in the n bytes starting at v.
// Returns a pointer to that byte, or NULL if there is none.
// Scans backward from the end: whole vectors first, then single bytes.
__attribute__((weak))
void *memrchr(const void *v, int c, size_t n) {
// memrchr is allowed to read up to n bytes from the object.
// Search backward for the last matching character.
// w starts one past the end of the object and is pre-decremented
// before every load, so loads are unaligned but stay inside the object.
const v128_t *w = (v128_t *)((char *)v + n);
const v128_t wc = wasm_i8x16_splat(c);
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--w), wc);
// Bitmask is slow on AArch64, any_true is much faster.
if (wasm_v128_any_true(cmp)) {
// The bitmask has one bit per byte, in the low 16 bits of an int
// (assumes a 32-bit int). Leading zeros minus 15 is therefore the
// distance, 1 to 16, from the end of this vector back to the last match.
size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 15;
return (char *)(w + 1) - clz;
}
}
// Baseline algorithm.
// Fewer than sizeof(v128_t) bytes remain, all of them below w.
const char *a = (char *)w;
while (n--) {
if (*(--a) == (char)c) return (char *)a;
}
return NULL;
}
__attribute__((weak))
size_t strlen(const char *s) {
// strlen must stop as soon as it finds the terminator.
// Aligning ensures loads can't fail.
// Aligning ensures loads beyond the terminator don't fail.
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
const v128_t *w = (void *)(s - align);
const v128_t *w = (v128_t *)(s - align);
while (true) {
for (;;) {
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(*w)) {
const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){});
// Clear the bits corresponding to alignment
// Clear the bits corresponding to alignment (little-endian)
// so we can count trailing zeros.
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
// At least one bit will be set, unless we cleared them.
// Knowing this helps the compiler.
// Knowing this helps the compiler.
__builtin_assume(mask || align);
if (mask) {
return (char *)w - s + __builtin_ctz(mask);
@@ -145,23 +176,23 @@ size_t strlen(const char *s) {
}
static int __strcmp(const char *s1, const char *s2) {
// Set limit to the largest possible valid v128_t pointer.
// Unsigned modular arithmetic gives the correct result
// unless memory size is zero, in which case all pointers are invalid.
const v128_t *const limit =
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
// How many bytes can be read before pointers go out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
(size_t)(s1 > s2 ? s1 : s2);
// Use unaligned loads to handle the case where
// the strings have mismatching alignments.
const v128_t *w1 = (void *)s1;
const v128_t *w2 = (void *)s2;
while (w1 <= limit && w2 <= limit) {
// Unaligned loads handle the case where the strings
// have mismatching alignments.
const v128_t *w1 = (v128_t *)s1;
const v128_t *w2 = (v128_t *)s2;
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
// Find any single bit difference.
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
// The strings may still be equal,
// if the terminator is found before that difference.
break;
}
// All bytes are equal.
// If any byte is zero (on both strings) the strings are equal.
// All characters are equal.
// If any is a terminator the strings are equal.
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
return 0;
}
@@ -169,10 +200,22 @@ static int __strcmp(const char *s1, const char *s2) {
w2++;
}
// Continue byte-by-byte.
const unsigned char *u1 = (void *)w1;
const unsigned char *u2 = (void *)w2;
while (true) {
// Baseline algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
for (;;) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
u1++;
u2++;
}
return 0;
}
static int __strcmp_s(const char *s1, const char *s2) {
const unsigned char *u1 = (unsigned char *)s1;
const unsigned char *u2 = (unsigned char *)s2;
for (;;) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
u1++;
@@ -183,33 +226,33 @@ static int __strcmp(const char *s1, const char *s2) {
__attribute__((weak, always_inline))
int strcmp(const char *s1, const char *s2) {
// Use strncmp when comparing against literal strings.
// If the literal is small, the vector search will be skipped.
if (__builtin_constant_p(strlen(s2))) {
return strncmp(s1, s2, strlen(s2));
// Skip the vector search when comparing against small literal strings.
if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
return __strcmp_s(s1, s2);
}
return __strcmp(s1, s2);
}
__attribute__((weak))
int strncmp(const char *s1, const char *s2, size_t n) {
// Set limit to the largest possible valid v128_t pointer.
// Unsigned modular arithmetic gives the correct result
// unless memory size is zero, in which case all pointers are invalid.
const v128_t *const limit =
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
// How many bytes can be read before pointers go out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
(size_t)(s1 > s2 ? s1 : s2);
if (n > N) n = N;
// Use unaligned loads to handle the case where
// the strings have mismatching alignments.
const v128_t *w1 = (void *)s1;
const v128_t *w2 = (void *)s2;
for (; w1 <= limit && w2 <= limit && n >= sizeof(v128_t); n -= sizeof(v128_t)) {
// Unaligned loads handle the case where the strings
// have mismatching alignments.
const v128_t *w1 = (v128_t *)s1;
const v128_t *w2 = (v128_t *)s2;
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
// Find any single bit difference.
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
// The strings may still be equal,
// if the terminator is found before that difference.
break;
}
// All bytes are equal.
// If any byte is zero (on both strings) the strings are equal.
// All characters are equal.
// If any is a terminator the strings are equal.
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
return 0;
}
@@ -217,9 +260,9 @@ int strncmp(const char *s1, const char *s2, size_t n) {
w2++;
}
// Continue byte-by-byte.
const unsigned char *u1 = (void *)w1;
const unsigned char *u2 = (void *)w2;
// Baseline algorithm.
const unsigned char *u1 = (unsigned char *)w1;
const unsigned char *u2 = (unsigned char *)w2;
while (n--) {
if (*u1 != *u2) return *u1 - *u2;
if (*u1 == 0) break;
@@ -231,20 +274,20 @@ int strncmp(const char *s1, const char *s2, size_t n) {
static char *__strchrnul(const char *s, int c) {
// strchrnul must stop as soon as a match is found.
// Aligning ensures loads can't fail.
// Aligning ensures loads beyond the first match don't fail.
uintptr_t align = (uintptr_t)s % sizeof(v128_t);
const v128_t *w = (void *)(s - align);
const v128_t *w = (v128_t *)(s - align);
const v128_t wc = wasm_i8x16_splat(c);
while (true) {
for (;;) {
const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc);
// Bitmask is slow on AArch64, any_true is much faster.
if (wasm_v128_any_true(cmp)) {
// Clear the bits corresponding to alignment
// Clear the bits corresponding to alignment (little-endian)
// so we can count trailing zeros.
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
// At least one bit will be set, unless we cleared them.
// Knowing this helps the compiler.
// Knowing this helps the compiler.
__builtin_assume(mask || align);
if (mask) {
return (char *)w + __builtin_ctz(mask);
@@ -271,99 +314,221 @@ char *strchr(const char *s, int c) {
return (char *)s + strlen(s);
}
char *r = __strchrnul(s, c);
return *(char *)r == (char)c ? r : NULL;
return *r == (char)c ? r : NULL;
}
// Returns a pointer to the last occurrence of (char)c in the string s,
// or NULL if there is none. The terminator is considered part of the
// string, so searching for 0 returns a pointer to it.
__attribute__((weak, always_inline))
char *strrchr(const char *s, int c) {
  // Both paths need the length, so measure the string once.
  const size_t len = strlen(s);

  // When the compiler can prove we are looking for the terminator,
  // the answer is simply the end of the string.
  if (__builtin_constant_p(c) && (char)c == 0) {
    return (char *)s + len;
  }

  // Two passes: find the end, then scan backwards (terminator included).
  // A single forward pass that hops from match to match with strchr
  // degrades badly when there are many consecutive matches.
  return (char *)memrchr(s, c, len + 1);
}
// SIMD byte-set lookup, following:
// http://0x80.pl/notesen/2018-10-18-simd-byte-lookup.html
//
// _WASM_SIMD128_BITMAP256_T is an anonymous struct type holding a set of
// byte values as two 16-lane vectors of uint8_t (16 bytes each, 16-byte
// aligned). As used by the SETBIT/CHKBIT/CHKBITS macros that follow:
//  - the lane index is the low nibble of the byte value;
//  - the bit within the lane is the high nibble modulo 8;
//  - `l` stores high nibbles 0..7, `h` stores high nibbles 8..15.
#define _WASM_SIMD128_BITMAP256_T \
struct { \
uint8_t l __attribute__((__vector_size__(16), __aligned__(16))); \
uint8_t h __attribute__((__vector_size__(16), __aligned__(16))); \
}
// Adds the byte value (uint8_t)(i) to `bitmap` (an lvalue with `l` and `h`
// members indexable by 0..15, see _WASM_SIMD128_BITMAP256_T).
//
// The low nibble selects the lane; the high nibble selects one of 16 bits,
// of which bits 0..7 live in `l` and bits 8..15 live in `h`.
// The 16-bit one-hot value is built with an unsigned shift by 0..15 and
// then split into its low and high bytes. This stays branch-free and
// never shifts by a negative count, which would be undefined behaviour.
#define _WASM_SIMD128_SETBIT(bitmap, i)                      \
  ({                                                         \
    uint8_t _c = (uint8_t)(i);                               \
    uint8_t _hi_nibble = _c >> 4;                            \
    uint8_t _lo_nibble = _c & 0xf;                           \
    uint16_t _onehot = (uint16_t)(1u << _hi_nibble);         \
    bitmap.l[_lo_nibble] |= (uint8_t)(_onehot & 0xff);       \
    bitmap.h[_lo_nibble] |= (uint8_t)(_onehot >> 8);         \
  })
// Scalar membership test: evaluates to non-zero if the byte value
// (uint8_t)(i) is in `bitmap`, and to zero otherwise.
// The low nibble picks the lane, bit 3 of the high nibble picks the
// `l` or `h` half, and the remaining three bits pick the bit in the lane.
#define _WASM_SIMD128_CHKBIT(bitmap, i)                              \
  ({                                                                 \
    const uint8_t _byte = (uint8_t)(i);                              \
    const uint8_t _lane = _byte & 0xf;                               \
    const uint8_t _bit = _byte >> 4;                                 \
    const uint8_t _bits =                                            \
        (_bit & 0x8) ? bitmap.h[_lane] : bitmap.l[_lane];            \
    (uint8_t)(1 << (_bit & 0x7)) & _bits;                            \
  })
// Vector membership test: checks all 16 bytes of the v128_t `v` against
// `bitmap` at once. Evaluates to a v128_t in which each byte lane is
// all-ones if the corresponding byte of `v` is in the set, zero otherwise.
//
// Steps, per byte lane:
//  - split the byte into its high and low nibbles;
//  - swizzle a constant table by the high nibble to get 1 << (hi % 8)
//    (the table repeats after 8 entries, so indices 8..15 wrap);
//  - swizzle both bitmap halves by the low nibble, then bitselect the
//    `l` result where the high nibble is below 8 and the `h` result
//    elsewhere;
//  - the byte is in the set iff the selected bitset contains the mask bit.
#define _WASM_SIMD128_CHKBITS(bitmap, v) \
({ \
v128_t _w = v; \
v128_t _hi_nibbles = wasm_u8x16_shr(_w, 4); \
v128_t _lo_nibbles = _w & wasm_u8x16_const_splat(0xf); \
\
v128_t _bitmask_lookup = wasm_u8x16_const(1, 2, 4, 8, 16, 32, 64, 128, \
1, 2, 4, 8, 16, 32, 64, 128); \
\
v128_t _bitmask = wasm_i8x16_swizzle(_bitmask_lookup, _hi_nibbles); \
v128_t _bitsets = wasm_v128_bitselect( \
wasm_i8x16_swizzle(bitmap.l, _lo_nibbles), \
wasm_i8x16_swizzle(bitmap.h, _lo_nibbles), \
wasm_i8x16_lt(_hi_nibbles, wasm_u8x16_const_splat(8))); \
\
wasm_i8x16_eq(_bitsets & _bitmask, _bitmask); \
})
__attribute__((weak))
size_t strspn(const char *s, const char *c) {
#ifndef _REENTRANT
static // Avoid the stack for builds without threads.
#endif
char byteset[UCHAR_MAX + 1];
// How many bytes can be read before the pointer goes out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - (size_t)s;
const v128_t *w = (v128_t *)s;
const char *const a = s;
if (!c[0]) return 0;
if (!c[1]) {
// Set limit to the largest possible valid v128_t pointer.
// Unsigned modular arithmetic gives the correct result
// unless memory size is zero, in which case all pointers are invalid.
const v128_t *const limit =
(v128_t *)(__builtin_wasm_memory_size(0) * PAGESIZE) - 1;
const v128_t *w = (void *)s;
const v128_t wc = wasm_i8x16_splat(*c);
while (w <= limit) {
if (!wasm_i8x16_all_true(wasm_i8x16_eq(wasm_v128_load(w), wc))) {
break;
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w), wc);
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(cmp)) {
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
return (char *)w + ctz - s;
}
w++;
}
s = (void *)w;
while (*s == *c) s++;
// Baseline algorithm.
for (s = (char *)w; *s == *c; s++);
return s - a;
}
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
_WASM_SIMD128_BITMAP256_T bitmap = {};
// Unoptimized version.
memset(byteset, 0, sizeof(byteset));
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
while (byteset[*(unsigned char *)s]) s++;
for (; *c; c++) {
_WASM_SIMD128_SETBIT(bitmap, *c);
// Terminator IS NOT on the bitmap.
}
#else
// This is faster than memset.
volatile v128_t *w = (void *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
// Keeping byteset[0] = 0 avoids the other loop having to test for it.
while (*c && (byteset[*(unsigned char *)c] = 1)) c++;
#pragma unroll 4
while (byteset[*(unsigned char *)s]) s++;
#endif
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w));
// Bitmask is slow on AArch64, all_true is much faster.
if (!wasm_i8x16_all_true(cmp)) {
size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
return (char *)w + ctz - s;
}
w++;
}
// Baseline algorithm.
for (s = (char *)w; _WASM_SIMD128_CHKBIT(bitmap, *s); s++);
return s - a;
}
__attribute__((weak))
size_t strcspn(const char *s, const char *c) {
#ifndef _REENTRANT
static // Avoid the stack for builds without threads.
#endif
char byteset[UCHAR_MAX + 1];
const char *const a = s;
if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
#if !__OPTIMIZE__ || __OPTIMIZE_SIZE__
// How many bytes can be read before the pointer goes out of bounds.
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - (size_t)s;
const v128_t *w = (v128_t *)s;
const char *const a = s;
// Unoptimized version.
memset(byteset, 0, sizeof(byteset));
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
while (!byteset[*(unsigned char *)s]) s++;
_WASM_SIMD128_BITMAP256_T bitmap = {};
#else
for (;;) {
_WASM_SIMD128_SETBIT(bitmap, *c);
// Terminator IS on the bitmap.
if (!*c++) break;
}
// This is faster than memset.
volatile v128_t *w = (void *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
static_assert(sizeof(byteset) % sizeof(v128_t) == 0);
// Setting byteset[0] = 1 avoids the other loop having to test for it.
while ((byteset[*(unsigned char *)c] = 1) && *c) c++;
#pragma unroll 4
while (!byteset[*(unsigned char *)s]) s++;
#endif
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
const v128_t cmp = _WASM_SIMD128_CHKBITS(bitmap, wasm_v128_load(w));
// Bitmask is slow on AArch64, any_true is much faster.
if (wasm_v128_any_true(cmp)) {
size_t ctz = __builtin_ctz(wasm_i8x16_bitmask(cmp));
return (char *)w + ctz - s;
}
w++;
}
// Baseline algorithm.
for (s = (char *)w; !_WASM_SIMD128_CHKBIT(bitmap, *s); s++);
return s - a;
}
#undef _WASM_SIMD128_SETBIT
#undef _WASM_SIMD128_CHKBIT
#undef _WASM_SIMD128_CHKBITS
#undef _WASM_SIMD128_BITMAP256_T
// Given the above SIMD implementations,
// these are best implemented as
// small wrappers over those functions.
// Simple wrappers already in musl:
// - mempcpy
// - strcat
// - strdup
// - strndup
// - strnlen
// - strpbrk
// - strsep
// - strtok
// Copies bytes from src to dest, stopping after the first occurrence of
// (unsigned char)c or after n bytes, whichever comes first.
// Returns a pointer to the byte in dest following the copied c,
// or NULL if c was not found in the first n bytes of src.
__attribute__((weak))
void *memccpy(void *__restrict dest, const void *__restrict src, int c, size_t n) {
  void *memchr(const void *v, int c, size_t n);

  // Locate the stop byte first, so the copy itself is a single memcpy.
  const char *const hit = (const char *)memchr(src, c, n);
  if (hit == NULL) {
    // No stop byte: copy everything and report that.
    memcpy(dest, src, n);
    return NULL;
  }

  // Copy up to and including the stop byte.
  const size_t len = (size_t)(hit - (const char *)src) + 1;
  memcpy(dest, src, len);
  return (char *)dest + len;
}
// Appends at most n characters of src to the string in dest,
// then always writes a terminator. Returns dest.
__attribute__((weak))
char *strncat(char *__restrict dest, const char *__restrict src, size_t n) {
  size_t strnlen(const char *s, size_t n);

  // Where the existing string ends, and how much of src gets appended.
  char *const tail = dest + strlen(dest);
  const size_t count = strnlen(src, n);

  memcpy(tail, src, count);
  tail[count] = 0;
  return dest;
}
// Copies the string src (terminator included) to dest.
// Returns a pointer to the terminator written in dest.
static char *__stpcpy(char *__restrict dest, const char *__restrict src) {
  const size_t len = strlen(src);
  // memcpy returns dest; the terminator lands len bytes in.
  return (char *)memcpy(dest, src, len + 1) + len;
}
// Copies at most n characters of src to dest and zero-fills the rest of
// the n bytes. Returns a pointer to the first padding byte written,
// or dest + n if src has n or more characters (no terminator is written).
static char *__stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
  size_t strnlen(const char *s, size_t n);

  const size_t copied = strnlen(src, n);
  char *const end = (char *)memcpy(dest, src, copied) + copied;
  // Pad whatever is left of the n bytes with zeros.
  memset(end, 0, n - copied);
  return end;
}
// Copies the string src to dest, terminator included.
// Returns a pointer to the terminator written in dest.
__attribute__((weak, always_inline))
char *stpcpy(char *__restrict dest, const char *__restrict src) {
  char *const end = __stpcpy(dest, src);
  return end;
}
// Copies the string src to dest, terminator included. Returns dest.
//
// Declared weak and always_inline like the other public wrappers here:
// this definition lives in a header, so without `weak` every translation
// unit that includes it would emit a strong, conflicting definition.
__attribute__((weak, always_inline))
char *strcpy(char *__restrict dest, const char *__restrict src) {
  // The end pointer from __stpcpy is not needed; strcpy returns dest.
  __stpcpy(dest, src);
  return dest;
}
// Copies at most n characters of src to dest, zero-padding up to n bytes.
// Returns whatever __stpncpy returns: a pointer just past the last
// character copied into dest.
__attribute__((weak, always_inline))
char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n) {
  char *const end = __stpncpy(dest, src, n);
  return end;
}
// Copies at most n characters of src to dest, zero-padding up to n bytes.
// Returns dest. As with any strncpy, dest is left unterminated when
// src has n or more characters.
__attribute__((weak, always_inline))
char *strncpy(char *__restrict dest, const char *__restrict src, size_t n) {
  // Only the side effect is wanted; the end pointer is discarded.
  (void)__stpncpy(dest, src, n);
  return dest;
}
#endif // __wasm_simd128__
#ifdef __cplusplus

57
sqlite3/libc/strings.h Normal file
View File

@@ -0,0 +1,57 @@
#ifndef _WASM_SIMD128_STRINGS_H
#define _WASM_SIMD128_STRINGS_H
#include <stddef.h>
#include <wasm_simd128.h>
#include_next <strings.h> // the system strings.h
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __wasm_simd128__
// Compares the first n bytes of two objects for equality only.
// Returns 0 if they are identical and 1 otherwise; unlike memcmp,
// the result carries no ordering information.
__attribute__((weak))
int bcmp(const void *v1, const void *v2, size_t n) {
  const unsigned char *p1 = (const unsigned char *)v1;
  const unsigned char *p2 = (const unsigned char *)v2;

  // Objects smaller than one vector: plain byte-by-byte comparison.
  if (n < sizeof(v128_t)) {
    for (size_t i = 0; i < n; i++) {
      if (p1[i] != p2[i]) return 1;
    }
    return 0;
  }

  // From here on n >= sizeof(v128_t), so every 16-byte load below stays
  // within the n bytes bcmp is allowed to read from each object.
  // Unaligned loads are used because the two objects may have
  // different alignments.
  //
  // The first step is sized so that what remains afterwards is a whole
  // number of vectors; the second load may therefore overlap the first,
  // which is harmless for an equality test.
  size_t step = (n - 1) % sizeof(v128_t) + 1;
  for (;;) {
    // Any set bit in the XOR means the blocks differ.
    if (wasm_v128_any_true(wasm_v128_load(p1) ^ wasm_v128_load(p2))) {
      return 1;
    }
    n -= step;
    if (n == 0) return 0;
    p1 += step;
    p2 += step;
    step = sizeof(v128_t);
  }
}
#endif // __wasm_simd128__
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _WASM_SIMD128_STRINGS_H

Binary file not shown.