mirror of
https://github.com/ncruces/go-sqlite3.git
synced 2026-01-12 05:59:14 +00:00
Less SIMD.
This commit is contained in:
@@ -1,41 +0,0 @@
|
||||
# Using SIMD for libc
|
||||
|
||||
I found that implementing some libc functions with Wasm SIMD128 can make them significantly faster.
|
||||
|
||||
Rough numbers for [wazero](https://wazero.io/):
|
||||
|
||||
function | speedup
|
||||
------------ | -----
|
||||
`strlen` | 4.1×
|
||||
`memchr` | 4.1×
|
||||
`strchr` | 4.0×
|
||||
`strrchr` | 9.1×
|
||||
`memcmp` | 13.0×
|
||||
`strcmp` | 10.4×
|
||||
`strncmp` | 15.7×
|
||||
`strcasecmp` | 8.8×
|
||||
`strncasecmp`| 8.6×
|
||||
`strspn` | 9.9×
|
||||
`strcspn` | 9.0×
|
||||
`memmem` | 2.2×
|
||||
`strstr` | 5.5×
|
||||
`strcasestr` | 25.2×
|
||||
|
||||
For functions where musl uses SWAR on a 4-byte `size_t`,
|
||||
the improvement is around 4×.
|
||||
This is very close to the expected theoretical improvement,
|
||||
as we're processing 4× the bytes per cycle (16 _vs._ 4).
|
||||
|
||||
For other functions where there's no algorithmic change,
|
||||
the improvement is around 8×.
|
||||
These functions are harder to optimize
|
||||
(which is why musl doesn't bother with SWAR),
|
||||
so getting an 8× improvement from processing 16× the bytes seems decent.
|
||||
|
||||
String search is harder to compare, since there are algorithmic changes,
|
||||
and different needles produce very different numbers.
|
||||
We use [Quick Search](https://igm.univ-mlv.fr/~lecroq/string/node19.html) for `memmem`,
|
||||
and [Rabin–Karp](https://igm.univ-mlv.fr/~lecroq/string/node5.html) for `strstr` and `strcasestr`;
|
||||
musl uses [Two Way](https://igm.univ-mlv.fr/~lecroq/string/node26.html) for `memmem` and `strstr`,
|
||||
and [brute force](https://igm.univ-mlv.fr/~lecroq/string/node3.html) for `strcasestr`.
|
||||
Unlike Two Way, both replacements can go quadratic for long, periodic needles.
|
||||
@@ -28,31 +28,18 @@ EOF
|
||||
-Wl,--stack-first \
|
||||
-Wl,--import-undefined \
|
||||
-Wl,--initial-memory=16777216 \
|
||||
-Wl,--export=memccpy \
|
||||
-Wl,--export=memchr \
|
||||
-Wl,--export=memcmp \
|
||||
-Wl,--export=memcpy \
|
||||
-Wl,--export=memmem \
|
||||
-Wl,--export=memmove \
|
||||
-Wl,--export=memrchr \
|
||||
-Wl,--export=memset \
|
||||
-Wl,--export=stpcpy \
|
||||
-Wl,--export=stpncpy \
|
||||
-Wl,--export=strcasecmp \
|
||||
-Wl,--export=strcasestr \
|
||||
-Wl,--export=strchr \
|
||||
-Wl,--export=strchrnul \
|
||||
-Wl,--export=strcmp \
|
||||
-Wl,--export=strcpy \
|
||||
-Wl,--export=strcspn \
|
||||
-Wl,--export=strlen \
|
||||
-Wl,--export=strncasecmp \
|
||||
-Wl,--export=strncat \
|
||||
-Wl,--export=strncmp \
|
||||
-Wl,--export=strncpy \
|
||||
-Wl,--export=strrchr \
|
||||
-Wl,--export=strspn \
|
||||
-Wl,--export=strstr \
|
||||
-Wl,--export=qsort
|
||||
|
||||
"$BINARYEN/wasm-ctor-eval" -g -c _initialize libc.wasm -o libc.tmp
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -24,25 +24,18 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
memory []byte
|
||||
module api.Module
|
||||
memset api.Function
|
||||
memcpy api.Function
|
||||
memchr api.Function
|
||||
memcmp api.Function
|
||||
memmem api.Function
|
||||
strlen api.Function
|
||||
strchr api.Function
|
||||
strcmp api.Function
|
||||
strstr api.Function
|
||||
strspn api.Function
|
||||
strrchr api.Function
|
||||
strncmp api.Function
|
||||
strcspn api.Function
|
||||
strcasecmp api.Function
|
||||
strcasestr api.Function
|
||||
strncasecmp api.Function
|
||||
stack [8]uint64
|
||||
memory []byte
|
||||
module api.Module
|
||||
memset api.Function
|
||||
memcpy api.Function
|
||||
memchr api.Function
|
||||
memcmp api.Function
|
||||
strlen api.Function
|
||||
strchr api.Function
|
||||
strspn api.Function
|
||||
strrchr api.Function
|
||||
strcspn api.Function
|
||||
stack [8]uint64
|
||||
)
|
||||
|
||||
func call(fn api.Function, arg ...uint64) uint64 {
|
||||
@@ -68,18 +61,11 @@ func TestMain(m *testing.M) {
|
||||
memcpy = mod.ExportedFunction("memcpy")
|
||||
memchr = mod.ExportedFunction("memchr")
|
||||
memcmp = mod.ExportedFunction("memcmp")
|
||||
memmem = mod.ExportedFunction("memmem")
|
||||
strlen = mod.ExportedFunction("strlen")
|
||||
strchr = mod.ExportedFunction("strchr")
|
||||
strcmp = mod.ExportedFunction("strcmp")
|
||||
strstr = mod.ExportedFunction("strstr")
|
||||
strspn = mod.ExportedFunction("strspn")
|
||||
strrchr = mod.ExportedFunction("strrchr")
|
||||
strncmp = mod.ExportedFunction("strncmp")
|
||||
strcspn = mod.ExportedFunction("strcspn")
|
||||
strcasecmp = mod.ExportedFunction("strcasecmp")
|
||||
strcasestr = mod.ExportedFunction("strcasestr")
|
||||
strncasecmp = mod.ExportedFunction("strncasecmp")
|
||||
memory, _ = mod.Memory().Read(0, mod.Memory().Size())
|
||||
|
||||
os.Exit(m.Run())
|
||||
@@ -166,58 +152,6 @@ func Benchmark_memcmp(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strcmp(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size-1], 7)
|
||||
fill(memory[ptr2:ptr2+size/2], 7)
|
||||
fill(memory[ptr2+size/2:ptr2+size-1], 5)
|
||||
|
||||
b.SetBytes(size/2 + 1)
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strcmp, ptr1, ptr2, size)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strncmp(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size-1], 7)
|
||||
fill(memory[ptr2:ptr2+size/2], 7)
|
||||
fill(memory[ptr2+size/2:ptr2+size-1], 5)
|
||||
|
||||
b.SetBytes(size/2 + 1)
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strncmp, ptr1, ptr2, size-1)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strcasecmp(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size-1], 7)
|
||||
fill(memory[ptr2:ptr2+size/2], 7)
|
||||
fill(memory[ptr2+size/2:ptr2+size-1], 5)
|
||||
|
||||
b.SetBytes(size/2 + 1)
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strcasecmp, ptr1, ptr2, size)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strncasecmp(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size-1], 7)
|
||||
fill(memory[ptr2:ptr2+size/2], 7)
|
||||
fill(memory[ptr2+size/2:ptr2+size-1], 5)
|
||||
|
||||
b.SetBytes(size/2 + 1)
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strncasecmp, ptr1, ptr2, size-1)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strspn(b *testing.B) {
|
||||
clear(memory)
|
||||
fill(memory[ptr1:ptr1+size/2], 7)
|
||||
@@ -248,51 +182,6 @@ func Benchmark_strcspn(b *testing.B) {
|
||||
}
|
||||
}
|
||||
|
||||
//go:embed string.h
|
||||
var source string
|
||||
|
||||
func Benchmark_memmem(b *testing.B) {
|
||||
needle := "memcpy(dest, src, slen)"
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], source)
|
||||
copy(memory[ptr2:], needle)
|
||||
|
||||
b.SetBytes(int64(len(source)))
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(memmem, ptr1, uint64(len(source)), ptr2, uint64(len(needle)))
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strstr(b *testing.B) {
|
||||
needle := "memcpy(dest, src, slen)"
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], source)
|
||||
copy(memory[ptr2:], needle)
|
||||
|
||||
b.SetBytes(int64(len(source)))
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strstr, ptr1, ptr2)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_strcasestr(b *testing.B) {
|
||||
needle := "MEMCPY(dest, src, slen)"
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], source)
|
||||
copy(memory[ptr2:], needle)
|
||||
|
||||
b.SetBytes(int64(len(source)))
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
call(strcasestr, ptr1, ptr2)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strlen(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for alignment := range 24 {
|
||||
@@ -498,48 +387,6 @@ func Test_memcmp(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strcmp(t *testing.T) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
ptr2 := len(memory) - len(s2) - 1
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
|
||||
for i := range len(s1) + 1 {
|
||||
want := strings.Compare(term(s1[i:]), term(s2[i:]))
|
||||
got := call(strcmp, uint64(ptr1+i), uint64(ptr2+i))
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strcmp(%d, %d) = %d, want %d",
|
||||
ptr1+i, ptr2+i, int32(got), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strncmp(t *testing.T) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
ptr2 := len(memory) - len(s2) - 1
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
|
||||
for i := range len(s1) + 1 {
|
||||
for j := range len(s1) - i + 1 {
|
||||
want := strings.Compare(term(s1[i:i+j]), term(s2[i:i+j]))
|
||||
got := call(strncmp, uint64(ptr1+i), uint64(ptr2+i), uint64(j))
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strncmp(%d, %d, %d) = %d, want %d",
|
||||
ptr1+i, ptr2+i, j, int32(got), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strspn(t *testing.T) {
|
||||
for length := range 64 {
|
||||
for pos := range length + 2 {
|
||||
@@ -782,102 +629,6 @@ var searchTests = []searchTest{
|
||||
{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
|
||||
}
|
||||
|
||||
func Test_memmem(t *testing.T) {
|
||||
tt := append(searchTests,
|
||||
searchTest{"abcABCabc", "A", 3},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17},
|
||||
)
|
||||
|
||||
for i := range tt {
|
||||
ptr1 := uint64(len(memory) - len(tt[i].haystk))
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], tt[i].haystk)
|
||||
copy(memory[ptr2:], tt[i].needle)
|
||||
|
||||
var want uint64
|
||||
if tt[i].out >= 0 {
|
||||
want = ptr1 + uint64(tt[i].out)
|
||||
}
|
||||
|
||||
got := call(memmem,
|
||||
uint64(ptr1), uint64(len(tt[i].haystk)),
|
||||
uint64(ptr2), uint64(len(tt[i].needle)))
|
||||
if got != want {
|
||||
t.Errorf("memmem(%q, %q) = %d, want %d",
|
||||
tt[i].haystk, tt[i].needle,
|
||||
uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strstr(t *testing.T) {
|
||||
tt := append(searchTests,
|
||||
searchTest{"abcABCabc", "A", 3},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1},
|
||||
)
|
||||
|
||||
for i := range tt {
|
||||
ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1)
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], tt[i].haystk)
|
||||
copy(memory[ptr2:], tt[i].needle)
|
||||
|
||||
var want uint64
|
||||
if tt[i].out >= 0 {
|
||||
want = ptr1 + uint64(tt[i].out)
|
||||
}
|
||||
|
||||
got := call(strstr, uint64(ptr1), uint64(ptr2))
|
||||
if got != want {
|
||||
t.Errorf("strstr(%q, %q) = %d, want %d",
|
||||
tt[i].haystk, tt[i].needle,
|
||||
uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_strcasestr(t *testing.T) {
|
||||
tt := append(searchTests[1:],
|
||||
searchTest{"A", "a", 0},
|
||||
searchTest{"a", "A", 0},
|
||||
searchTest{"Z", "z", 0},
|
||||
searchTest{"z", "Z", 0},
|
||||
searchTest{"@", "`", -1},
|
||||
searchTest{"`", "@", -1},
|
||||
searchTest{"[", "{", -1},
|
||||
searchTest{"{", "[", -1},
|
||||
searchTest{"abcABCabc", "A", 0},
|
||||
searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12},
|
||||
searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1},
|
||||
)
|
||||
|
||||
for i := range tt {
|
||||
ptr1 := uint64(len(memory) - len(tt[i].haystk) - 1)
|
||||
|
||||
clear(memory)
|
||||
copy(memory[ptr1:], tt[i].haystk)
|
||||
copy(memory[ptr2:], tt[i].needle)
|
||||
|
||||
var want uint64
|
||||
if tt[i].out >= 0 {
|
||||
want = ptr1 + uint64(tt[i].out)
|
||||
}
|
||||
|
||||
got := call(strcasestr, uint64(ptr1), uint64(ptr2))
|
||||
if got != want {
|
||||
t.Errorf("strcasestr(%q, %q) = %d, want %d",
|
||||
tt[i].haystk, tt[i].needle,
|
||||
uint32(got), uint32(want))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Fuzz_memchr(f *testing.F) {
|
||||
f.Fuzz(func(t *testing.T, s string, c, i byte) {
|
||||
if len(s) > 128 || int(i) > len(s) {
|
||||
@@ -971,120 +722,6 @@ func Fuzz_memcmp(f *testing.F) {
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strcmp(f *testing.F) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
for i := range len(compareTest1) + 1 {
|
||||
f.Add(term(s1[i:]), term(s2[i:]))
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, s1, s2 string) {
|
||||
if len(s1) > 128 || len(s2) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
memory[ptr1+len(s1)] = 0
|
||||
memory[ptr2+len(s2)] = 0
|
||||
|
||||
got := call(strcmp, uint64(ptr1), uint64(ptr2))
|
||||
want := strings.Compare(term(s1), term(s2))
|
||||
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strcmp(%q, %q) = %d, want %d",
|
||||
s1, s2, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strncmp(f *testing.F) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
for i := range len(compareTest1) + 1 {
|
||||
f.Add(term(s1[i:]), term(s2[i:]), byte(len(s1)))
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, s1, s2 string, n byte) {
|
||||
if len(s1) > 128 || len(s2) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
memory[ptr1+len(s1)] = 0
|
||||
memory[ptr2+len(s2)] = 0
|
||||
|
||||
got := call(strncmp, uint64(ptr1), uint64(ptr2), uint64(n))
|
||||
want := bytes.Compare(
|
||||
term(memory[ptr1:][:n]),
|
||||
term(memory[ptr2:][:n]))
|
||||
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strncmp(%q, %q, %d) = %d, want %d",
|
||||
s1, s2, n, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strcasecmp(f *testing.F) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
for i := range len(compareTest1) + 1 {
|
||||
f.Add(term(s1[i:]), term(s2[i:]))
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, s1, s2 string) {
|
||||
if len(s1) > 128 || len(s2) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
memory[ptr1+len(s1)] = 0
|
||||
memory[ptr2+len(s2)] = 0
|
||||
|
||||
got := call(strcasecmp, uint64(ptr1), uint64(ptr2))
|
||||
want := bytes.Compare(
|
||||
lower(term(memory[ptr1:])),
|
||||
lower(term(memory[ptr2:])))
|
||||
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strcasecmp(%q, %q) = %d, want %d",
|
||||
s1, s2, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strncasecmp(f *testing.F) {
|
||||
const s1 = compareTest1
|
||||
const s2 = compareTest2
|
||||
|
||||
for i := range len(compareTest1) + 1 {
|
||||
f.Add(term(s1[i:]), term(s2[i:]), byte(len(s1)))
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, s1, s2 string, n byte) {
|
||||
if len(s1) > 128 || len(s2) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], s1)
|
||||
copy(memory[ptr2:], s2)
|
||||
memory[ptr1+len(s1)] = 0
|
||||
memory[ptr2+len(s2)] = 0
|
||||
|
||||
got := call(strncasecmp, uint64(ptr1), uint64(ptr2), uint64(n))
|
||||
want := bytes.Compare(
|
||||
lower(term(memory[ptr1:][:n])),
|
||||
lower(term(memory[ptr2:][:n])))
|
||||
|
||||
if sign(int32(got)) != want {
|
||||
t.Errorf("strncasecmp(%q, %q, %d) = %d, want %d",
|
||||
s1, s2, n, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strspn(f *testing.F) {
|
||||
for _, t := range searchTests {
|
||||
f.Add(t.haystk, t.needle)
|
||||
@@ -1155,129 +792,6 @@ func Fuzz_strcspn(f *testing.F) {
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_memmem(f *testing.F) {
|
||||
tt := append(searchTests,
|
||||
searchTest{"abcABCabc", "A", 3},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", 13},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17},
|
||||
)
|
||||
|
||||
for _, t := range tt {
|
||||
f.Add(t.haystk, t.needle)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, haystk, needle string) {
|
||||
if len(haystk) > 128 || len(needle) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], haystk)
|
||||
copy(memory[ptr2:], needle)
|
||||
|
||||
got := call(memmem,
|
||||
uint64(ptr1), uint64(len(haystk)),
|
||||
uint64(ptr2), uint64(len(needle)))
|
||||
|
||||
want := strings.Index(haystk, needle)
|
||||
if want >= 0 {
|
||||
want = ptr1 + want
|
||||
} else {
|
||||
want = 0
|
||||
}
|
||||
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("memmem(%q, %q) = %d, want %d",
|
||||
haystk, needle, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strstr(f *testing.F) {
|
||||
tt := append(searchTests,
|
||||
searchTest{"abcABCabc", "A", 3},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1},
|
||||
)
|
||||
|
||||
for _, t := range tt {
|
||||
f.Add(t.haystk, t.needle)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, haystk, needle string) {
|
||||
if len(haystk) > 128 || len(needle) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
copy(memory[ptr1:], haystk)
|
||||
copy(memory[ptr2:], needle)
|
||||
memory[ptr1+len(haystk)] = 0
|
||||
memory[ptr2+len(needle)] = 0
|
||||
|
||||
got := call(strstr, uint64(ptr1), uint64(ptr2))
|
||||
|
||||
want := strings.Index(term(haystk), term(needle))
|
||||
if want >= 0 {
|
||||
want = ptr1 + want
|
||||
} else {
|
||||
want = 0
|
||||
}
|
||||
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strstr(%q, %q) = %d, want %d",
|
||||
haystk, needle, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Fuzz_strcasestr(f *testing.F) {
|
||||
tt := append(searchTests,
|
||||
searchTest{"A", "a", 0},
|
||||
searchTest{"a", "A", 0},
|
||||
searchTest{"Z", "z", 0},
|
||||
searchTest{"z", "Z", 0},
|
||||
searchTest{"@", "`", -1},
|
||||
searchTest{"`", "@", -1},
|
||||
searchTest{"[", "{", -1},
|
||||
searchTest{"{", "[", -1},
|
||||
searchTest{"abcABCabc", "A", 0},
|
||||
searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12},
|
||||
searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12},
|
||||
searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1},
|
||||
searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1},
|
||||
)
|
||||
|
||||
for _, t := range tt {
|
||||
f.Add(t.haystk, t.needle)
|
||||
}
|
||||
|
||||
f.Fuzz(func(t *testing.T, haystk, needle string) {
|
||||
if len(haystk) > 128 || len(needle) > 128 {
|
||||
t.SkipNow()
|
||||
}
|
||||
if len(needle) == 0 {
|
||||
t.Skip("musl bug")
|
||||
}
|
||||
copy(memory[ptr1:], haystk)
|
||||
copy(memory[ptr2:], needle)
|
||||
memory[ptr1+len(haystk)] = 0
|
||||
memory[ptr2+len(needle)] = 0
|
||||
|
||||
got := call(strcasestr, uint64(ptr1), uint64(ptr2))
|
||||
|
||||
want := bytes.Index(
|
||||
lower(term(memory[ptr1:])),
|
||||
lower(term(memory[ptr2:])))
|
||||
if want >= 0 {
|
||||
want = ptr1 + want
|
||||
} else {
|
||||
want = 0
|
||||
}
|
||||
|
||||
if uint32(got) != uint32(want) {
|
||||
t.Errorf("strcasestr(%q, %q) = %d, want %d",
|
||||
haystk, needle, uint32(got), uint32(want))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func sign(x int32) int {
|
||||
switch {
|
||||
case x > 0:
|
||||
@@ -1295,15 +809,6 @@ func fill(s []byte, v byte) {
|
||||
}
|
||||
}
|
||||
|
||||
func lower(s []byte) []byte {
|
||||
for i, c := range s {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s[i] = c - 'A' + 'a'
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func term[T interface{ []byte | string }](s T) T {
|
||||
for i, c := range []byte(s) {
|
||||
if c == 0 {
|
||||
|
||||
@@ -19,17 +19,17 @@ extern "C" {
|
||||
// Clang will intrinsify using SIMD for small, constant N.
|
||||
// For everything else, this helps inlining.
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, always_inline))
|
||||
void *memset(void *dest, int c, size_t n) {
|
||||
return __builtin_memset(dest, c, n);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, always_inline))
|
||||
void *memcpy(void *__restrict dest, const void *__restrict src, size_t n) {
|
||||
return __builtin_memcpy(dest, src, n);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, always_inline))
|
||||
void *memmove(void *dest, const void *src, size_t n) {
|
||||
return __builtin_memmove(dest, src, n);
|
||||
}
|
||||
@@ -80,7 +80,7 @@ int memcmp(const void *vl, const void *vr, size_t n) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, noinline))
|
||||
void *memchr(const void *s, int c, size_t n) {
|
||||
// When n is zero, a function that locates a character finds no occurrence.
|
||||
// Otherwise, decrement n to ensure sub_overflow overflows
|
||||
@@ -126,7 +126,7 @@ void *memchr(const void *s, int c, size_t n) {
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, noinline))
|
||||
void *memrchr(const void *s, int c, size_t n) {
|
||||
// memrchr is allowed to read up to n bytes from the object.
|
||||
// Search backward for the last matching character.
|
||||
@@ -150,7 +150,7 @@ void *memrchr(const void *s, int c, size_t n) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, noinline))
|
||||
size_t strlen(const char *s) {
|
||||
// strlen must stop as soon as it finds the terminator.
|
||||
// Aligning ensures loads beyond the terminator are safe.
|
||||
@@ -180,93 +180,6 @@ size_t strlen(const char *s) {
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar strcmp: compares two NUL-terminated strings byte by byte.
// Bytes are compared as unsigned char, so the result is negative, zero or
// positive according to the first differing byte (or zero if both strings
// end together).
static int __strcmp_s(const char *s1, const char *s2) {
  // Keep the const qualifier when reinterpreting the bytes as unsigned.
  const unsigned char *u1 = (const unsigned char *)s1;
  const unsigned char *u2 = (const unsigned char *)s2;
  for (;; u1++, u2++) {
    if (*u1 != *u2) return *u1 - *u2;
    // Both bytes are equal; a terminator here ends both strings.
    if (*u1 == 0) return 0;
  }
}
|
||||
|
||||
static int __strcmp(const char *s1, const char *s2) {
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
// The terminator may come before the difference.
|
||||
break;
|
||||
}
|
||||
// We know all characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
|
||||
return 0;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
return __strcmp_s((char *)w1, (char *)w2);
|
||||
}
|
||||
|
||||
__attribute__((weak, always_inline))
|
||||
int strcmp(const char *s1, const char *s2) {
|
||||
// Skip the vector search when comparing against small literal strings.
|
||||
if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
|
||||
return __strcmp_s(s1, s2);
|
||||
}
|
||||
return __strcmp(s1, s2);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
int strncmp(const char *s1, const char *s2, size_t n) {
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
if (n > N) n = N;
|
||||
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
// The terminator may come before the difference.
|
||||
break;
|
||||
}
|
||||
// We know all characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(wasm_v128_load(w1))) {
|
||||
return 0;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Scalar algorithm.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return *u1 - *u2;
|
||||
if (*u1 == 0) break;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char *__strchrnul(const char *s, int c) {
|
||||
// strchrnul must stop as soon as it finds the terminator.
|
||||
// Aligning ensures loads beyond the terminator are safe.
|
||||
@@ -371,7 +284,7 @@ static v128_t __wasm_v128_chkbits(__wasm_v128_bitmap256_t bitmap, v128_t v) {
|
||||
|
||||
#undef wasm_i8x16_relaxed_swizzle
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, noinline))
|
||||
size_t strspn(const char *s, const char *c) {
|
||||
// strspn must stop as soon as it finds the terminator.
|
||||
// Aligning ensures loads beyond the terminator are safe.
|
||||
@@ -433,7 +346,7 @@ size_t strspn(const char *s, const char *c) {
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
__attribute__((weak, noinline))
|
||||
size_t strcspn(const char *s, const char *c) {
|
||||
if (!c[0] || !c[1]) return __strchrnul(s, *c) - s;
|
||||
|
||||
@@ -472,215 +385,6 @@ size_t strcspn(const char *s, const char *c) {
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD-friendly algorithms for substring searching
|
||||
// http://0x80.pl/notesen/2016-11-28-simd-strfind.html
|
||||
|
||||
// For haystacks of known length and large enough needles,
|
||||
// Boyer-Moore's bad-character rule may be useful,
|
||||
// as proposed by Horspool, Sunday and Raita.
|
||||
//
|
||||
// We augment the SIMD algorithm with Quick Search's
|
||||
// bad-character shift.
|
||||
//
|
||||
// https://igm.univ-mlv.fr/~lecroq/string/node14.html
|
||||
// https://igm.univ-mlv.fr/~lecroq/string/node18.html
|
||||
// https://igm.univ-mlv.fr/~lecroq/string/node19.html
|
||||
// https://igm.univ-mlv.fr/~lecroq/string/node22.html
|
||||
|
||||
static const char *__memmem(const char *haystk, size_t sh, //
|
||||
const char *needle, size_t sn, //
|
||||
uint8_t bmbc[256]) {
|
||||
// We've handled empty and single character needles.
|
||||
// The needle is not longer than the haystack.
|
||||
__builtin_assume(2 <= sn && sn <= sh);
|
||||
|
||||
// Find the farthest character not equal to the first one.
|
||||
size_t i = sn - 1;
|
||||
while (i > 0 && needle[0] == needle[i]) i--;
|
||||
if (i == 0) i = sn - 1;
|
||||
|
||||
// Subtracting ensures sub_overflow overflows
|
||||
// when we reach the end of the haystack.
|
||||
if (sh != SIZE_MAX) sh -= sn;
|
||||
|
||||
const v128_t fst = wasm_i8x16_splat(needle[0]);
|
||||
const v128_t lst = wasm_i8x16_splat(needle[i]);
|
||||
|
||||
// The last haystack offset for which loading blk_lst is safe.
|
||||
const char *H = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(sizeof(v128_t) + i));
|
||||
|
||||
while (haystk <= H) {
|
||||
const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk));
|
||||
const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + i));
|
||||
const v128_t eq_fst = wasm_i8x16_eq(fst, blk_fst);
|
||||
const v128_t eq_lst = wasm_i8x16_eq(lst, blk_lst);
|
||||
|
||||
const v128_t cmp = eq_fst & eq_lst;
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
// The terminator may come before the match.
|
||||
if (sh == SIZE_MAX && !wasm_i8x16_all_true(blk_fst)) break;
|
||||
// Find the offset of the first one bit (little-endian).
|
||||
// Each iteration clears that bit, tries again.
|
||||
for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) {
|
||||
size_t ctz = __builtin_ctz(mask);
|
||||
// The match may be after the end of the haystack.
|
||||
if (ctz > sh) return NULL;
|
||||
// We know the first character matches.
|
||||
if (!bcmp(haystk + ctz + 1, needle + 1, sn - 1)) {
|
||||
return haystk + ctz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t skip = sizeof(v128_t);
|
||||
if (sh == SIZE_MAX) {
|
||||
// Have we reached the end of the haystack?
|
||||
if (!wasm_i8x16_all_true(blk_fst)) return NULL;
|
||||
} else {
|
||||
// Apply the bad-character rule to the character to the right
|
||||
// of the righmost character of the search window.
|
||||
if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + sizeof(v128_t)]];
|
||||
// Have we reached the end of the haystack?
|
||||
if (__builtin_sub_overflow(sh, skip, &sh)) return NULL;
|
||||
}
|
||||
haystk += skip;
|
||||
}
|
||||
|
||||
// Scalar algorithm.
|
||||
for (size_t j = 0; j <= sh; j++) {
|
||||
for (size_t i = 0;; i++) {
|
||||
if (sn == i) return haystk;
|
||||
if (sh == SIZE_MAX && !haystk[i]) return NULL;
|
||||
if (needle[i] != haystk[i]) break;
|
||||
}
|
||||
haystk++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) {
|
||||
// Return immediately on empty needle.
|
||||
if (sn == 0) return (void *)vh;
|
||||
|
||||
// Return immediately when needle is longer than haystack.
|
||||
if (sn > sh) return NULL;
|
||||
|
||||
// Skip to the first matching character using memchr,
|
||||
// thereby handling single character needles.
|
||||
const char *needle = (char *)vn;
|
||||
const char *haystk = (char *)memchr(vh, *needle, sh);
|
||||
if (!haystk || sn == 1) return (void *)haystk;
|
||||
|
||||
// The haystack got shorter, is the needle now longer than it?
|
||||
sh -= haystk - (char *)vh;
|
||||
if (sn > sh) return NULL;
|
||||
|
||||
// Is Boyer-Moore's bad-character rule useful?
|
||||
if (sn < sizeof(v128_t) || sh - sn < sizeof(v128_t)) {
|
||||
return (void *)__memmem(haystk, sh, needle, sn, NULL);
|
||||
}
|
||||
|
||||
// Compute Boyer-Moore's bad-character shift function.
|
||||
// Only the last 255 characters of the needle matter for shifts up to 255,
|
||||
// which is good enough for most needles.
|
||||
size_t c = sn;
|
||||
size_t i = 0;
|
||||
if (c >= 255) {
|
||||
i = sn - 255;
|
||||
c = 255;
|
||||
}
|
||||
|
||||
#ifndef _REENTRANT
|
||||
static
|
||||
#endif
|
||||
uint8_t bmbc[256];
|
||||
memset(bmbc, c, sizeof(bmbc));
|
||||
for (; i < sn; i++) {
|
||||
// One less than the usual offset because
|
||||
// we advance at least one vector at a time.
|
||||
bmbc[(unsigned char)needle[i]] = sn - i - 1;
|
||||
}
|
||||
|
||||
return (void *)__memmem(haystk, sh, needle, sn, bmbc);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
char *strstr(const char *haystk, const char *needle) {
|
||||
// Return immediately on empty needle.
|
||||
if (!needle[0]) return (char *)haystk;
|
||||
|
||||
// Skip to the first matching character using strchr,
|
||||
// thereby handling single character needles.
|
||||
haystk = strchr(haystk, *needle);
|
||||
if (!haystk || !needle[1]) return (char *)haystk;
|
||||
|
||||
return (char *)__memmem(haystk, SIZE_MAX, needle, strlen(needle), NULL);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
char *strcasestr(const char *haystk, const char *needle) {
|
||||
// Return immediately on empty needle.
|
||||
if (!needle[0]) return (char *)haystk;
|
||||
|
||||
// We've handled empty needles.
|
||||
size_t sn = strlen(needle);
|
||||
__builtin_assume(sn >= 1);
|
||||
|
||||
// Find the farthest character not equal to the first one.
|
||||
size_t i = sn - 1;
|
||||
while (i > 0 && needle[0] == needle[i]) i--;
|
||||
if (i == 0) i = sn - 1;
|
||||
|
||||
const v128_t fstl = wasm_i8x16_splat(tolower(needle[0]));
|
||||
const v128_t fstu = wasm_i8x16_splat(toupper(needle[0]));
|
||||
const v128_t lstl = wasm_i8x16_splat(tolower(needle[i]));
|
||||
const v128_t lstu = wasm_i8x16_splat(toupper(needle[i]));
|
||||
|
||||
// The last haystk offset for which loading blk_lst is safe.
|
||||
const char *H = (char *)(__builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(sizeof(v128_t) + i));
|
||||
|
||||
while (haystk <= H) {
|
||||
const v128_t blk_fst = wasm_v128_load((v128_t *)(haystk));
|
||||
const v128_t blk_lst = wasm_v128_load((v128_t *)(haystk + i));
|
||||
const v128_t eq_fst =
|
||||
wasm_i8x16_eq(fstl, blk_fst) | wasm_i8x16_eq(fstu, blk_fst);
|
||||
const v128_t eq_lst =
|
||||
wasm_i8x16_eq(lstl, blk_lst) | wasm_i8x16_eq(lstu, blk_lst);
|
||||
|
||||
const v128_t cmp = eq_fst & eq_lst;
|
||||
if (wasm_v128_any_true(cmp)) {
|
||||
// The terminator may come before the match.
|
||||
if (!wasm_i8x16_all_true(blk_fst)) break;
|
||||
// Find the offset of the first one bit (little-endian).
|
||||
// Each iteration clears that bit, tries again.
|
||||
for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) {
|
||||
size_t ctz = __builtin_ctz(mask);
|
||||
if (!strncasecmp(haystk + ctz + 1, needle + 1, sn - 1)) {
|
||||
return (char *)haystk + ctz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Have we reached the end of the haystack?
|
||||
if (!wasm_i8x16_all_true(blk_fst)) return NULL;
|
||||
haystk += sizeof(v128_t);
|
||||
}
|
||||
|
||||
// Scalar algorithm.
|
||||
for (;;) {
|
||||
for (size_t i = 0;; i++) {
|
||||
if (sn == i) return (char *)haystk;
|
||||
if (!haystk[i]) return NULL;
|
||||
if (tolower(needle[i]) != tolower(haystk[i])) break;
|
||||
}
|
||||
haystk++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Given the above SIMD implementations,
|
||||
// these are best implemented as
|
||||
// small wrappers over those functions.
|
||||
|
||||
@@ -1,172 +0,0 @@
|
||||
#include_next <strings.h> // the system strings.h
|
||||
|
||||
#ifndef _WASM_SIMD128_STRINGS_H
|
||||
#define _WASM_SIMD128_STRINGS_H
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdint.h>
|
||||
#include <wasm_simd128.h>
|
||||
#include <__macro_PAGESIZE.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __wasm_simd128__
|
||||
|
||||
#ifdef __OPTIMIZE_SIZE__
|
||||
|
||||
// bcmp is the same as memcmp but only compares for equality.
|
||||
int bcmp(const void *v1, const void *v2, size_t n);
|
||||
|
||||
#else // __OPTIMIZE_SIZE__
|
||||
|
||||
__attribute__((weak))
|
||||
int bcmp(const void *v1, const void *v2, size_t n) {
|
||||
// Scalar algorithm.
|
||||
if (n < sizeof(v128_t)) {
|
||||
const unsigned char *u1 = (unsigned char *)v1;
|
||||
const unsigned char *u2 = (unsigned char *)v2;
|
||||
while (n--) {
|
||||
if (*u1 != *u2) return 1;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// bcmp is allowed to read up to n bytes from each object.
|
||||
// Unaligned loads handle the case where the objects
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)v1;
|
||||
const v128_t *w2 = (v128_t *)v2;
|
||||
while (n) {
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(wasm_v128_load(w1) ^ wasm_v128_load(w2))) {
|
||||
return 1;
|
||||
}
|
||||
// This makes n a multiple of sizeof(v128_t)
|
||||
// for every iteration except the first.
|
||||
size_t align = (n - 1) % sizeof(v128_t) + 1;
|
||||
w1 = (v128_t *)((char *)w1 + align);
|
||||
w2 = (v128_t *)((char *)w2 + align);
|
||||
n -= align;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __OPTIMIZE_SIZE__
|
||||
|
||||
__attribute__((always_inline))
|
||||
static v128_t __tolower8x16(v128_t v) {
|
||||
__i8x16 i = v;
|
||||
i = i + wasm_i8x16_splat(INT8_MAX - ('Z'));
|
||||
i = i > wasm_i8x16_splat(INT8_MAX - ('Z' - 'A' + 1));
|
||||
i = i & wasm_i8x16_splat('a' - 'A');
|
||||
return v | i;
|
||||
}
|
||||
|
||||
// Byte-at-a-time case-insensitive string comparison.
// Returns the difference between the first pair of lowercased bytes
// (read as unsigned char) that differ, or 0 if both strings end
// without any difference.
static int __strcasecmp_s(const char *s1, const char *s2) {
  const unsigned char *lhs = (const unsigned char *)s1;
  const unsigned char *rhs = (const unsigned char *)s2;
  int a;
  int b;
  // Advance while the lowercased bytes agree and are not the terminator.
  do {
    a = tolower(*lhs++);
    b = tolower(*rhs++);
  } while (a == b && a != 0);
  // Either a mismatch (non-zero difference), or both are terminators (zero).
  return a - b;
}
|
||||
|
||||
static int __strcasecmp(const char *s1, const char *s2) {
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; N >= sizeof(v128_t); N -= sizeof(v128_t)) {
|
||||
v128_t v1 = __tolower8x16(wasm_v128_load(w1));
|
||||
v128_t v2 = __tolower8x16(wasm_v128_load(w2));
|
||||
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(v1 ^ v2)) {
|
||||
// The terminator may come before the difference.
|
||||
break;
|
||||
}
|
||||
// We know all characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(v1)) {
|
||||
return 0;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
return __strcasecmp_s((char *)w1, (char *)w2);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
int strcasecmp(const char *s1, const char *s2) {
|
||||
// Skip the vector search when comparing against small literal strings.
|
||||
if (__builtin_constant_p(strlen(s2)) && strlen(s2) < sizeof(v128_t)) {
|
||||
return __strcasecmp_s(s1, s2);
|
||||
}
|
||||
return __strcasecmp(s1, s2);
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
int strncasecmp(const char *s1, const char *s2, size_t n) {
|
||||
// How many bytes can be read before pointers go out of bounds.
|
||||
size_t N = __builtin_wasm_memory_size(0) * PAGESIZE - //
|
||||
(size_t)(s1 > s2 ? s1 : s2);
|
||||
if (n > N) n = N;
|
||||
|
||||
// Unaligned loads handle the case where the strings
|
||||
// have mismatching alignments.
|
||||
const v128_t *w1 = (v128_t *)s1;
|
||||
const v128_t *w2 = (v128_t *)s2;
|
||||
for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
|
||||
v128_t v1 = __tolower8x16(wasm_v128_load(w1));
|
||||
v128_t v2 = __tolower8x16(wasm_v128_load(w2));
|
||||
|
||||
// Find any single bit difference.
|
||||
if (wasm_v128_any_true(v1 ^ v2)) {
|
||||
// The terminator may come before the difference.
|
||||
break;
|
||||
}
|
||||
// We know all characters are equal.
|
||||
// If any is a terminator the strings are equal.
|
||||
if (!wasm_i8x16_all_true(v1)) {
|
||||
return 0;
|
||||
}
|
||||
w1++;
|
||||
w2++;
|
||||
}
|
||||
|
||||
// Scalar algorithm.
|
||||
const unsigned char *u1 = (unsigned char *)w1;
|
||||
const unsigned char *u2 = (unsigned char *)w2;
|
||||
while (n--) {
|
||||
int c1 = tolower(*u1);
|
||||
int c2 = tolower(*u2);
|
||||
if (c1 != c2) return c1 - c2;
|
||||
if (c1 == 0) break;
|
||||
u1++;
|
||||
u2++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // __wasm_simd128__
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // _WASM_SIMD128_STRINGS_H
|
||||
@@ -1,41 +0,0 @@
|
||||
# Use strcasecmp and strncasecmp.
|
||||
--- sqlite3.c.orig
|
||||
+++ sqlite3.c
|
||||
@@ -35685,35 +35685,15 @@
|
||||
return sqlite3StrICmp(zLeft, zRight);
|
||||
}
|
||||
SQLITE_PRIVATE int sqlite3StrICmp(const char *zLeft, const char *zRight){
|
||||
- unsigned char *a, *b;
|
||||
- int c, x;
|
||||
- a = (unsigned char *)zLeft;
|
||||
- b = (unsigned char *)zRight;
|
||||
- for(;;){
|
||||
- c = *a;
|
||||
- x = *b;
|
||||
- if( c==x ){
|
||||
- if( c==0 ) break;
|
||||
- }else{
|
||||
- c = (int)UpperToLower[c] - (int)UpperToLower[x];
|
||||
- if( c ) break;
|
||||
- }
|
||||
- a++;
|
||||
- b++;
|
||||
- }
|
||||
- return c;
|
||||
+ return strcasecmp(zLeft, zRight);
|
||||
}
|
||||
SQLITE_API int sqlite3_strnicmp(const char *zLeft, const char *zRight, int N){
|
||||
- register unsigned char *a, *b;
|
||||
if( zLeft==0 ){
|
||||
return zRight ? -1 : 0;
|
||||
}else if( zRight==0 ){
|
||||
return 1;
|
||||
}
|
||||
- a = (unsigned char *)zLeft;
|
||||
- b = (unsigned char *)zRight;
|
||||
- while( N-- > 0 && *a!=0 && UpperToLower[*a]==UpperToLower[*b]){ a++; b++; }
|
||||
- return N<0 ? 0 : UpperToLower[*a] - UpperToLower[*b];
|
||||
+ return strncasecmp(zLeft, zRight, N);
|
||||
}
|
||||
|
||||
/*
|
||||
Reference in New Issue
Block a user