From 341bd063e8fb75d921a8c0cf90f11a11f3dab4b1 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Tue, 20 May 2025 11:58:40 +0100 Subject: [PATCH] More fuzzing. --- .github/workflows/test.yml | 4 +- sqlite3/libc/libc.wasm | Bin 6394 -> 6398 bytes sqlite3/libc/libc.wat | 213 +++++++++++----------- sqlite3/libc/libc_test.go | 361 +++++++++++++++++++++++++++++++------ sqlite3/libc/string.h | 17 +- sqlite3/libc/strings.h | 2 +- 6 files changed, 424 insertions(+), 173 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5651bc9..215b399 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -112,7 +112,7 @@ jobs: version: '10.1' flags: '-test.v -test.short' - name: openbsd - version: '7.6' + version: '7.7' flags: '-test.v -test.short' runs-on: ubuntu-latest needs: test @@ -128,7 +128,7 @@ jobs: run: .github/workflows/build-test.sh - name: Test - uses: cross-platform-actions/action@v0.27.0 + uses: cross-platform-actions/action@v0.28.0 with: operating_system: ${{ matrix.os.name }} architecture: ${{ matrix.os.arch }} diff --git a/sqlite3/libc/libc.wasm b/sqlite3/libc/libc.wasm index e9962648748b65a3557011bb65de7a59f477ea15..ba5147f25433cbe509f473668ca2881f245b1e76 100755 GIT binary patch delta 255 zcmXAjF;2rk6h!~u^{$Q9PU1jE!R*pPkrUuWa$KOC0t#eTKst&&gnQ0_6mbDcI!X?~ z1(1UH4$YV5X=eWXtbSLoPt4!DEq~7WZCuya7cqgM2XIX5Q_reV4d{yEVhcOO#SmsU zSp)W_WH-Ll&3R1Tpx)oMhsZ;vaI(J(d0UYdAM*QP#-rtXVM^s9&E5F5JWB?=sc4cg zoU^df5%zEkWMZ@ZB~fsXt-Nti|GJYw6uI{#6SxnO-c+j84jOg> SwwroEzHlhq2dmxlaWa2atSsmN delta 247 zcmX9&J8r^25S`hz*GS9y0}&~i-4;>!1lf&55Cx}zBCTYS|I4or#J}?quNi)~Z^i3Oc;R0l 128 || len(s1) != len(s2) { + t.SkipNow() + } + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + + got := call(memcmp, uint64(ptr1), uint64(ptr2), uint64(len(s1))) + want := strings.Compare(s1, s2) + + if sign(int32(got)) != want { + t.Errorf("memcmp(%q, %q) = %d, want %d", + s1, s2, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strcmp(f *testing.F) { + const s1 = compareTest1 + const s2 = compareTest2 + + for i := range len(compareTest1) + 1 { + f.Add(term(s1[i:]), term(s2[i:])) + } + + f.Fuzz(func(t *testing.T, s1, s2 string) { + if len(s1) > 128 || len(s2) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + memory[ptr1+len(s1)] = 0 + memory[ptr2+len(s2)] = 0 + + got := call(strcmp, uint64(ptr1), uint64(ptr2)) + want := strings.Compare(term(s1), term(s2)) + + if sign(int32(got)) != want { + t.Errorf("strcmp(%q, %q) = %d, want %d", + s1, s2, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strncmp(f *testing.F) { + const s1 = compareTest1 + const s2 = compareTest2 + + for i := range len(compareTest1) + 1 { + f.Add(term(s1[i:]), term(s2[i:]), uint8(len(s1))) + } + + f.Fuzz(func(t *testing.T, s1, s2 string, n uint8) { + if len(s1) > 128 || len(s2) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + memory[ptr1+len(s1)] = 0 + memory[ptr2+len(s2)] = 0 + + got := call(strncmp, uint64(ptr1), uint64(ptr2), uint64(n)) + want := bytes.Compare( + term(memory[ptr1:][:n]), + term(memory[ptr2:][:n])) + + if sign(int32(got)) != want { + t.Errorf("strncmp(%q, %q, %d) = %d, want %d", + s1, s2, n, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strcasecmp(f *testing.F) { + const s1 = compareTest1 + const s2 = compareTest2 + + for i := range len(compareTest1) + 1 { + f.Add(term(s1[i:]), term(s2[i:])) + } + + f.Fuzz(func(t *testing.T, s1, s2 string) { + if len(s1) > 128 || len(s2) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + memory[ptr1+len(s1)] = 0 + memory[ptr2+len(s2)] = 0 + + got := call(strcasecmp, uint64(ptr1), uint64(ptr2)) + want := bytes.Compare( + lower(term(memory[ptr1:])), + lower(term(memory[ptr2:]))) + + if sign(int32(got)) != want { + t.Errorf("strcasecmp(%q, %q) = %d, want %d", + s1, s2, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strncasecmp(f *testing.F) { + const s1 = compareTest1 + const s2 = compareTest2 + + for i := range len(compareTest1) + 1 { + f.Add(term(s1[i:]), term(s2[i:]), uint8(len(s1))) + } + + f.Fuzz(func(t *testing.T, s1, s2 string, n uint8) { + if len(s1) > 128 || len(s2) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], s1) + copy(memory[ptr2:], s2) + memory[ptr1+len(s1)] = 0 + memory[ptr2+len(s2)] = 0 + + got := call(strncasecmp, uint64(ptr1), uint64(ptr2), uint64(n)) + want := bytes.Compare( + lower(term(memory[ptr1:][:n])), + lower(term(memory[ptr2:][:n]))) + + if sign(int32(got)) != want { + t.Errorf("strncasecmp(%q, %q, %d) = %d, want %d", + s1, s2, n, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strspn(f *testing.F) { + for _, t := range searchTests { + f.Add(t.haystk, t.needle) + } + + f.Fuzz(func(t *testing.T, text, chars string) { + if len(text) > 128 || len(chars) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], text) + copy(memory[ptr2:], chars) + memory[ptr1+len(text)] = 0 + memory[ptr2+len(chars)] = 0 + + got := call(strspn, uint64(ptr1), uint64(ptr2)) + + text = term(text) + chars = term(chars) + want := strings.IndexFunc(text, func(r rune) bool { + if uint32(r) >= utf8.RuneSelf { + t.Skip() + } + return strings.IndexByte(chars, byte(r)) < 0 + }) + if want < 0 { + want = len(text) + } + + if uint32(got) != uint32(want) { + t.Errorf("strspn(%q, %q) = %d, want %d", + text, chars, uint32(got), uint32(want)) + } + }) +} + +func Fuzz_strcspn(f *testing.F) { + for _, t := range searchTests { + f.Add(t.haystk, t.needle) + } + + f.Fuzz(func(t *testing.T, text, chars string) { + if len(text) > 128 || len(chars) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], text) + copy(memory[ptr2:], chars) + memory[ptr1+len(text)] = 0 + memory[ptr2+len(chars)] = 0 + + got := call(strcspn, uint64(ptr1), uint64(ptr2)) + + text = term(text) + chars = term(chars) + want := strings.IndexFunc(text, func(r rune) bool { + if uint32(r) >= utf8.RuneSelf { + t.Skip() + } + return strings.IndexByte(chars, byte(r)) >= 0 + }) + if want < 0 { + want = len(text) + } + + if uint32(got) != uint32(want) { + t.Errorf("strcspn(%q, %q) = %d, want %d", + text, chars, uint32(got), uint32(want)) + } + }) +} + func Fuzz_memmem(f *testing.F) { tt := append(searchTests, searchTest{"abcABCabc", "A", 3}, @@ -910,19 +1094,21 @@ func Fuzz_memmem(f *testing.F) { searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", 17}, ) - for i := range tt { - f.Add(tt[i].haystk, tt[i].needle) + for _, t := range tt { + f.Add(t.haystk, t.needle) } f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 32 { + if len(haystk) > 128 || len(needle) > 128 { t.SkipNow() } - clear(memory[ptr1 : ptr1+256]) - clear(memory[ptr2 : ptr2+256]) copy(memory[ptr1:], haystk) copy(memory[ptr2:], needle) + got := call(memmem, + uint64(ptr1), uint64(len(haystk)), + uint64(ptr2), uint64(len(needle))) + want := strings.Index(haystk, needle) if want >= 0 { want = ptr1 + want @@ -930,9 +1116,6 @@ func Fuzz_memmem(f *testing.F) { want = 0 } - got := call(memmem, - uint64(ptr1), uint64(len(haystk)), - uint64(ptr2), uint64(len(needle))) if uint32(got) != uint32(want) { t.Errorf("memmem(%q, %q) = %d, want %d", haystk, needle, uint32(got), uint32(want)) @@ -947,18 +1130,20 @@ func Fuzz_strstr(f *testing.F) { searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, ) - for i := range tt { - f.Add(tt[i].haystk, tt[i].needle) + for _, t := range tt { + f.Add(t.haystk, t.needle) } f.Fuzz(func(t *testing.T, haystk, needle string) { - if len(haystk) > 128 || len(needle) > 32 { + if len(haystk) > 128 || len(needle) > 128 { t.SkipNow() } - clear(memory[ptr1 : ptr1+256]) - clear(memory[ptr2 : ptr2+256]) copy(memory[ptr1:], haystk) copy(memory[ptr2:], needle) + memory[ptr1+len(haystk)] = 0 + memory[ptr2+len(needle)] = 0 + + got := call(strstr, uint64(ptr1), uint64(ptr2)) want := strings.Index(term(haystk), term(needle)) if want >= 0 { @@ -967,7 +1152,6 @@ func Fuzz_strstr(f *testing.F) { want = 0 } - got := call(strstr, uint64(ptr1), uint64(ptr2)) if uint32(got) != uint32(want) { t.Errorf("strstr(%q, %q) = %d, want %d", haystk, needle, uint32(got), uint32(want)) @@ -975,10 +1159,52 @@ func Fuzz_strstr(f *testing.F) { }) } -func fill(s []byte, v byte) { - for i := range s { - s[i] = v +func Fuzz_strcasestr(f *testing.F) { + tt := append(searchTests, + searchTest{"A", "a", 0}, + searchTest{"a", "A", 0}, + searchTest{"Z", "z", 0}, + searchTest{"z", "Z", 0}, + searchTest{"@", "`", -1}, + searchTest{"`", "@", -1}, + searchTest{"[", "{", -1}, + searchTest{"{", "[", -1}, + searchTest{"abcABCabc", "A", 0}, + searchTest{"fofofofofofofoffofoobarfoo", "FoFFoF", 12}, + searchTest{"fofofofofofofOffOfoobarfoo", "FoFFoF", 12}, + searchTest{"fofofofofofo\x00foffofoobar", "foffof", -1}, + searchTest{"0000000000000000\x000123456789012345678901234567890", "0123456789012345", -1}, + ) + + for _, t := range tt { + f.Add(t.haystk, t.needle) } + + f.Fuzz(func(t *testing.T, haystk, needle string) { + if len(haystk) > 128 || len(needle) > 128 { + t.SkipNow() + } + copy(memory[ptr1:], haystk) + copy(memory[ptr2:], needle) + memory[ptr1+len(haystk)] = 0 + memory[ptr2+len(needle)] = 0 + + got := call(strcasestr, uint64(ptr1), uint64(ptr2)) + + want := bytes.Index( + lower(term(memory[ptr1:])), + lower(term(memory[ptr2:]))) + if want >= 0 { + want = ptr1 + want + } else { + want = 0 + } + + if uint32(got) != uint32(want) { + t.Errorf("strcasestr(%q, %q) = %d, want %d", + haystk, needle, uint32(got), uint32(want)) + } + }) } func sign(x int32) int { @@ -992,9 +1218,26 @@ func sign(x int32) int { } } -func term(s string) string { - if i := strings.IndexByte(s, 0); i >= 0 { - return s[:i] +func fill(s []byte, v byte) { + for i := range s { + s[i] = v + } +} + +func lower(s []byte) []byte { + for i, c := range s { + if 'A' <= c && c <= 'Z' { + s[i] = c - 'A' + 'a' + } + } + return s +} + +func term[T interface{ []byte | string }](s T) T { + for i, c := range []byte(s) { + if c == 0 { + return s[:i] + } } return s } diff --git a/sqlite3/libc/string.h b/sqlite3/libc/string.h index 98ce011..fed1c8f 100644 --- a/sqlite3/libc/string.h +++ b/sqlite3/libc/string.h @@ -463,8 +463,7 @@ size_t strcspn(const char *s, const char *c) { // as proposed by Horspool, Sunday and Raita. // // We augment the SIMD algorithm with Quick Search's -// bad-character shift. This does NOT depend on the order -// in which the window matched. +// bad-character shift. // // https://www-igm.univ-mlv.fr/~lecroq/string/node14.html // https://www-igm.univ-mlv.fr/~lecroq/string/node18.html @@ -483,6 +482,10 @@ static const char *__memmem(const char *haystk, size_t sh, while (i > 0 && needle[0] == needle[i]) i--; if (i == 0) i = sn - 1; + // Subtracting ensures sub_overflow overflows + // when we reach the end of the haystack. + if (sh != SIZE_MAX) sh -= sn; + const v128_t fst = wasm_i8x16_splat(needle[0]); const v128_t lst = wasm_i8x16_splat(needle[i]); @@ -505,7 +508,8 @@ static const char *__memmem(const char *haystk, size_t sh, for (uint32_t mask = wasm_i8x16_bitmask(cmp); mask; mask &= mask - 1) { size_t ctz = __builtin_ctz(mask); // The match may be after the end of the haystack. - if (ctz + sn > sh) return NULL; + if (ctz > sh) return NULL; + // We know the first character matches. if (!bcmp(haystk + ctz + 1, needle + 1, sn - 1)) { return haystk + ctz; } @@ -522,14 +526,12 @@ static const char *__memmem(const char *haystk, size_t sh, if (bmbc) skip += bmbc[(unsigned char)haystk[sn - 1 + sizeof(v128_t)]]; // Have we reached the end of the haystack? if (__builtin_sub_overflow(sh, skip, &sh)) return NULL; - // Is the needle longer than the haystack? - if (sn > sh) return NULL; } haystk += skip; } // Scalar algorithm. - for (size_t j = 0; j <= sh - sn; j++) { + for (size_t j = 0; j <= sh; j++) { for (size_t i = 0;; i++) { if (sn == i) return haystk; if (sh == SIZE_MAX && !haystk[i]) return NULL; @@ -581,8 +583,7 @@ void *memmem(const void *vh, size_t sh, const void *vn, size_t sn) { for (; i < sn; i++) { // One less than the usual offset because // we advance at least one vector at a time. - size_t t = sn - i - 1; - bmbc[(unsigned char)needle[i]] = t; + bmbc[(unsigned char)needle[i]] = sn - i - 1; } return (void *)__memmem(haystk, sh, needle, sn, bmbc); diff --git a/sqlite3/libc/strings.h b/sqlite3/libc/strings.h index 61f92b5..4cd92c9 100644 --- a/sqlite3/libc/strings.h +++ b/sqlite3/libc/strings.h @@ -35,7 +35,7 @@ int bcmp(const void *v1, const void *v2, size_t n) { return 0; } - // memcmpeq is allowed to read up to n bytes from each object. + // bcmp is allowed to read up to n bytes from each object. // Unaligned loads handle the case where the objects // have mismatching alignments. const v128_t *w1 = (v128_t *)v1;