Mirror of https://github.com/ncruces/go-sqlite3.git
Naming, volatile.
@@ -39,11 +39,11 @@ void *memmove(void *dest, const void *src, size_t n) {
 #ifdef __wasm_simd128__
 
 __attribute__((weak))
-int memcmp(const void *v1, const void *v2, size_t n) {
+int memcmp(const void *vl, const void *vr, size_t n) {
   // Scalar algorithm.
   if (n < sizeof(v128_t)) {
-    const unsigned char *u1 = (unsigned char *)v1;
-    const unsigned char *u2 = (unsigned char *)v2;
+    const unsigned char *u1 = (unsigned char *)vl;
+    const unsigned char *u2 = (unsigned char *)vr;
     while (n--) {
       if (*u1 != *u2) return *u1 - *u2;
       u1++;
@@ -56,16 +56,16 @@ int memcmp(const void *v1, const void *v2, size_t n) {
   // Find the first different character in the objects.
   // Unaligned loads handle the case where the objects
   // have mismatching alignments.
-  const v128_t *w1 = (v128_t *)v1;
-  const v128_t *w2 = (v128_t *)v2;
+  const v128_t *v1 = (v128_t *)vl;
+  const v128_t *v2 = (v128_t *)vr;
   while (n) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(w1), wasm_v128_load(w2));
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2));
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Find the offset of the first zero bit (little-endian).
       size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp));
-      const unsigned char *u1 = (unsigned char *)w1 + ctz;
-      const unsigned char *u2 = (unsigned char *)w2 + ctz;
+      const unsigned char *u1 = (unsigned char *)v1 + ctz;
+      const unsigned char *u2 = (unsigned char *)v2 + ctz;
       // This may help the compiler if the function is inlined.
       __builtin_assume(*u1 - *u2 != 0);
       return *u1 - *u2;
@@ -73,15 +73,15 @@ int memcmp(const void *v1, const void *v2, size_t n) {
     // This makes n a multiple of sizeof(v128_t)
     // for every iteration except the first.
     size_t align = (n - 1) % sizeof(v128_t) + 1;
-    w1 = (v128_t *)((char *)w1 + align);
-    w2 = (v128_t *)((char *)w2 + align);
+    v1 = (v128_t *)((char *)v1 + align);
+    v2 = (v128_t *)((char *)v2 + align);
     n -= align;
   }
   return 0;
 }
 
 __attribute__((weak))
-void *memchr(const void *v, int c, size_t n) {
+void *memchr(const void *s, int c, size_t n) {
   // When n is zero, a function that locates a character finds no occurrence.
   // Otherwise, decrement n to ensure sub_overflow overflows
   // when n would go equal-to-or-below zero.
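
A note on the memcmp hunks above: the advance by (n - 1) % sizeof(v128_t) + 1 is what lets the vector loop stay within the n bytes it is allowed to read. The first advance is partial, so the remaining count becomes a multiple of 16 and every later load ends exactly at or before byte n - 1, overlapping bytes that were already compared instead of running past the end. A minimal sketch of the arithmetic, with illustrative names that are not part of the file:

    #include <assert.h>
    #include <stddef.h>

    // Worked example of the step size used in memcmp above, assuming
    // 16-byte vectors: starting from len = 37, the loads cover bytes
    // 0..15, 5..20 and 21..36 -- the last load ends exactly at len - 1.
    static void check_memcmp_step(void) {
      const size_t len = 37;
      size_t n = len, offset = 0;
      while (n >= 16) {
        // a 16-byte compare at `offset` would happen here...
        assert(offset + 16 <= len);       // never reads past byte len - 1
        size_t step = (n - 1) % 16 + 1;   // 16 except possibly the first time
        offset += step;
        n -= step;
      }
      assert(n == 0 && offset == len);
    }
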
@@ -92,12 +92,13 @@ void *memchr(const void *v, int c, size_t n) {
   // memchr must behave as if it reads characters sequentially
   // and stops as soon as a match is found.
   // Aligning ensures loads beyond the first match are safe.
-  uintptr_t align = (uintptr_t)v % sizeof(v128_t);
-  const v128_t *w = (v128_t *)((char *)v - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  // Volatile avoids compiler tricks around out of bounds loads.
+  uintptr_t align = (uintptr_t)s % sizeof(v128_t);
+  const volatile v128_t *v = (v128_t *)((char *)s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, wc);
+    const v128_t cmp = wasm_i8x16_eq(*v, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
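
A note on the volatile qualifier introduced here: these routines deliberately perform aligned 16-byte loads that can cover bytes before the start or past the end of the object being searched. That is safe at runtime, presumably because an aligned v128 load cannot cross a Wasm 64 KiB page boundary, so it stays within allocated linear memory whenever it touches any valid byte; but it is out of bounds as far as the C abstract machine is concerned, so an optimizer could in principle exploit it. Reading through a volatile pointer forces the load to happen exactly as written. A minimal sketch of the idiom, assuming wasm_simd128.h; load_block is a hypothetical helper, not code from the file:

    #include <stdint.h>
    #include <wasm_simd128.h>

    // Hypothetical helper illustrating the recurring pattern in this diff:
    // round s down to a 16-byte boundary, load the whole block through a
    // volatile pointer, and report how many low lanes belong to bytes
    // before s so the caller can mask them out of the lane bitmask.
    static inline v128_t load_block(const char *s, uintptr_t *align) {
      *align = (uintptr_t)s % sizeof(v128_t);
      const volatile v128_t *v = (const v128_t *)(s - *align);
      return *v;  // volatile: performed exactly as written, once
    }
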
@@ -113,7 +114,7 @@ void *memchr(const void *v, int c, size_t n) {
         // That's a match, unless it is beyond the end of the object.
         // Recall that we decremented n, so less-than-or-equal-to is correct.
         size_t ctz = __builtin_ctz(mask);
-        return ctz - align <= n ? (char *)w + ctz : NULL;
+        return ctz - align <= n ? (char *)v + ctz : NULL;
       }
     }
     // Decrement n; if it overflows we're done.
@@ -121,28 +122,28 @@ void *memchr(const void *v, int c, size_t n) {
       return NULL;
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
 __attribute__((weak))
-void *memrchr(const void *v, int c, size_t n) {
+void *memrchr(const void *s, int c, size_t n) {
   // memrchr is allowed to read up to n bytes from the object.
   // Search backward for the last matching character.
-  const v128_t *w = (v128_t *)((char *)v + n);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const v128_t *v = (v128_t *)((char *)s + n);
+  const v128_t vc = wasm_i8x16_splat(c);
   for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
-    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--w), wc);
+    const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(--v), vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Find the offset of the last one bit (little-endian).
       size_t clz = __builtin_clz(wasm_i8x16_bitmask(cmp)) - 15;
-      return (char *)(w + 1) - clz;
+      return (char *)(v + 1) - clz;
     }
   }
 
   // Scalar algorithm.
-  const char *a = (char *)w;
+  const char *a = (char *)v;
   while (n--) {
     if (*(--a) == (char)c) return (char *)a;
   }
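
A note on the __builtin_clz(...) - 15 in memrchr: wasm_i8x16_bitmask packs one bit per lane into the low 16 bits of a 32-bit int, so if the highest matching lane is p, __builtin_clz returns 31 - p and clz - 15 equals 16 - p; subtracting that from the one-past-the-block pointer v + 1 lands exactly on lane p. A small self-check of that arithmetic (check_clz_offset is an illustrative name, not from the file):

    #include <assert.h>

    // For every possible highest matching lane p, verify that the
    // expression used in memrchr above points at lane p of the block.
    static void check_clz_offset(void) {
      for (int p = 0; p < 16; p++) {
        unsigned mask = 1u << p;              // highest set bit is lane p
        int clz = __builtin_clz(mask) - 15;   // == 16 - p for 32-bit int
        assert(16 - clz == p);                // so (v + 1) - clz hits lane p
      }
    }
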
@@ -154,12 +155,13 @@ size_t strlen(const char *s) {
   // strlen must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   for (;;) {
+    const v128_t vv = *v;
     // Bitmask is slow on AArch64, all_true is much faster.
-    if (!wasm_i8x16_all_true(*w)) {
-      const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){});
+    if (!wasm_i8x16_all_true(vv)) {
+      const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){});
       // Clear the bits corresponding to alignment (little-endian)
       // so we can count trailing zeros.
       int mask = wasm_i8x16_bitmask(cmp) >> align << align;
@@ -170,11 +172,11 @@ size_t strlen(const char *s) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
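
A note on the recurring mask >> align << align: it keeps only the lanes at index align and above, discarding "matches" in the bytes before s that the rounded-down load pulled in; it is equivalent to masking with ~((1 << align) - 1). A tiny illustration (drop_low_lanes is a hypothetical name, not a function in the file):

    #include <assert.h>
    #include <stdint.h>

    // Keep only bitmask lanes >= align, exactly as the shift pair does.
    static inline int drop_low_lanes(int mask, uintptr_t align) {
      return mask >> align << align;
    }

    static void check_drop_low_lanes(void) {
      // align == 3: lanes 0 and 2 are cleared, lane 3 survives.
      assert(drop_low_lanes(0x000D, 3) == 0x0008);
      assert(drop_low_lanes(0x000D, 3) == (0x000D & ~((1 << 3) - 1)));
    }
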
@@ -268,12 +270,14 @@ int strncmp(const char *s1, const char *s2, size_t n) {
 static char *__strchrnul(const char *s, int c) {
   // strchrnul must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
-  const v128_t wc = wasm_i8x16_splat(c);
+  const volatile v128_t *v = (v128_t *)(s - align);
+  const v128_t vc = wasm_i8x16_splat(c);
 
   for (;;) {
-    const v128_t cmp = wasm_i8x16_eq(*w, (v128_t){}) | wasm_i8x16_eq(*w, wc);
+    const v128_t vv = *v;
+    const v128_t cmp = wasm_i8x16_eq(vv, (v128_t){}) | wasm_i8x16_eq(vv, vc);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -286,11 +290,11 @@ static char *__strchrnul(const char *s, int c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w + __builtin_ctz(mask);
+        return (char *)v + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
@@ -371,14 +375,15 @@ __attribute__((weak))
 size_t strspn(const char *s, const char *c) {
   // strspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   if (!c[0]) return 0;
   if (!c[1]) {
-    const v128_t wc = wasm_i8x16_splat(*c);
+    const v128_t vc = wasm_i8x16_splat(*c);
     for (;;) {
-      const v128_t cmp = wasm_i8x16_eq(*w, wc);
+      const v128_t cmp = wasm_i8x16_eq(*v, vc);
       // Bitmask is slow on AArch64, all_true is much faster.
       if (!wasm_i8x16_all_true(cmp)) {
         // Clear the bits corresponding to alignment (little-endian)
@@ -391,11 +396,11 @@ size_t strspn(const char *s, const char *c) {
         // it's as if we didn't find anything.
         if (mask) {
           // Find the offset of the first one bit (little-endian).
-          return (char *)w - s + __builtin_ctz(mask);
+          return (char *)v - s + __builtin_ctz(mask);
         }
       }
       align = 0;
-      w++;
+      v++;
     }
   }
 
@@ -407,7 +412,7 @@ size_t strspn(const char *s, const char *c) {
   }
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, all_true is much faster.
     if (!wasm_i8x16_all_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -420,11 +425,11 @@ size_t strspn(const char *s, const char *c) {
       // it's as if we didn't find anything.
       if (mask) {
         // Find the offset of the first one bit (little-endian).
-        return (char *)w - s + __builtin_ctz(mask);
+        return (char *)v - s + __builtin_ctz(mask);
       }
     }
     align = 0;
-    w++;
+    v++;
   }
 }
 
@@ -434,8 +439,9 @@ size_t strcspn(const char *s, const char *c) {
 
   // strcspn must stop as soon as it finds the terminator.
   // Aligning ensures loads beyond the terminator are safe.
+  // Volatile avoids compiler tricks around out of bounds loads.
   uintptr_t align = (uintptr_t)s % sizeof(v128_t);
-  const v128_t *w = (v128_t *)(s - align);
+  const volatile v128_t *v = (v128_t *)(s - align);
 
   __wasm_v128_bitmap256_t bitmap = {};
 
@@ -445,7 +451,7 @@ size_t strcspn(const char *s, const char *c) {
   } while (*c++);
 
   for (;;) {
-    const v128_t cmp = __wasm_v128_chkbits(bitmap, *w);
+    const v128_t cmp = __wasm_v128_chkbits(bitmap, *v);
     // Bitmask is slow on AArch64, any_true is much faster.
     if (wasm_v128_any_true(cmp)) {
       // Clear the bits corresponding to alignment (little-endian)
@@ -458,11 +464,11 @@ size_t strcspn(const char *s, const char *c) {
      // it's as if we didn't find anything.
      if (mask) {
        // Find the offset of the first one bit (little-endian).
-       return (char *)w - s + __builtin_ctz(mask);
+       return (char *)v - s + __builtin_ctz(mask);
      }
    }
    align = 0;
-   w++;
+   v++;
  }
 }
 
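
A note on the strspn/strcspn hunks: they consult a 256-bit membership bitmap of byte values through __wasm_v128_bitmap256_t and __wasm_v128_chkbits, helpers defined elsewhere in this file and not shown in this diff. As a rough scalar model of the idea only, and not their actual implementation, a membership bitmap looks like this:

    #include <stdint.h>

    // Scalar sketch (assumption): one bit per possible byte value; the
    // SIMD helper checks 16 bytes against such a table in parallel.
    typedef struct { uint8_t bits[32]; } byte_bitmap256;

    static inline void bitmap_set(byte_bitmap256 *b, unsigned char c) {
      b->bits[c / 8] |= (uint8_t)(1u << (c % 8));
    }

    static inline int bitmap_test(const byte_bitmap256 *b, unsigned char c) {
      return (b->bits[c / 8] >> (c % 8)) & 1;
    }
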