This commit is contained in:
Nuno Cruces
2025-04-24 00:38:59 +01:00
parent bb87a920f7
commit 29aa365806
2 changed files with 101 additions and 106 deletions

View File

@@ -142,38 +142,42 @@
(local $3 i32)
(local $4 i32)
(local $5 i32)
(local $6 i32)
(local $6 v128)
(local $7 v128)
(local $8 v128)
(local $scratch i32)
(block $block
(br_if $block
(i32.eqz
(local.get $2)
(block $block2
(block $block
(br_if $block
(i32.eqz
(local.get $2)
)
)
)
(local.set $4
(i32.and
(local.get $0)
(i32.const 15)
(local.set $3
(i32.and
(local.get $0)
(i32.const 15)
)
)
(local.set $4
(i32.sub
(local.get $2)
(i32.const 1)
)
)
)
(block $block2
(block $block1
(br_if $block1
(i32.eqz
(v128.any_true
(local.tee $7
(local.tee $6
(i8x16.eq
(v128.load
(local.tee $3
(local.tee $2
(i32.and
(local.get $0)
(i32.const -16)
)
)
)
(local.tee $8
(local.tee $7
(i8x16.splat
(local.get $1)
)
@@ -188,123 +192,109 @@
(local.tee $5
(i32.and
(i8x16.bitmask
(local.get $7)
(local.get $6)
)
(i32.shl
(i32.const -1)
(local.get $4)
(local.get $3)
)
)
)
)
)
(local.set $1
(local.get $2)
(local.get $4)
)
(br $block2)
)
(br_if $block
(i32.lt_u
(local.get $2)
(i32.gt_u
(local.tee $1
(i32.sub
(local.get $2)
(local.tee $3
(i32.sub
(i32.const 16)
(local.get $4)
)
(i32.add
(local.get $3)
(local.get $4)
)
)
)
)
)
(br_if $block
(i32.eqz
(local.get $1)
)
)
(local.set $3
(i32.add
(local.get $0)
(local.get $3)
)
)
(block $block3
(loop $label
(br_if $block3
(v128.any_true
(local.tee $7
(i8x16.eq
(v128.load
(local.get $3)
)
(local.get $8)
)
)
)
)
(br_if $block
(i32.gt_u
(local.tee $0
(i32.sub
(local.get $1)
(i32.const 16)
)
)
(local.get $1)
)
)
(local.set $3
(i32.add
(local.get $3)
(i32.const 16)
)
)
(br_if $label
(i32.eqz
(block (result i32)
(local.set $scratch
(i32.eq
(local.get $1)
(i32.const 16)
)
(local.get $4)
)
)
(local.set $2
(i32.add
(i32.sub
(local.get $0)
(local.get $3)
)
(i32.const 16)
)
)
(loop $label
(if
(v128.any_true
(local.tee $6
(i8x16.eq
(v128.load
(local.get $2)
)
(local.set $1
(local.get $0)
)
(local.get $scratch)
(local.get $7)
)
)
)
(then
(local.set $5
(i8x16.bitmask
(local.get $6)
)
)
(local.set $3
(i32.const 0)
)
(br $block2)
)
)
(local.set $2
(i32.add
(local.get $2)
(i32.const 16)
)
)
(br_if $label
(i32.ge_u
(local.get $1)
(local.tee $1
(i32.sub
(local.get $1)
(i32.const 16)
)
)
)
)
(br $block)
)
(local.set $5
(i8x16.bitmask
(local.get $7)
)
(return
(i32.const 0)
)
)
(select
(i32.add
(local.get $2)
(local.tee $0
(i32.ctz
(local.get $5)
)
)
)
(local.set $6
(select
(i32.add
(local.get $3)
(local.tee $0
(i32.ctz
(local.get $5)
)
)
)
(i32.const 0)
(i32.lt_u
(local.get $0)
(local.get $1)
)
(i32.const 0)
(i32.le_u
(local.get $0)
(i32.add
(local.get $1)
(local.get $3)
)
)
)
(local.get $6)
)
(func $strlen (param $0 i32) (result i32)
(local $1 i32)

View File

@@ -71,18 +71,22 @@ int memcmp(const void *v1, const void *v2, size_t n) {
__attribute__((weak))
void *memchr(const void *v, int c, size_t n) {
if (n-- == 0) {
return NULL;
}
uintptr_t align = (uintptr_t)v % sizeof(v128_t);
const v128_t *w = (void *)(v - align);
const v128_t wc = wasm_i8x16_splat(c);
while (n) {
while (true) {
const v128_t cmp = wasm_i8x16_eq(*w, wc);
if (wasm_v128_any_true(cmp)) {
int mask = wasm_i8x16_bitmask(cmp) >> align << align;
__builtin_assume(mask || align);
if (mask) {
size_t ctz = __builtin_ctz(mask);
return ctz < n ? (void *)w + ctz : NULL;
return ctz <= n + align ? (void *)w + ctz : NULL;
}
}
if (__builtin_sub_overflow(n, sizeof(v128_t) - align, &n)) {
@@ -91,7 +95,6 @@ void *memchr(const void *v, int c, size_t n) {
align = 0;
w++;
}
return NULL;
}
__attribute__((weak))
@@ -244,6 +247,7 @@ size_t strspn(const char *s, const char *c) {
volatile v128_t *w = (void *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
while (*c && (byteset[*(uint8_t *)c] = 1)) c++;
#pragma unroll 4
while (byteset[*(uint8_t *)s]) s++;
@@ -263,6 +267,7 @@ size_t strcspn(const char *s, const char *c) {
volatile v128_t *w = (void *)byteset;
#pragma unroll
for (size_t i = sizeof(byteset) / sizeof(v128_t); i--;) w[i] = (v128_t){};
while ((byteset[*(uint8_t *)c] = 1) && *c) c++;
#pragma unroll 4
while (!byteset[*(uint8_t *)s]) s++;