More libc.

This commit is contained in:
Nuno Cruces
2025-04-27 13:34:29 +01:00
parent b32db76da6
commit 29c5c816cb
5 changed files with 480 additions and 400 deletions

Binary file not shown.

View File

@@ -1505,433 +1505,428 @@
(local $19 i32)
(local $20 v128)
(local $scratch i32)
(block $block
(br_if $block
(i32.eqz
(local.get $2)
)
(if
(i32.ge_u
(local.get $1)
(i32.const 2)
)
(br_if $block
(i32.lt_u
(local.get $1)
(i32.const 2)
)
)
(local.set $14
(i32.mul
(local.get $1)
(local.get $2)
)
)
(local.set $15
(i32.and
(local.get $2)
(i32.const 15)
)
)
(local.set $9
(i32.and
(local.get $2)
(i32.const -16)
)
)
(local.set $16
(i32.add
(local.get $0)
(local.get $2)
)
)
(local.set $17
(i32.lt_u
(local.get $2)
(i32.const 16)
)
)
(loop $label5
(local.set $6
(i32.eq
(then
(local.set $14
(i32.mul
(local.get $1)
(i32.const 2)
(local.get $2)
)
)
(local.set $18
(i32.le_u
(i32.add
(local.get $0)
(i32.mul
(i32.add
(local.tee $13
(select
(i32.const 1)
(local.tee $1
(i32.wrap_i64
(i64.div_u
(i64.sub
(i64.mul
(i64.extend_i32_u
(local.get $1)
)
(i64.const 5)
)
(i64.const 1)
)
(i64.const 11)
)
)
)
(local.get $6)
)
)
(i32.const 1)
)
(local.get $2)
)
)
(local.set $15
(i32.and
(local.get $2)
(i32.const 15)
)
)
(local.set $9
(i32.and
(local.get $2)
(i32.const -16)
)
)
(local.set $16
(i32.add
(local.get $0)
(local.get $2)
)
)
(local.set $11
(local.tee $10
(i32.mul
(local.get $2)
(local.get $13)
)
(local.set $17
(i32.lt_u
(local.get $2)
(i32.const 16)
)
)
(loop $label4
(block $block1
(br_if $block1
(i32.gt_u
(local.tee $5
(i32.sub
(local.get $11)
(local.get $10)
)
)
(local.get $11)
)
(loop $label5
(local.set $6
(i32.eq
(local.get $1)
(i32.const 2)
)
(loop $label3
(br_if $block1
(i32.le_s
(call_indirect $0 (type $0)
(local.tee $4
(i32.add
(local.get $0)
(local.tee $12
(local.get $5)
)
)
)
(local.tee $5
(i32.add
(local.get $4)
(local.get $10)
)
)
(local.get $3)
)
(i32.const 0)
)
)
(block $block3
(block $block4
(block $block2
(br_if $block2
(local.get $17)
)
(br_if $block2
(i32.and
(i32.eqz
(local.get $18)
)
(i32.lt_u
(local.get $5)
(i32.add
(local.get $12)
(local.get $16)
)
(local.set $18
(i32.le_u
(i32.add
(local.get $0)
(i32.mul
(i32.add
(local.tee $13
(select
(i32.const 1)
(local.tee $1
(i32.wrap_i64
(i64.div_u
(i64.sub
(i64.mul
(i64.extend_i32_u
(local.get $1)
)
(i64.const 5)
)
(i64.const 1)
)
(i64.const 11)
)
)
)
(local.get $6)
)
)
(i32.const 1)
)
(local.get $2)
)
)
(local.get $0)
)
)
(local.set $11
(local.tee $10
(i32.mul
(local.get $2)
(local.get $13)
)
)
)
(loop $label4
(block $block
(br_if $block
(i32.gt_u
(local.tee $5
(i32.sub
(local.get $11)
(local.get $10)
)
)
(local.get $11)
)
)
(loop $label3
(br_if $block
(i32.le_s
(call_indirect $0 (type $0)
(local.tee $4
(i32.add
(local.get $0)
(local.tee $12
(local.get $5)
)
)
)
(local.tee $5
(i32.add
(local.get $4)
(local.get $10)
)
)
(local.get $3)
)
(i32.const 0)
)
)
(block $block2
(block $block3
(block $block1
(br_if $block1
(local.get $17)
)
(br_if $block1
(i32.and
(i32.eqz
(local.get $18)
)
(i32.lt_u
(local.get $5)
(i32.add
(local.get $12)
(local.get $16)
)
)
)
)
(local.set $5
(i32.add
(local.get $5)
(local.get $9)
)
)
(local.set $7
(i32.add
(local.get $4)
(local.get $9)
)
)
(local.set $6
(local.get $9)
)
(loop $label
(local.set $20
(v128.load align=1
(local.get $4)
)
)
(v128.store align=1
(local.get $4)
(v128.load align=1
(local.tee $8
(i32.add
(local.get $4)
(local.get $10)
)
)
)
)
(v128.store align=1
(local.get $8)
(local.get $20)
)
(local.set $4
(i32.add
(local.get $4)
(i32.const 16)
)
)
(br_if $label
(local.tee $6
(i32.sub
(local.get $6)
(i32.const 16)
)
)
)
)
(local.set $6
(local.get $15)
)
(br_if $block2
(i32.eq
(local.get $2)
(local.get $9)
)
)
(br $block3)
)
(local.set $7
(local.get $4)
)
(local.set $6
(local.get $2)
)
)
(br_if $block2
(i32.lt_u
(block (result i32)
(local.set $scratch
(i32.sub
(local.get $6)
(i32.const 1)
)
)
(if
(local.tee $4
(i32.and
(local.get $6)
(i32.const 3)
)
)
(then
(local.set $6
(i32.and
(local.get $6)
(i32.const -4)
)
)
(loop $label1
(local.set $19
(i32.load8_u
(local.get $7)
)
)
(i32.store8
(local.get $7)
(i32.load8_u
(local.get $5)
)
)
(i32.store8
(local.get $5)
(local.get $19)
)
(local.set $5
(i32.add
(local.get $5)
(i32.const 1)
)
)
(local.set $7
(i32.add
(local.get $7)
(i32.const 1)
)
)
(br_if $label1
(local.tee $4
(i32.sub
(local.get $4)
(i32.const 1)
)
)
)
)
)
)
(local.get $scratch)
)
(i32.const 3)
)
)
(loop $label2
(local.set $4
(i32.load8_u
(local.get $7)
)
)
(i32.store8
(local.get $7)
(i32.load8_u
(local.get $5)
)
)
(i32.store8
(local.get $5)
(local.get $4)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 1)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 1)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 2)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 2)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 3)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 3)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $7
(i32.add
(local.get $7)
(i32.const 4)
)
)
(local.set $5
(i32.add
(local.get $5)
(local.get $9)
)
)
(local.set $7
(i32.add
(local.get $4)
(local.get $9)
)
)
(local.set $6
(local.get $9)
)
(loop $label
(local.set $20
(v128.load align=1
(local.get $4)
)
)
(v128.store align=1
(local.get $4)
(v128.load align=1
(local.tee $8
(i32.add
(local.get $4)
(local.get $10)
)
)
)
)
(v128.store align=1
(local.get $8)
(local.get $20)
)
(local.set $4
(i32.add
(local.get $4)
(i32.const 16)
)
)
(br_if $label
(local.tee $6
(i32.sub
(local.get $6)
(i32.const 16)
)
)
)
)
(local.set $6
(local.get $15)
)
(br_if $block3
(i32.eq
(local.get $2)
(local.get $9)
)
)
(br $block4)
)
(local.set $7
(local.get $4)
)
(local.set $6
(local.get $2)
)
)
(br_if $block3
(i32.lt_u
(block (result i32)
(local.set $scratch
(i32.sub
(local.get $6)
(i32.const 1)
)
)
(if
(local.tee $4
(i32.and
(local.get $6)
(i32.const 3)
)
)
(then
(local.set $6
(i32.and
(local.get $6)
(i32.const -4)
)
)
(loop $label1
(local.set $19
(i32.load8_u
(local.get $7)
)
)
(i32.store8
(local.get $7)
(i32.load8_u
(local.get $5)
)
)
(i32.store8
(local.get $5)
(local.get $19)
)
(local.set $5
(i32.add
(local.get $5)
(i32.const 1)
)
)
(local.set $7
(i32.add
(local.get $7)
(i32.const 1)
)
)
(br_if $label1
(local.tee $4
(i32.sub
(local.get $4)
(i32.const 1)
)
)
)
)
)
)
(local.get $scratch)
)
(i32.const 3)
)
)
(loop $label2
(local.set $4
(i32.load8_u
(local.get $7)
)
)
(i32.store8
(local.get $7)
(i32.load8_u
(local.get $5)
)
)
(i32.store8
(local.get $5)
(local.get $4)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 1)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 1)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 2)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 2)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $8
(i32.load8_u
(local.tee $4
(i32.add
(local.get $7)
(i32.const 3)
)
)
)
)
(i32.store8
(local.get $4)
(i32.load8_u
(local.tee $4
(i32.add
(local.get $5)
(i32.const 3)
)
)
)
)
(i32.store8
(local.get $4)
(local.get $8)
)
(local.set $7
(i32.add
(local.get $7)
(i32.const 4)
)
)
(local.set $5
(i32.add
(local.get $5)
(i32.const 4)
)
)
(br_if $label2
(local.tee $6
(i32.sub
(local.get $6)
(i32.const 4)
)
)
)
)
)
(br_if $label3
(i32.le_u
(local.tee $5
(i32.sub
(local.get $12)
(local.get $10)
(br_if $label2
(local.tee $6
(i32.sub
(local.get $6)
(i32.const 4)
)
)
)
)
(local.get $12)
)
(br_if $label3
(i32.le_u
(local.tee $5
(i32.sub
(local.get $12)
(local.get $10)
)
)
(local.get $12)
)
)
)
)
)
(br_if $label4
(i32.lt_u
(local.tee $11
(i32.add
(local.get $2)
(local.get $11)
(br_if $label4
(i32.lt_u
(local.tee $11
(i32.add
(local.get $2)
(local.get $11)
)
)
(local.get $14)
)
(local.get $14)
)
)
)
(br_if $label5
(i32.ge_u
(local.get $13)
(i32.const 2)
(br_if $label5
(i32.ge_u
(local.get $13)
(i32.const 2)
)
)
)
)

29
sqlite3/libc/math.h Normal file
View File

@@ -0,0 +1,29 @@
#ifndef _WASM_SIMD128_MATH_H
#define _WASM_SIMD128_MATH_H
#include <wasm_simd128.h>
#include_next <math.h> // the system math.h
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __wasm_relaxed_simd__
// fma returns x * y + z, computed with the relaxed-SIMD multiply-add.
// Each scalar operand is broadcast into an f64x2 vector, the vector
// multiply-add runs once, and lane 0 of the result is returned.
// NOTE(review): the relaxed-SIMD proposal appears to allow relaxed madd to
// round either once (fused) or twice (unfused) depending on the engine,
// while C's fma requires a single rounding - confirm the targeted runtimes
// fuse.
__attribute__((weak))
double fma(double x, double y, double z) {
  const v128_t product_sum = wasm_f64x2_relaxed_madd(
      wasm_f64x2_splat(x), wasm_f64x2_splat(y), wasm_f64x2_splat(z));
  return wasm_f64x2_extract_lane(product_sum, 0);
}
#endif // __wasm_relaxed_simd__
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _WASM_SIMD128_MATH_H

View File

@@ -17,22 +17,31 @@ extern "C" {
__attribute__((weak))
void qsort(void *base, size_t nel, size_t width,
int (*comp)(const void *, const void *)) {
if (width == 0) return;
// If nel is zero, we're required to do nothing.
// If it's one, the array is already sorted.
size_t wnel = width * nel;
size_t gap = nel;
while (gap > 1) {
// Use 64-bit unsigned arithmetic to avoid intermediate overflow.
// Absent overflow, gap will be strictly less than its previous value.
// Once it is one or zero, set it to one: do a final pass, and stop.
gap = (5ull * gap - 1) / 11;
if (gap == 0) gap = 1;
// It'd be undefined behavior for wnel to overflow a size_t;
// or if width is zero: the base pointer would be invalid.
// Since gap is strictly less than nel, we can assume
// wgap is strictly less than wnel.
size_t wgap = width * gap;
__builtin_assume(wgap < wnel);
for (size_t i = wgap; i < wnel; i += width) {
// Even without overflow flags, the overflow builtin helps the compiler.
for (size_t j = i; !__builtin_sub_overflow(j, wgap, &j);) {
char *a = j + (char *)base;
char *b = a + wgap;
if (comp(a, b) <= 0) break;
// This well known loop is automatically vectorized.
size_t s = width;
do {
char tmp = *a;

47
sqlite3/libc/strings.h Normal file
View File

@@ -0,0 +1,47 @@
#ifndef _WASM_SIMD128_STRINGS_H
#define _WASM_SIMD128_STRINGS_H
#include <stddef.h>
#include <wasm_simd128.h>
#include_next <strings.h> // the system strings.h
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __wasm_simd128__
// bcmp reports whether the first n bytes at v1 and v2 differ.
// Unlike memcmp it only tests for equality: it returns 0 when all n bytes
// match and 1 otherwise (always 0 when n is zero).
//
// The buffers are walked through byte pointers so that no misaligned
// v128_t pointer is ever formed; wasm_v128_load takes a const void * and
// is used here on addresses of arbitrary alignment. The explicit casts
// also keep const intact and keep the body valid when the header is
// included from C++.
__attribute__((weak))
int bcmp(const void *v1, const void *v2, size_t n) {
  const unsigned char *u1 = (const unsigned char *)v1;
  const unsigned char *u2 = (const unsigned char *)v2;

  // Compare one 16-byte vector at a time while enough bytes remain.
  for (; n >= sizeof(v128_t); n -= sizeof(v128_t)) {
    // XOR leaves a set bit wherever the two vectors differ.
    if (wasm_v128_any_true(wasm_v128_load(u1) ^ wasm_v128_load(u2))) {
      return 1;
    }
    u1 += sizeof(v128_t);
    u2 += sizeof(v128_t);
  }

  // Finish the remaining tail (fewer than 16 bytes) byte-by-byte.
  for (; n != 0; n--) {
    if (*u1 != *u2) return 1;
    u1++;
    u2++;
  }
  return 0;
}
#endif // __wasm_simd128__
#ifdef __cplusplus
} // extern "C"
#endif
#endif // _WASM_SIMD128_STRINGS_H