Files
sqlite3/ext/stats/percentile.go

110 lines
2.1 KiB
Go
Raw Normal View History

2024-05-31 17:36:16 +01:00
package stats
import (
2024-06-02 13:37:29 +01:00
"encoding/json"
"fmt"
2024-05-31 17:36:16 +01:00
"math"
"slices"
"github.com/ncruces/go-sqlite3"
"github.com/ncruces/go-sqlite3/internal/util"
2024-10-18 12:57:22 +01:00
"github.com/ncruces/sort/quick"
2024-05-31 17:36:16 +01:00
)
2025-01-20 14:39:36 +00:00
// Compatible with:
// https://sqlite.org/src/file/ext/misc/percentile.c
2024-05-31 17:36:16 +01:00
// Kinds of percentile aggregate, selecting how the position argument
// is interpreted and whether the result is interpolated.
const (
	// median takes no position argument and always evaluates at 0.5.
	median = iota
	// percentile_100 takes positions in the range 0..100,
	// which are divided by 100 before use.
	percentile_100
	// percentile_cont takes positions in the range 0..1
	// and interpolates between neighboring values.
	percentile_cont
	// percentile_disc takes positions in the range 0..1
	// and returns a stored value without interpolating.
	percentile_disc
)
2024-06-06 19:52:49 +01:00
func newPercentile(kind int) func() sqlite3.AggregateFunction {
return func() sqlite3.AggregateFunction { return &percentile{kind: kind} }
2024-05-31 17:36:16 +01:00
}
2024-06-06 19:52:49 +01:00
// percentile holds the state of one percentile aggregate.
type percentile struct {
	// nums collects the values seen so far, in no particular order.
	nums []float64
	// arg1 is a copy of the raw text of the position argument,
	// captured by Step and decoded as JSON by Value.
	arg1 []byte
	// kind is one of median, percentile_100, percentile_cont
	// or percentile_disc.
	kind int
}
2024-06-06 19:52:49 +01:00
func (q *percentile) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
2024-10-01 15:16:06 +01:00
a := arg[0]
f := a.Float()
if f != 0.0 || a.NumericType() != sqlite3.NULL {
q.nums = append(q.nums, f)
2024-05-31 17:36:16 +01:00
}
2024-10-01 15:16:06 +01:00
if q.kind != median && q.arg1 == nil {
q.arg1 = append(q.arg1, arg[1].RawText()...)
2024-05-31 17:36:16 +01:00
}
}
2024-06-06 19:52:49 +01:00
func (q *percentile) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
2024-10-17 08:15:44 +01:00
a := arg[0]
f := a.Float()
if f != 0.0 || a.NumericType() != sqlite3.NULL {
i := slices.Index(q.nums, f)
l := len(q.nums) - 1
q.nums[i] = q.nums[l]
q.nums = q.nums[:l]
}
2024-06-06 19:52:49 +01:00
}
func (q *percentile) Value(ctx sqlite3.Context) {
2024-06-02 13:37:29 +01:00
if len(q.nums) == 0 {
2024-05-31 17:36:16 +01:00
return
}
2024-06-02 13:37:29 +01:00
var (
err error
float float64
floats []float64
)
2024-05-31 17:36:16 +01:00
if q.kind == median {
2024-10-17 08:15:44 +01:00
float, err = q.at(0.5)
2024-06-02 13:37:29 +01:00
ctx.ResultFloat(float)
} else if err = json.Unmarshal(q.arg1, &float); err == nil {
2024-10-17 08:15:44 +01:00
float, err = q.at(float)
2024-06-02 13:37:29 +01:00
ctx.ResultFloat(float)
} else if err = json.Unmarshal(q.arg1, &floats); err == nil {
2024-10-17 08:15:44 +01:00
err = q.atMore(floats)
2024-06-02 13:37:29 +01:00
ctx.ResultJSON(floats)
2024-05-31 17:36:16 +01:00
}
2024-06-02 13:37:29 +01:00
if err != nil {
2024-07-26 13:29:24 +01:00
ctx.ResultError(fmt.Errorf("percentile: %w", err)) // notest
2024-05-31 17:36:16 +01:00
}
2024-06-02 13:37:29 +01:00
}
2024-05-31 17:36:16 +01:00
2024-10-17 08:15:44 +01:00
func (q *percentile) at(pos float64) (float64, error) {
if q.kind == percentile_100 {
2024-10-16 14:00:22 +01:00
pos = pos / 100
}
2024-06-02 13:37:29 +01:00
if pos < 0 || pos > 1 {
return 0, util.ErrorString("invalid pos")
}
2024-05-31 17:36:16 +01:00
2024-10-17 08:15:44 +01:00
i, f := math.Modf(pos * float64(len(q.nums)-1))
m0 := quick.Select(q.nums, int(i))
2024-06-02 13:37:29 +01:00
2024-10-17 08:15:44 +01:00
if f == 0 || q.kind == percentile_disc {
2024-06-02 13:37:29 +01:00
return m0, nil
2024-05-31 17:36:16 +01:00
}
2024-10-17 08:15:44 +01:00
m1 := slices.Min(q.nums[int(i)+1:])
2024-10-18 12:57:22 +01:00
return util.Lerp(m0, m1, f), nil
2024-06-02 13:37:29 +01:00
}
2024-10-17 08:15:44 +01:00
func (q *percentile) atMore(pos []float64) error {
2024-06-02 13:37:29 +01:00
for i := range pos {
2024-10-17 08:15:44 +01:00
v, err := q.at(pos[i])
2024-06-02 13:37:29 +01:00
if err != nil {
return err
}
pos[i] = v
}
return nil
2024-05-31 17:36:16 +01:00
}