Files
sqlite3/ext/stats/stats.go

110 lines
2.7 KiB
Go
Raw Normal View History

// Package stats provides aggregate functions for statistics.
//
// Functions:
//   - stddev_pop: population standard deviation
//   - stddev_samp: sample standard deviation
//   - var_pop: population variance
//   - var_samp: sample variance
//   - covar_pop: population covariance
//   - covar_samp: sample covariance
//   - corr: correlation coefficient
//
// See: [ANSI SQL Aggregate Functions]
//
// [ANSI SQL Aggregate Functions]: https://www.oreilly.com/library/view/sql-in-a/9780596155322/ch04s02.html
package stats
import "github.com/ncruces/go-sqlite3"
// Register registers statistics functions.
func Register(db *sqlite3.Conn) {
flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
2023-09-01 02:26:30 +01:00
db.CreateWindowFunction("var_pop", 1, flags, newVariance(var_pop))
db.CreateWindowFunction("var_samp", 1, flags, newVariance(var_samp))
db.CreateWindowFunction("stddev_pop", 1, flags, newVariance(stddev_pop))
db.CreateWindowFunction("stddev_samp", 1, flags, newVariance(stddev_samp))
db.CreateWindowFunction("covar_pop", 2, flags, newCovariance(var_pop))
db.CreateWindowFunction("covar_samp", 2, flags, newCovariance(var_samp))
2023-09-02 00:48:55 +01:00
db.CreateWindowFunction("corr", 2, flags, newCovariance(corr))
2023-08-31 16:30:52 +01:00
}
// Kinds of statistic an aggregate can report. The value is stored in the
// kind field of variance and covariance and selects the result in Value.
const (
	// var_pop selects the population variance; for covariance aggregates
	// it selects the population covariance.
	var_pop = iota
	// var_samp selects the sample variance; for covariance aggregates
	// it selects the sample covariance.
	var_samp
	// stddev_pop selects the population standard deviation.
	stddev_pop
	// stddev_samp selects the sample standard deviation.
	stddev_samp
	// corr selects the correlation coefficient.
	corr
)
func newVariance(kind int) func() sqlite3.AggregateFunction {
return func() sqlite3.AggregateFunction { return &variance{kind: kind} }
2023-08-31 16:30:52 +01:00
}
2023-09-01 02:26:30 +01:00
type variance struct {
2023-08-31 16:30:52 +01:00
kind int
welford
}
2023-09-01 02:26:30 +01:00
func (fn *variance) Value(ctx sqlite3.Context) {
2023-08-31 16:30:52 +01:00
var r float64
2023-09-01 02:26:30 +01:00
switch fn.kind {
2023-08-31 16:30:52 +01:00
case var_pop:
2023-09-01 02:26:30 +01:00
r = fn.var_pop()
2023-08-31 16:30:52 +01:00
case var_samp:
2023-09-01 02:26:30 +01:00
r = fn.var_samp()
2023-08-31 16:30:52 +01:00
case stddev_pop:
2023-09-01 02:26:30 +01:00
r = fn.stddev_pop()
2023-08-31 16:30:52 +01:00
case stddev_samp:
2023-09-01 02:26:30 +01:00
r = fn.stddev_samp()
2023-08-31 16:30:52 +01:00
}
ctx.ResultFloat(r)
}
2023-09-01 02:26:30 +01:00
func (fn *variance) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
2023-08-31 16:30:52 +01:00
if a := arg[0]; a.Type() != sqlite3.NULL {
2023-09-01 02:26:30 +01:00
fn.enqueue(a.Float())
2023-08-31 16:30:52 +01:00
}
}
2023-09-01 02:26:30 +01:00
func (fn *variance) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
2023-08-31 16:30:52 +01:00
if a := arg[0]; a.Type() != sqlite3.NULL {
2023-09-01 02:26:30 +01:00
fn.dequeue(a.Float())
}
}
// newCovariance builds a factory for covariance aggregates. Every
// invocation of the returned function allocates a new covariance whose
// kind field is set to the given kind and whose accumulator is zeroed.
func newCovariance(kind int) func() sqlite3.AggregateFunction {
	factory := func() sqlite3.AggregateFunction {
		agg := new(covariance)
		agg.kind = kind
		return agg
	}
	return factory
}
// covariance is the aggregate state behind the two-argument statistics
// functions (covar_pop, covar_samp, corr).
type covariance struct {
	// welford2 is the embedded two-variable accumulator; Step and Inverse
	// call its enqueue/dequeue, and Value calls its covar_pop, covar_samp
	// and correlation methods.
	welford2
	// kind picks the statistic that Value reports.
	kind int
}
func (fn *covariance) Value(ctx sqlite3.Context) {
var r float64
switch fn.kind {
case var_pop:
r = fn.covar_pop()
case var_samp:
r = fn.covar_samp()
2023-09-02 00:48:55 +01:00
case corr:
r = fn.correlation()
2023-09-01 02:26:30 +01:00
}
ctx.ResultFloat(r)
}
// Step feeds one (x, y) pair, taken from the first two arguments, into
// the running accumulator. The pair is ignored entirely when either
// argument is NULL.
func (fn *covariance) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
	x, y := arg[0], arg[1]
	if x.Type() == sqlite3.NULL || y.Type() == sqlite3.NULL {
		return
	}
	fn.enqueue(x.Float(), y.Float())
}
func (fn *covariance) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
a, b := arg[0], arg[1]
if a.Type() != sqlite3.NULL && b.Type() != sqlite3.NULL {
fn.dequeue(a.Float(), b.Float())
2023-08-31 16:30:52 +01:00
}
}