More stats.

This commit is contained in:
Nuno Cruces
2024-01-09 03:20:59 +00:00
parent d48a92fcdf
commit af42af2978
4 changed files with 78 additions and 20 deletions

View File

@@ -34,6 +34,11 @@ func Register(db *sqlite3.Conn) {
db.CreateWindowFunction("covar_pop", 2, flags, newCovariance(var_pop))
db.CreateWindowFunction("covar_samp", 2, flags, newCovariance(var_samp))
db.CreateWindowFunction("corr", 2, flags, newCovariance(corr))
db.CreateWindowFunction("regr_avgx", 2, flags, newCovariance(regr_avgx))
db.CreateWindowFunction("regr_avgy", 2, flags, newCovariance(regr_avgy))
db.CreateWindowFunction("regr_r2", 2, flags, newCovariance(regr_r2))
db.CreateWindowFunction("regr_slope", 2, flags, newCovariance(regr_slope))
db.CreateWindowFunction("regr_intercept", 2, flags, newCovariance(regr_intercept))
}
const (
@@ -42,6 +47,11 @@ const (
stddev_pop
stddev_samp
corr
regr_avgx
regr_avgy
regr_r2
regr_slope
regr_intercept
)
func newVariance(kind int) func() sqlite3.AggregateFunction {
@@ -98,6 +108,16 @@ func (fn *covariance) Value(ctx sqlite3.Context) {
r = fn.covar_samp()
case corr:
r = fn.correlation()
case regr_avgx:
r = fn.regr_avgx()
case regr_avgy:
r = fn.regr_avgy()
case regr_r2:
r = fn.regr_r2()
case regr_slope:
r = fn.regr_slope()
case regr_intercept:
r = fn.regr_intercept()
}
ctx.ResultFloat(r)
}

View File

@@ -103,7 +103,10 @@ func TestRegister_covariance(t *testing.T) {
}
stmt, _, err := db.Prepare(`SELECT
corr(x, y), covar_samp(x, y), covar_pop(x, y) FROM data`)
corr(x, y), covar_samp(x, y), covar_pop(x, y),
regr_avgx(x, y), regr_avgy(x, y), regr_r2(x, y),
regr_slope(x, y), regr_intercept(x, y)
FROM data`)
if err != nil {
t.Fatal(err)
}
@@ -119,6 +122,21 @@ func TestRegister_covariance(t *testing.T) {
if got := stmt.ColumnFloat(2); got != 17 {
t.Errorf("got %v, want 17", got)
}
if got := stmt.ColumnFloat(3); got != 75 {
t.Errorf("got %v, want 75", got)
}
if got := stmt.ColumnFloat(4); got != 4.2 {
t.Errorf("got %v, want 4.2", got)
}
if got := stmt.ColumnFloat(5); got != 0.9763513513513513 {
t.Errorf("got %v, want 0.9763513513513513", got)
}
if got := stmt.ColumnFloat(6); got != 0.17 {
t.Errorf("got %v, want 0.17", got)
}
if got := stmt.ColumnFloat(7); got != -8.55 {
t.Errorf("got %v, want -8.55", got)
}
}
{

View File

@@ -25,12 +25,12 @@ https://sqlite.org/lang_aggfunc.html
## Linear regression
- [ ] `REGR_AVGX(dependent, independent)`
- [ ] `REGR_AVGY(dependent, independent)`
- [X] `REGR_AVGX(dependent, independent)`
- [X] `REGR_AVGY(dependent, independent)`
- [ ] `REGR_COUNT(dependent, independent)`
- [ ] `REGR_INTERCEPT(dependent, independent)`
- [ ] `REGR_R2(dependent, independent)`
- [ ] `REGR_SLOPE(dependent, independent)`
- [X] `REGR_INTERCEPT(dependent, independent)`
- [X] `REGR_R2(dependent, independent)`
- [X] `REGR_SLOPE(dependent, independent)`
- [ ] `REGR_SXX(dependent, independent)`
- [ ] `REGR_SXY(dependent, independent)`
- [ ] `REGR_SYY(dependent, independent)`

View File

@@ -48,8 +48,8 @@ func (w *welford) dequeue(x float64) {
}
// welford2 holds the running state of a two-variable Welford-style
// accumulator, from which covariance, correlation and simple linear
// regression statistics are derived.
//
// By convention y is the dependent variable and x the independent one.
type welford2 struct {
	// m1y and m1x are the running means of y and x;
	// m2y and m2x are the running sums of squared deviations from those means.
	m1y, m2y kahan
	m1x, m2x kahan
	// cov is the running sum of co-deviations of y and x.
	cov kahan
	// n is the number of samples currently accounted for.
	n uint64
}
@@ -63,33 +63,53 @@ func (w welford2) covar_samp() float64 {
}
// correlation returns the correlation coefficient of y and x:
// the sum of co-deviations divided by the square root of the product
// of both sums of squared deviations.
//
// There is no guard for a zero denominator: if either variable has no
// spread the result follows IEEE 754 division rules (NaN or ±Inf).
func (w welford2) correlation() float64 {
	return w.cov.hi / math.Sqrt(w.m2y.hi*w.m2x.hi)
}
func (w *welford2) enqueue(x, y float64) {
// regr_avgy reports the running mean of the dependent variable (y).
// Only the hi part of the accumulator is returned; its lo part is ignored.
func (w welford2) regr_avgy() float64 {
	meanY := w.m1y
	return meanY.hi
}
// regr_avgx reports the running mean of the independent variable (x).
// Only the hi part of the accumulator is returned; its lo part is ignored.
func (w welford2) regr_avgx() float64 {
	meanX := w.m1x
	return meanX.hi
}
// regr_slope reports the slope of the least-squares line of y on x:
// the sum of co-deviations divided by the sum of squared deviations of x.
//
// No guard is applied when x has no spread; the division then follows
// IEEE 754 rules (NaN for 0/0, ±Inf otherwise).
func (w welford2) regr_slope() float64 {
	sxy, sxx := w.cov.hi, w.m2x.hi
	return sxy / sxx
}
// regr_intercept reports the y-intercept of the least-squares line of y on x,
// computed as mean(y) - slope*mean(x) with the slope taken from regr_slope.
func (w welford2) regr_intercept() float64 {
	slope := w.regr_slope()
	return w.m1y.hi - w.m1x.hi*slope
}
// regr_r2 reports the coefficient of determination (the square of the
// correlation coefficient) of the least-squares line of y on x.
//
// Edge cases follow the usual SQL definition of REGR_R2:
//   - if x has no spread (vertical line) the result is undefined and the
//     0/0 division below yields NaN;
//   - if y has no spread but x does (horizontal line) the fit is exact
//     and the result is 1, rather than the NaN the plain formula gives.
func (w welford2) regr_r2() float64 {
	sxx, syy, sxy := w.m2x.hi, w.m2y.hi, w.cov.hi
	if syy == 0 && sxx != 0 {
		// A constant dependent variable is fitted perfectly by a flat line.
		return 1
	}
	return sxy * sxy / (syy * sxx)
}
// enqueue folds one (y, x) sample into the running state.
//
// The dependent variable y comes first, then the independent variable x.
// Each delta is taken twice, once against the mean before it is updated
// (d1) and once against the mean after (d2); their products feed the
// sums of squared deviations and the co-deviation sum.
func (w *welford2) enqueue(y, x float64) {
	w.n++
	// Deviations from the means before this sample is included.
	d1y := y - w.m1y.hi - w.m1y.lo
	d1x := x - w.m1x.hi - w.m1x.lo
	w.m1y.add(d1y / float64(w.n))
	w.m1x.add(d1x / float64(w.n))
	// Deviations from the means after this sample is included.
	d2y := y - w.m1y.hi - w.m1y.lo
	d2x := x - w.m1x.hi - w.m1x.lo
	w.m2y.add(d1y * d2y)
	w.m2x.add(d1x * d2x)
	w.cov.add(d1y * d2x)
}
func (w *welford2) dequeue(x, y float64) {
// dequeue removes one previously added (y, x) sample from the running
// state, reversing the update performed when it was added.
//
// The dependent variable y comes first, then the independent variable x.
//
// When the sample being removed is the last one (or the state is already
// empty) the state is reset to its zero value. Without this, the division
// by the new count would be a division by zero and would leave Inf/NaN in
// every accumulator, and the unsigned count would wrap around if it was
// already zero.
func (w *welford2) dequeue(y, x float64) {
	if w.n <= 1 {
		*w = welford2{}
		return
	}
	w.n--
	// Deviations from the means while this sample is still included.
	d1y := y - w.m1y.hi - w.m1y.lo
	d1x := x - w.m1x.hi - w.m1x.lo
	w.m1y.sub(d1y / float64(w.n))
	w.m1x.sub(d1x / float64(w.n))
	// Deviations from the means once this sample has been removed.
	d2y := y - w.m1y.hi - w.m1y.lo
	d2x := x - w.m1x.hi - w.m1x.lo
	w.m2y.sub(d1y * d2y)
	w.m2x.sub(d1x * d2x)
	w.cov.sub(d1y * d2x)
}
// kahan is a two-part floating-point accumulator.
//
// NOTE(review): the name suggests Kahan-style compensated summation;
// confirm against the type's add/sub methods.
type kahan struct {
	// hi is the part callers read as the accumulated value.
	hi float64
	// lo is subtracted alongside hi when callers form deltas against the
	// accumulator; presumably a rounding-error correction term — confirm.
	lo float64
}