mirror of
https://github.com/ncruces/go-sqlite3.git
synced 2026-01-11 21:49:13 +00:00
Skewness and excess kurtosis.
This commit is contained in:
@@ -2,6 +2,7 @@ package stats
|
||||
|
||||
import "math"
|
||||
|
||||
// Fisher–Pearson skewness and kurtosis using
|
||||
// Terriberry's algorithm with Kahan summation:
|
||||
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
|
||||
|
||||
@@ -45,6 +46,10 @@ func (m moments) skewness_samp() float64 {
|
||||
}
|
||||
|
||||
func (m moments) kurtosis_pop() float64 {
|
||||
return m.raw_kurtosis_pop() - 3
|
||||
}
|
||||
|
||||
func (m moments) raw_kurtosis_pop() float64 {
|
||||
m2 := m.m2.hi
|
||||
if div := m2 * m2; div != 0 {
|
||||
return m.m4.hi * float64(m.n) / div
|
||||
@@ -53,9 +58,15 @@ func (m moments) kurtosis_pop() float64 {
|
||||
}
|
||||
|
||||
func (m moments) kurtosis_samp() float64 {
|
||||
n := m.n
|
||||
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
|
||||
return k * float64(n-1) / float64((n-2)*(n-3))
|
||||
}
|
||||
|
||||
func (m moments) raw_kurtosis_samp() float64 {
|
||||
n := m.n
|
||||
// https://mathworks.com/help/stats/kurtosis.html#f4975293
|
||||
k := math.FMA(m.kurtosis_pop(), float64(n+1), float64(3-3*n))
|
||||
k := math.FMA(m.raw_kurtosis_pop(), float64(n+1), float64(3-3*n))
|
||||
return math.FMA(k, float64(n-1)/float64((n-2)*(n-3)), 3)
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ func Test_moments(t *testing.T) {
|
||||
if !math.IsNaN(s1.skewness_pop()) {
|
||||
t.Errorf("want NaN")
|
||||
}
|
||||
if !math.IsNaN(s1.kurtosis_pop()) {
|
||||
if !math.IsNaN(s1.raw_kurtosis_pop()) {
|
||||
t.Errorf("want NaN")
|
||||
}
|
||||
|
||||
@@ -29,16 +29,16 @@ func Test_moments(t *testing.T) {
|
||||
s1.enqueue(+3.5784)
|
||||
s1.enqueue(+2.7694)
|
||||
|
||||
if got := float32(s1.skewness_pop()); got != 0.106098293 {
|
||||
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
|
||||
t.Errorf("got %v, want 0.1061", got)
|
||||
}
|
||||
if got := float32(s1.skewness_samp()); got != 0.1258171 {
|
||||
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
|
||||
t.Errorf("got %v, want 0.1258", got)
|
||||
}
|
||||
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
|
||||
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
|
||||
t.Errorf("got %v, want 2.3121", got)
|
||||
}
|
||||
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
|
||||
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
|
||||
t.Errorf("got %v, want 2.7483", got)
|
||||
}
|
||||
|
||||
@@ -72,16 +72,16 @@ func Test_moments(t *testing.T) {
|
||||
s1.dequeue(math.E)
|
||||
s1.dequeue(math.Sqrt2)
|
||||
|
||||
if got := float32(s1.skewness_pop()); got != 0.106098293 {
|
||||
if got := s1.skewness_pop(); float32(got) != 0.106098293 {
|
||||
t.Errorf("got %v, want 0.1061", got)
|
||||
}
|
||||
if got := float32(s1.skewness_samp()); got != 0.1258171 {
|
||||
if got := s1.skewness_samp(); float32(got) != 0.1258171 {
|
||||
t.Errorf("got %v, want 0.1258", got)
|
||||
}
|
||||
if got := float32(s1.kurtosis_pop()); got != 2.3121266 {
|
||||
if got := s1.raw_kurtosis_pop(); float32(got) != 2.3121266 {
|
||||
t.Errorf("got %v, want 2.3121", got)
|
||||
}
|
||||
if got := float32(s1.kurtosis_samp()); got != 2.7482237 {
|
||||
if got := s1.raw_kurtosis_samp(); float32(got) != 2.7482237 {
|
||||
t.Errorf("got %v, want 2.7483", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
// Package stats provides aggregate functions for statistics.
|
||||
//
|
||||
// Provided functions:
|
||||
// - stddev_pop: population standard deviation
|
||||
// - stddev_samp: sample standard deviation
|
||||
// - var_pop: population variance
|
||||
// - var_samp: sample variance
|
||||
// - stddev_pop: population standard deviation
|
||||
// - stddev_samp: sample standard deviation
|
||||
// - skewness_pop: Pearson population skewness
|
||||
// - skewness_samp: Pearson sample skewness
|
||||
// - kurtosis_pop: Fisher population excess kurtosis
|
||||
// - kurtosis_samp: Fisher sample excess kurtosis
|
||||
// - covar_pop: population covariance
|
||||
// - covar_samp: sample covariance
|
||||
// - corr: correlation coefficient
|
||||
// - corr: Pearson correlation coefficient
|
||||
// - regr_r2: correlation coefficient squared
|
||||
// - regr_avgx: average of the independent variable
|
||||
// - regr_avgy: average of the dependent variable
|
||||
@@ -61,6 +65,10 @@ func Register(db *sqlite3.Conn) error {
|
||||
db.CreateWindowFunction("var_samp", 1, flags, newVariance(var_samp)),
|
||||
db.CreateWindowFunction("stddev_pop", 1, flags, newVariance(stddev_pop)),
|
||||
db.CreateWindowFunction("stddev_samp", 1, flags, newVariance(stddev_samp)),
|
||||
db.CreateWindowFunction("skewness_pop", 1, flags, newMoments(skewness_pop)),
|
||||
db.CreateWindowFunction("skewness_samp", 1, flags, newMoments(skewness_samp)),
|
||||
db.CreateWindowFunction("kurtosis_pop", 1, flags, newMoments(kurtosis_pop)),
|
||||
db.CreateWindowFunction("kurtosis_samp", 1, flags, newMoments(kurtosis_samp)),
|
||||
db.CreateWindowFunction("covar_pop", 2, flags, newCovariance(var_pop)),
|
||||
db.CreateWindowFunction("covar_samp", 2, flags, newCovariance(var_samp)),
|
||||
db.CreateWindowFunction("corr", 2, flags, newCovariance(corr)),
|
||||
@@ -88,6 +96,10 @@ const (
|
||||
var_samp
|
||||
stddev_pop
|
||||
stddev_samp
|
||||
skewness_pop
|
||||
skewness_samp
|
||||
kurtosis_pop
|
||||
kurtosis_samp
|
||||
corr
|
||||
regr_r2
|
||||
regr_sxx
|
||||
@@ -101,6 +113,23 @@ const (
|
||||
regr_json
|
||||
)
|
||||
|
||||
func special(kind int, n int64) (null, zero bool) {
|
||||
switch kind {
|
||||
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
|
||||
return n <= 0, n == 1
|
||||
case regr_avgx, regr_avgy:
|
||||
return n <= 0, false
|
||||
case kurtosis_samp:
|
||||
return n <= 3, false
|
||||
case skewness_samp:
|
||||
return n <= 2, false
|
||||
case skewness_pop:
|
||||
return n <= 1, n == 2
|
||||
default:
|
||||
return n <= 1, false
|
||||
}
|
||||
}
|
||||
|
||||
func newVariance(kind int) func() sqlite3.AggregateFunction {
|
||||
return func() sqlite3.AggregateFunction { return &variance{kind: kind} }
|
||||
}
|
||||
@@ -111,14 +140,11 @@ type variance struct {
|
||||
}
|
||||
|
||||
func (fn *variance) Value(ctx sqlite3.Context) {
|
||||
switch fn.n {
|
||||
case 1:
|
||||
switch fn.kind {
|
||||
case var_pop, stddev_pop:
|
||||
ctx.ResultFloat(0)
|
||||
}
|
||||
switch null, zero := special(fn.kind, fn.n); {
|
||||
case zero:
|
||||
ctx.ResultFloat(0)
|
||||
return
|
||||
case 0:
|
||||
case null:
|
||||
return
|
||||
}
|
||||
|
||||
@@ -166,18 +192,11 @@ func (fn *covariance) Value(ctx sqlite3.Context) {
|
||||
ctx.ResultInt64(fn.regr_count())
|
||||
return
|
||||
}
|
||||
switch fn.n {
|
||||
case 1:
|
||||
switch fn.kind {
|
||||
case var_pop, stddev_pop, regr_sxx, regr_syy, regr_sxy:
|
||||
ctx.ResultFloat(0)
|
||||
return
|
||||
case regr_avgx, regr_avgy:
|
||||
break
|
||||
default:
|
||||
return
|
||||
}
|
||||
case 0:
|
||||
switch null, zero := special(fn.kind, fn.n); {
|
||||
case zero:
|
||||
ctx.ResultFloat(0)
|
||||
return
|
||||
case null:
|
||||
return
|
||||
}
|
||||
|
||||
@@ -234,3 +253,51 @@ func (fn *covariance) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
||||
fn.dequeue(fa, fb)
|
||||
}
|
||||
}
|
||||
|
||||
func newMoments(kind int) func() sqlite3.AggregateFunction {
|
||||
return func() sqlite3.AggregateFunction { return &momentfn{kind: kind} }
|
||||
}
|
||||
|
||||
type momentfn struct {
|
||||
kind int
|
||||
moments
|
||||
}
|
||||
|
||||
func (fn *momentfn) Value(ctx sqlite3.Context) {
|
||||
switch null, zero := special(fn.kind, fn.n); {
|
||||
case zero:
|
||||
ctx.ResultFloat(0)
|
||||
return
|
||||
case null:
|
||||
return
|
||||
}
|
||||
|
||||
var r float64
|
||||
switch fn.kind {
|
||||
case skewness_pop:
|
||||
r = fn.skewness_pop()
|
||||
case skewness_samp:
|
||||
r = fn.skewness_samp()
|
||||
case kurtosis_pop:
|
||||
r = fn.kurtosis_pop()
|
||||
case kurtosis_samp:
|
||||
r = fn.kurtosis_samp()
|
||||
}
|
||||
ctx.ResultFloat(r)
|
||||
}
|
||||
|
||||
func (fn *momentfn) Step(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
||||
a := arg[0]
|
||||
f := a.Float()
|
||||
if f != 0.0 || a.NumericType() != sqlite3.NULL {
|
||||
fn.enqueue(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (fn *momentfn) Inverse(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
||||
a := arg[0]
|
||||
f := a.Float()
|
||||
if f != 0.0 || a.NumericType() != sqlite3.NULL {
|
||||
fn.dequeue(f)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,7 +49,9 @@ func TestRegister_variance(t *testing.T) {
|
||||
SELECT
|
||||
sum(x), avg(x),
|
||||
var_samp(x), var_pop(x),
|
||||
stddev_samp(x), stddev_pop(x)
|
||||
stddev_samp(x), stddev_pop(x),
|
||||
skewness_samp(x), skewness_pop(x),
|
||||
kurtosis_samp(x), kurtosis_pop(x)
|
||||
FROM data`)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@@ -73,13 +75,26 @@ func TestRegister_variance(t *testing.T) {
|
||||
if got := stmt.ColumnFloat(5); got != math.Sqrt(22.5) {
|
||||
t.Errorf("got %v, want √22.5", got)
|
||||
}
|
||||
if got := stmt.ColumnFloat(6); got != 0 {
|
||||
t.Errorf("got %v, want zero", got)
|
||||
}
|
||||
if got := stmt.ColumnFloat(7); got != 0 {
|
||||
t.Errorf("got %v, want zero", got)
|
||||
}
|
||||
if got := stmt.ColumnFloat(8); float32(got) != -3.3 {
|
||||
t.Errorf("got %v, want -3.3", got)
|
||||
}
|
||||
if got := stmt.ColumnFloat(9); got != -1.64 {
|
||||
t.Errorf("got %v, want -1.64", got)
|
||||
}
|
||||
}
|
||||
stmt.Close()
|
||||
|
||||
stmt, _, err = db.Prepare(`
|
||||
SELECT
|
||||
var_samp(x) OVER (ROWS 1 PRECEDING),
|
||||
var_pop(x) OVER (ROWS 1 PRECEDING)
|
||||
var_pop(x) OVER (ROWS 1 PRECEDING),
|
||||
skewness_pop(x) OVER (ROWS 1 PRECEDING)
|
||||
FROM data`)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
Reference in New Issue
Block a user