CSV type affinity (#102)

Use sqlite-createtable-parser compiled to Wasm to parse the CREATE TABLE statement.
This commit is contained in:
Nuno Cruces
2024-06-17 23:44:37 +01:00
committed by GitHub
parent 3719692349
commit 58e91052bb
14 changed files with 371 additions and 2 deletions

View File

@@ -40,6 +40,8 @@ func Test_uintArg(t *testing.T) {
} }
func Test_boolArg(t *testing.T) { func Test_boolArg(t *testing.T) {
t.Parallel()
tests := []struct { tests := []struct {
arg string arg string
key string key string
@@ -76,6 +78,8 @@ func Test_boolArg(t *testing.T) {
} }
func Test_runeArg(t *testing.T) { func Test_runeArg(t *testing.T) {
t.Parallel()
tests := []struct { tests := []struct {
arg string arg string
key string key string

View File

@@ -12,6 +12,7 @@ import (
"fmt" "fmt"
"io" "io"
"io/fs" "io/fs"
"strconv"
"strings" "strings"
"github.com/ncruces/go-sqlite3" "github.com/ncruces/go-sqlite3"
@@ -93,6 +94,8 @@ func RegisterFS(db *sqlite3.Conn, fsys fs.FS) {
} }
} }
schema = getSchema(header, columns, row) schema = getSchema(header, columns, row)
} else {
table.typs = getColumnAffinities(schema)
} }
err = db.DeclareVTab(schema) err = db.DeclareVTab(schema)
@@ -113,6 +116,7 @@ type table struct {
fsys fs.FS fsys fs.FS
name string name string
data string data string
typs []affinity
comma rune comma rune
header bool header bool
} }
@@ -226,7 +230,40 @@ func (c *cursor) RowID() (int64, error) {
func (c *cursor) Column(ctx *sqlite3.Context, col int) error { func (c *cursor) Column(ctx *sqlite3.Context, col int) error {
if col < len(c.row) { if col < len(c.row) {
ctx.ResultText(c.row[col]) var typ affinity
if col < len(c.table.typs) {
typ = c.table.typs[col]
}
txt := c.row[col]
if typ == blob {
ctx.ResultText(txt)
return nil
}
if txt == "" {
return nil
}
switch typ {
case numeric, integer:
if strings.TrimLeft(txt, "+-0123456789") == "" {
if i, err := strconv.ParseInt(txt, 10, 64); err == nil {
ctx.ResultInt64(i)
return nil
}
}
fallthrough
case real:
if strings.TrimLeft(txt, "+-.0123456789Ee") == "" {
if f, err := strconv.ParseFloat(txt, 64); err == nil {
ctx.ResultFloat(f)
return nil
}
}
fallthrough
case text:
ctx.ResultText(c.row[col])
}
} }
return nil return nil
} }

View File

@@ -113,6 +113,50 @@ Robert "Griesemer" "gri"`
} }
} }
// TestAffinity checks that fields of a CSV column declared as numeric
// come back as an integer, a float, or text, depending on their content.
func TestAffinity(t *testing.T) {
	t.Parallel()

	db, err := sqlite3.Open(":memory:")
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	csv.Register(db)

	const data = "01\n0.10\ne"
	err = db.Exec(`
		CREATE VIRTUAL TABLE temp.nums USING csv(
			data = ` + sqlite3.Quote(data) + `,
			schema = 'CREATE TABLE x(a numeric)'
		)`)
	if err != nil {
		t.Fatal(err)
	}

	stmt, _, err := db.Prepare(`SELECT * FROM temp.nums`)
	if err != nil {
		t.Fatal(err)
	}
	defer stmt.Close()

	// One expected value per CSV record, in order.
	// A missing row is a failure: silently skipping it would let the test
	// pass even if the table produced no rows at all.
	for i, want := range []string{"1", "0.1", "e"} {
		if !stmt.Step() {
			t.Fatalf("row %d: got no row, want %q", i, want)
		}
		if got := stmt.ColumnText(0); got != want {
			t.Errorf("row %d: got %q want %q", i, got, want)
		}
	}
}
func TestRegister_errors(t *testing.T) { func TestRegister_errors(t *testing.T) {
t.Parallel() t.Parallel()

54
ext/csv/types.go Normal file
View File

@@ -0,0 +1,54 @@
package csv
import (
_ "embed"
"strings"
"github.com/ncruces/go-sqlite3/util/vtabutil"
)
// affinity is the type affinity of a column,
// as determined from its declared type by getAffinity.
type affinity byte

// Column affinities. blob is the zero value, so a column
// with no known affinity is treated as blob.
const (
	blob affinity = iota
	text
	numeric
	integer
	real
)
// getColumnAffinities parses schema, a CREATE TABLE statement,
// and returns the affinity of each of its columns, in declaration order.
//
// It returns nil when the schema cannot be parsed; callers then
// have no affinity information for any column.
func getColumnAffinities(schema string) []affinity {
	tab, err := vtabutil.Parse(schema)
	// Parse can return a nil table together with a nil error;
	// using such a table would dereference a nil pointer.
	if err != nil || tab == nil {
		return nil
	}
	defer tab.Close()

	types := make([]affinity, tab.NumColumns())
	for i := range types {
		col := tab.Column(i)
		types[i] = getAffinity(col.Type())
	}
	return types
}
// getAffinity maps the declared type of a column to its affinity.
//
// The match is case-insensitive and by substring, and the rules are tried
// in order, so the first one that matches wins (e.g. "CHARINT" is integer).
func getAffinity(declType string) affinity {
	// https://sqlite.org/datatype3.html#determination_of_column_affinity
	if declType == "" {
		return blob
	}

	upper := strings.ToUpper(declType)
	has := func(sub string) bool {
		return strings.Contains(upper, sub)
	}

	switch {
	case has("INT"):
		return integer
	case has("CHAR"), has("CLOB"), has("TEXT"):
		return text
	case has("BLOB"):
		return blob
	case has("REAL"), has("FLOA"), has("DOUB"):
		return real
	default:
		return numeric
	}
}

35
ext/csv/types_test.go Normal file
View File

@@ -0,0 +1,35 @@
package csv
import (
_ "embed"
"testing"
)
// Test_getAffinity checks the affinity derived from a range of declared
// types, running one subtest per declaration.
func Test_getAffinity(t *testing.T) {
	type testCase struct {
		decl string
		want affinity
	}

	cases := []testCase{
		{decl: "", want: blob},
		{decl: "INTEGER", want: integer},
		{decl: "TINYINT", want: integer},
		{decl: "TEXT", want: text},
		{decl: "CHAR", want: text},
		{decl: "CLOB", want: text},
		{decl: "BLOB", want: blob},
		{decl: "REAL", want: real},
		{decl: "FLOAT", want: real},
		{decl: "DOUBLE", want: real},
		{decl: "NUMERIC", want: numeric},
		{decl: "DECIMAL", want: numeric},
		{decl: "BOOLEAN", want: numeric},
		{decl: "DATETIME", want: numeric},
	}

	for _, tc := range cases {
		t.Run(tc.decl, func(t *testing.T) {
			got := getAffinity(tc.decl)
			if got != tc.want {
				t.Errorf("getAffinity() = %v, want %v", got, tc.want)
			}
		})
	}
}

8
util/vtabutil/README.md Normal file
View File

@@ -0,0 +1,8 @@
# Virtual Table utility functions
This package implements utilities mostly useful to virtual table implementations.
It also wraps a [parser](https://github.com/marcobambini/sqlite-createtable-parser)
for the [`CREATE TABLE`](https://sqlite.org/lang_createtable.html) and
[`ALTER TABLE`](https://sqlite.org/lang_altertable.html) commands,
created by [Marco Bambini](https://github.com/marcobambini).

View File

@@ -1,4 +1,3 @@
// Package ioutil implements virtual table utility functions.
package vtabutil package vtabutil
import "strings" import "strings"

145
util/vtabutil/parse.go Normal file
View File

@@ -0,0 +1,145 @@
package vtabutil
import (
"context"
"sync"
_ "embed"
"github.com/ncruces/go-sqlite3/internal/util"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
)
// Fixed offsets into the parser module's memory, used by Parse.
const (
// code is the address handed to sql3parse_table as its last argument;
// Parse reads the parser's error code back from it.
code = 4
// base is the address at which Parse copies the SQL text.
base = 8
)
var (
// binary is the compiled parser, embedded at build time.
//go:embed parse/sql3parse_table.wasm
binary []byte
// ctx and runtime are initialized once, on the first call to Parse,
// and shared by every parser module instance afterwards.
ctx context.Context
once sync.Once
runtime wazero.Runtime
)
// Table holds metadata about a table.
//
// A Table is backed by a parser module instance,
// which is released by calling Close.
type Table struct {
// mod is the module instance that parsed the statement; Close sets it to nil.
mod api.Module
// ptr is the value returned by sql3parse_table,
// passed back to the parser's sql3table_* functions.
ptr uint32
// sql is the statement that was parsed; Column.Type slices it.
sql string
}
// Parse parses a [CREATE TABLE] or [ALTER TABLE] command.
//
// On success, the returned Table is backed by a Wasm module instance
// that the caller must release with [Table.Close].
// If the parser reports a syntax error or unsupported SQL, a non-nil error
// is returned. If the parser produces no table without reporting an error,
// Parse returns a nil Table and a nil error.
// Parse panics if the parser runs out of memory.
//
// [CREATE TABLE]: https://sqlite.org/lang_createtable.html
// [ALTER TABLE]: https://sqlite.org/lang_altertable.html
func Parse(sql string) (*Table, error) {
	// The runtime is created lazily, once, and shared by all calls.
	once.Do(func() {
		ctx = context.Background()
		cfg := wazero.NewRuntimeConfigInterpreter().WithDebugInfoEnabled(false)
		runtime = wazero.NewRuntimeWithConfig(ctx, cfg)
	})

	// Each call gets its own anonymous instance of the parser module.
	mod, err := runtime.InstantiateWithConfig(ctx, binary, wazero.NewModuleConfig().WithName(""))
	if err != nil {
		return nil, err
	}

	// The instance is only kept alive when it is handed over to a Table;
	// on every other path (error, nil result, panic) it is released here,
	// otherwise it would leak.
	var tab *Table
	defer func() {
		if tab == nil {
			mod.Close(ctx)
		}
	}()

	// Copy the statement into module memory at base.
	// NOTE(review): if the statement does not fit in module memory the copy
	// is skipped and the parser reads whatever memory already held;
	// consider reporting an error instead.
	if buf, ok := mod.Memory().Read(base, uint32(len(sql))); ok {
		copy(buf, sql)
	}

	r, err := mod.ExportedFunction("sql3parse_table").Call(ctx, base, uint64(len(sql)), code)
	if err != nil {
		return nil, err
	}

	// The parser leaves its error code at address code.
	c, _ := mod.Memory().ReadUint32Le(code)
	if c == uint32(_MEMORY) {
		panic(util.OOMErr)
	}
	if c != uint32(_NONE) {
		return nil, ecode(c)
	}
	if r[0] == 0 {
		return nil, nil
	}

	tab = &Table{
		sql: sql,
		mod: mod,
		ptr: uint32(r[0]),
	}
	return tab, nil
}
// Close closes a table handle, releasing the module instance behind it.
//
// Close is a no-op on a nil Table (which Parse may return) and on a Table
// that has already been closed; in both cases it returns nil.
func (t *Table) Close() error {
	if t == nil || t.mod == nil {
		return nil
	}
	mod := t.mod
	t.mod = nil
	return mod.Close(ctx)
}
// NumColumns returns the number of columns of the table.
//
// It panics if the call into the parser module fails.
func (t *Table) NumColumns() int {
	fn := t.mod.ExportedFunction("sql3table_num_columns")
	res, err := fn.Call(ctx, uint64(t.ptr))
	if err != nil {
		panic(err)
	}
	// The count is converted through a signed 32-bit integer.
	count := int32(res[0])
	return int(count)
}
// Column returns data for the ith column of the table.
//
// It panics if the call into the parser module fails.
//
// https://sqlite.org/lang_createtable.html#column_definitions
func (t *Table) Column(i int) Column {
	fn := t.mod.ExportedFunction("sql3table_get_column")
	res, err := fn.Call(ctx, uint64(t.ptr), uint64(i))
	if err != nil {
		panic(err)
	}

	var col Column
	col.tab = t
	col.ptr = uint32(res[0])
	return col
}
// Column holds metadata about a column.
//
// A Column refers back to the Table it was obtained from
// and calls into that table's parser module.
type Column struct {
// tab is the table this column was obtained from.
tab *Table
// ptr is the value returned by sql3table_get_column,
// passed back to the parser's sql3column_* functions.
ptr uint32
}
// Type returns the declared type of a column,
// or the empty string if the parser reports none.
//
// It panics if the call into the parser module fails.
//
// https://sqlite.org/lang_createtable.html#column_data_types
func (c Column) Type() string {
	mod := c.tab.mod
	res, err := mod.ExportedFunction("sql3column_type").Call(ctx, uint64(c.ptr))
	if err != nil {
		panic(err)
	}
	if res[0] == 0 {
		return ""
	}

	// The result addresses two consecutive 32-bit words:
	// a memory offset, followed by a length.
	mem := mod.Memory()
	start, _ := mem.ReadUint32Le(uint32(res[0]) + 0)
	size, _ := mem.ReadUint32Le(uint32(res[0]) + 4)

	// The SQL text was copied to base, so subtracting base turns the
	// memory offset into an index into the original string.
	start -= base
	return c.tab.sql[start : start+size]
}
// ecode is an error code reported by the parser.
// Values other than _NONE are usable as an error.
type ecode uint32

// Parser error codes, in the order the parser numbers them.
const (
	_NONE ecode = iota
	_MEMORY
	_SYNTAX
	_UNSUPPORTEDSQL
)

// Error implements the error interface.
//
// It never panics: a code this package does not know about
// is reported with a generic message, so that formatting or logging
// an unexpected parser error cannot crash the caller.
func (e ecode) Error() string {
	switch e {
	case _MEMORY:
		return "sql3parse: out of memory"
	case _SYNTAX:
		return "sql3parse: invalid syntax"
	case _UNSUPPORTEDSQL:
		return "sql3parse: unsupported SQL"
	default:
		return "sql3parse: unknown error"
	}
}

2
util/vtabutil/parse/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
sql3parse_table.c
sql3parse_table.h

28
util/vtabutil/parse/build.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Builds sql3parse_table.wasm from sql3parse_table.c, using the wasi-sdk and
# binaryen toolchains found under $ROOT/tools.
set -euo pipefail
# Run from the directory containing this script, so relative paths resolve.
cd -P -- "$(dirname -- "$0")"
ROOT=../../../
BINARYEN="$ROOT/tools/binaryen-version_117/bin"
WASI_SDK="$ROOT/tools/wasi-sdk-22.0/bin"
# Compile for wasm32-wasi as a reactor module, optimizing for size (-Oz).
# Each name listed in exports.txt is passed to the linker as -Wl,--export=NAME;
# the awk substitution is left unquoted on purpose, so it splits into one
# argument per line.
"$WASI_SDK/clang" --target=wasm32-wasi -std=c17 -flto -g0 -Oz \
-Wall -Wextra -Wno-unused-parameter -Wno-unused-function \
-o sql3parse_table.wasm sql3parse_table.c \
-mexec-model=reactor \
-msimd128 -mmutable-globals -mmultivalue \
-mbulk-memory -mreference-types \
-mnontrapping-fptoint -msign-ext \
-fno-stack-protector -fno-stack-clash-protection \
-Wl,--stack-first \
-Wl,--import-undefined \
$(awk '{print "-Wl,--export="$0}' exports.txt)
# Remove the intermediate file when the script exits, whether or not it failed.
trap 'rm -f sql3parse_table.tmp' EXIT
# Post-process with binaryen: wasm-ctor-eval is pointed at _initialize
# (presumably to run it at build time - confirm against binaryen docs),
# then wasm-opt strips and size-optimizes the result back into the .wasm file.
# The --enable-* flags mirror the -m* features given to clang above.
"$BINARYEN/wasm-ctor-eval" -g -c _initialize sql3parse_table.wasm -o sql3parse_table.tmp
"$BINARYEN/wasm-opt" -g --strip --strip-producers -c -Oz \
sql3parse_table.tmp -o sql3parse_table.wasm \
--enable-simd --enable-mutable-globals --enable-multivalue \
--enable-bulk-memory --enable-reference-types \
--enable-nontrapping-float-to-int --enable-sign-ext

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Downloads the parser's C source and header into this script's directory.
set -euo pipefail
# Run from the directory containing this script, so files land next to it.
cd -P -- "$(dirname -- "$0")"
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page as the source file; with `set -e` the script then stops.
# -# shows a progress bar, -O keeps the remote file name, -L follows redirects.
curl -#fOL "https://github.com/ncruces/sqlite-createtable-parser/raw/master/sql3parse_table.c"
curl -#fOL "https://github.com/ncruces/sqlite-createtable-parser/raw/master/sql3parse_table.h"

View File

@@ -0,0 +1,4 @@
sql3parse_table
sql3table_get_column
sql3table_num_columns
sql3column_type

Binary file not shown.

View File

@@ -0,0 +1,2 @@
// Package vtabutil implements virtual table utility functions.
package vtabutil