Files
sqlite3/ext/csv/csv.go

277 lines
5.3 KiB
Go
Raw Normal View History

2023-11-23 03:28:56 +00:00
// Package csv provides a CSV virtual table.
//
// The CSV virtual table reads RFC 4180 formatted comma-separated values,
// and returns that content as if it were rows and columns of an SQL table.
//
// https://sqlite.org/csv.html
package csv
import (
2023-12-19 00:13:51 +00:00
"bufio"
2023-11-23 03:28:56 +00:00
"encoding/csv"
"fmt"
"io"
2023-12-19 00:13:51 +00:00
"io/fs"
"strconv"
2023-11-23 03:28:56 +00:00
"strings"
"github.com/ncruces/go-sqlite3"
2024-07-04 15:28:49 +01:00
"github.com/ncruces/go-sqlite3/internal/util"
2024-01-03 12:47:49 +00:00
"github.com/ncruces/go-sqlite3/util/osutil"
2024-01-03 00:54:30 +00:00
"github.com/ncruces/go-sqlite3/util/vtabutil"
2023-11-23 03:28:56 +00:00
)
// Register registers the CSV virtual table.
2023-12-19 15:24:54 +00:00
// If a filename is specified, [os.Open] is used to open the file.
2024-07-08 12:06:57 +01:00
func Register(db *sqlite3.Conn) error {
return RegisterFS(db, osutil.FS{})
2023-11-23 09:54:18 +00:00
}
2023-12-19 15:24:54 +00:00
// RegisterFS registers the CSV virtual table.
2023-12-19 00:13:51 +00:00
// If a filename is specified, fsys is used to open the file.
2024-07-08 12:06:57 +01:00
func RegisterFS(db *sqlite3.Conn, fsys fs.FS) error {
2023-11-29 00:46:27 +00:00
declare := func(db *sqlite3.Conn, _, _, _ string, arg ...string) (_ *table, err error) {
2023-11-23 03:28:56 +00:00
var (
filename string
data string
schema string
header bool
columns int = -1
comma rune = ','
2024-06-20 11:02:23 +01:00
comment rune
2023-11-23 09:54:18 +00:00
done = map[string]struct{}{}
2023-11-23 03:28:56 +00:00
)
2023-11-29 00:46:27 +00:00
for _, arg := range arg {
2024-01-03 00:54:30 +00:00
key, val := vtabutil.NamedArg(arg)
2023-11-23 03:28:56 +00:00
if _, ok := done[key]; ok {
return nil, fmt.Errorf("csv: more than one %q parameter", key)
}
switch key {
case "filename":
2024-01-03 00:54:30 +00:00
filename = vtabutil.Unquote(val)
2023-11-23 03:28:56 +00:00
case "data":
2024-01-03 00:54:30 +00:00
data = vtabutil.Unquote(val)
2023-11-23 03:28:56 +00:00
case "schema":
2024-01-03 00:54:30 +00:00
schema = vtabutil.Unquote(val)
2023-11-23 03:28:56 +00:00
case "header":
2024-01-03 00:54:30 +00:00
header, err = boolArg(key, val)
2023-11-23 03:28:56 +00:00
case "columns":
2024-01-03 00:54:30 +00:00
columns, err = uintArg(key, val)
2023-11-23 03:28:56 +00:00
case "comma":
2024-01-03 00:54:30 +00:00
comma, err = runeArg(key, val)
2024-06-20 11:02:23 +01:00
case "comment":
comment, err = runeArg(key, val)
2023-11-23 03:28:56 +00:00
default:
return nil, fmt.Errorf("csv: unknown %q parameter", key)
}
if err != nil {
return nil, err
}
done[key] = struct{}{}
}
if (filename == "") == (data == "") {
2024-07-04 15:28:49 +01:00
return nil, util.ErrorString(`csv: must specify either "filename" or "data" but not both`)
2023-11-23 03:28:56 +00:00
}
table := &table{
2024-06-20 11:02:23 +01:00
fsys: fsys,
name: filename,
data: data,
comma: comma,
comment: comment,
header: header,
2023-11-23 03:28:56 +00:00
}
2023-11-29 00:46:27 +00:00
if schema == "" {
var row []string
if header || columns < 0 {
2024-01-03 00:54:30 +00:00
csv, c, err := table.newReader()
defer c.Close()
2023-12-19 00:13:51 +00:00
if err != nil {
return nil, err
}
row, err = csv.Read()
2023-11-29 00:46:27 +00:00
if err != nil {
return nil, err
}
2023-11-23 03:28:56 +00:00
}
schema = getSchema(header, columns, row)
} else {
2024-06-19 23:25:05 +01:00
defer func() {
if err == nil {
table.typs, err = getColumnAffinities(schema)
}
}()
2023-11-23 03:28:56 +00:00
}
2024-01-08 19:23:32 +00:00
err = db.DeclareVTab(schema)
2023-11-23 09:54:18 +00:00
if err != nil {
return nil, err
}
2024-01-08 19:23:32 +00:00
err = db.VTabConfig(sqlite3.VTAB_DIRECTONLY)
2023-11-23 09:54:18 +00:00
if err != nil {
return nil, err
}
return table, nil
2023-11-23 03:28:56 +00:00
}
2024-07-08 12:06:57 +01:00
return sqlite3.CreateModule(db, "csv", declare, declare)
2023-11-23 03:28:56 +00:00
}
type table struct {
2024-06-20 11:02:23 +01:00
fsys fs.FS
name string
data string
typs []affinity
comma rune
comment rune
header bool
2023-11-23 03:28:56 +00:00
}
// BestIndex reports a fixed estimated cost for every query plan.
func (t *table) BestIndex(idx *sqlite3.IndexInfo) error {
	const scanCost = 1e6
	idx.EstimatedCost = scanCost
	return nil
}
// Open returns a new cursor over t.
// The cursor has no reader until Filter is called on it.
func (t *table) Open() (sqlite3.VTabCursor, error) {
	cur := &cursor{table: t}
	return cur, nil
}
2023-11-23 09:54:18 +00:00
// Rename is a no-op that always succeeds.
func (t *table) Rename(newName string) error {
	return nil
}
2023-12-19 00:13:51 +00:00
func (t *table) Integrity(schema, table string, flags int) error {
if flags&1 != 0 {
return nil
}
2024-01-03 00:54:30 +00:00
csv, c, err := t.newReader()
2023-12-19 00:13:51 +00:00
if err != nil {
return err
2023-11-23 09:54:18 +00:00
}
2024-01-03 00:54:30 +00:00
defer c.Close()
2023-12-19 00:13:51 +00:00
_, err = csv.ReadAll()
2023-11-23 09:54:18 +00:00
return err
}
2023-12-19 00:13:51 +00:00
func (t *table) newReader() (*csv.Reader, io.Closer, error) {
var r io.Reader
var c io.Closer
if t.name != "" {
f, err := t.fsys.Open(t.name)
if err != nil {
return nil, f, err
}
buf := bufio.NewReader(f)
bom, err := buf.Peek(3)
if err != nil {
return nil, f, err
}
if string(bom) == "\xEF\xBB\xBF" {
buf.Discard(3)
2023-11-27 23:35:43 +00:00
}
2023-12-19 00:13:51 +00:00
r = buf
c = f
} else {
r = strings.NewReader(t.data)
c = io.NopCloser(r)
2023-11-27 23:35:43 +00:00
}
2023-12-19 00:13:51 +00:00
csv := csv.NewReader(r)
2023-11-23 03:28:56 +00:00
csv.ReuseRecord = true
csv.Comma = t.comma
2024-06-20 11:02:23 +01:00
csv.Comment = t.comment
2023-12-19 00:13:51 +00:00
return csv, c, nil
2023-11-23 03:28:56 +00:00
}
type cursor struct {
2024-01-03 00:54:30 +00:00
table *table
closer io.Closer
csv *csv.Reader
row []string
rowID int64
2023-11-23 03:28:56 +00:00
}
2024-01-03 00:54:30 +00:00
func (c *cursor) Close() (err error) {
if c.closer != nil {
err = c.closer.Close()
c.closer = nil
}
return err
2023-12-19 00:13:51 +00:00
}
2023-11-23 03:28:56 +00:00
func (c *cursor) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error {
2024-01-03 00:54:30 +00:00
err := c.Close()
if err != nil {
return err
}
c.csv, c.closer, err = c.table.newReader()
2023-12-19 00:13:51 +00:00
if err != nil {
return err
}
2023-11-23 03:28:56 +00:00
if c.table.header {
c.Next() // skip header
}
c.rowID = 0
return c.Next()
}
// Next advances the cursor to the following record.
// Reaching the end of input is not an error: the row becomes nil
// and nil is returned.
func (c *cursor) Next() error {
	c.rowID++
	row, err := c.csv.Read()
	c.row = row
	if err == io.EOF {
		return nil
	}
	return err
}
// EOF reports whether the cursor has no current record.
func (c *cursor) EOF() bool {
	if c.row == nil {
		return true
	}
	return false
}
// RowID returns the number of the current record.
func (c *cursor) RowID() (int64, error) {
	id := c.rowID
	return id, nil
}
func (c *cursor) Column(ctx *sqlite3.Context, col int) error {
if col < len(c.row) {
2024-06-20 11:02:23 +01:00
typ := text
if col < len(c.table.typs) {
typ = c.table.typs[col]
}
txt := c.row[col]
2024-06-20 11:02:23 +01:00
if txt == "" && typ != text {
return nil
}
switch typ {
case numeric, integer:
if strings.TrimLeft(txt, "+-0123456789") == "" {
if i, err := strconv.ParseInt(txt, 10, 64); err == nil {
ctx.ResultInt64(i)
return nil
}
}
fallthrough
case real:
if strings.TrimLeft(txt, "+-.0123456789Ee") == "" {
if f, err := strconv.ParseFloat(txt, 64); err == nil {
ctx.ResultFloat(f)
return nil
}
}
fallthrough
2024-06-20 11:02:23 +01:00
default:
}
2024-06-20 11:02:23 +01:00
ctx.ResultText(txt)
2023-11-23 03:28:56 +00:00
}
return nil
}