2023-11-23 03:28:56 +00:00
|
|
|
// Package csv provides a CSV virtual table.
//
// The CSV virtual table reads RFC 4180 formatted comma-separated values,
// and returns that content as if it were rows and columns of an SQL table.
//
// https://sqlite.org/csv.html
|
|
|
|
|
package csv
|
|
|
|
|
|
|
|
|
|
import (
|
2023-12-19 00:13:51 +00:00
|
|
|
"bufio"
|
2023-11-23 03:28:56 +00:00
|
|
|
"encoding/csv"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
2023-12-19 00:13:51 +00:00
|
|
|
"io/fs"
|
2024-06-17 23:44:37 +01:00
|
|
|
"strconv"
|
2023-11-23 03:28:56 +00:00
|
|
|
"strings"
|
|
|
|
|
|
|
|
|
|
"github.com/ncruces/go-sqlite3"
|
2024-07-04 15:28:49 +01:00
|
|
|
"github.com/ncruces/go-sqlite3/internal/util"
|
2024-01-03 12:47:49 +00:00
|
|
|
"github.com/ncruces/go-sqlite3/util/osutil"
|
2024-10-22 23:32:57 +01:00
|
|
|
"github.com/ncruces/go-sqlite3/util/sql3util"
|
2023-11-23 03:28:56 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Register registers the CSV virtual table.
|
2023-12-19 15:24:54 +00:00
|
|
|
// If a filename is specified, [os.Open] is used to open the file.
|
2024-07-08 12:06:57 +01:00
|
|
|
func Register(db *sqlite3.Conn) error {
|
|
|
|
|
return RegisterFS(db, osutil.FS{})
|
2023-11-23 09:54:18 +00:00
|
|
|
}
|
|
|
|
|
|
2023-12-19 15:24:54 +00:00
|
|
|
// RegisterFS registers the CSV virtual table.
|
2023-12-19 00:13:51 +00:00
|
|
|
// If a filename is specified, fsys is used to open the file.
|
2024-07-08 12:06:57 +01:00
|
|
|
func RegisterFS(db *sqlite3.Conn, fsys fs.FS) error {
|
2025-01-21 01:42:57 +00:00
|
|
|
declare := func(db *sqlite3.Conn, _, _, _ string, arg ...string) (_ *table, err error) {
|
2023-11-23 03:28:56 +00:00
|
|
|
var (
|
|
|
|
|
filename string
|
|
|
|
|
data string
|
|
|
|
|
schema string
|
|
|
|
|
header bool
|
|
|
|
|
columns int = -1
|
|
|
|
|
comma rune = ','
|
2024-06-20 11:02:23 +01:00
|
|
|
comment rune
|
2023-11-23 09:54:18 +00:00
|
|
|
|
2024-09-21 11:40:16 +01:00
|
|
|
done = util.Set[string]{}
|
2023-11-23 03:28:56 +00:00
|
|
|
)
|
|
|
|
|
|
2023-11-29 00:46:27 +00:00
|
|
|
for _, arg := range arg {
|
2024-10-22 23:32:57 +01:00
|
|
|
key, val := sql3util.NamedArg(arg)
|
2024-09-21 11:40:16 +01:00
|
|
|
if done.Contains(key) {
|
2023-11-23 03:28:56 +00:00
|
|
|
return nil, fmt.Errorf("csv: more than one %q parameter", key)
|
|
|
|
|
}
|
|
|
|
|
switch key {
|
|
|
|
|
case "filename":
|
2024-10-22 23:32:57 +01:00
|
|
|
filename = sql3util.Unquote(val)
|
2023-11-23 03:28:56 +00:00
|
|
|
case "data":
|
2024-10-22 23:32:57 +01:00
|
|
|
data = sql3util.Unquote(val)
|
2023-11-23 03:28:56 +00:00
|
|
|
case "schema":
|
2024-10-22 23:32:57 +01:00
|
|
|
schema = sql3util.Unquote(val)
|
2023-11-23 03:28:56 +00:00
|
|
|
case "header":
|
2024-01-03 00:54:30 +00:00
|
|
|
header, err = boolArg(key, val)
|
2023-11-23 03:28:56 +00:00
|
|
|
case "columns":
|
2024-01-03 00:54:30 +00:00
|
|
|
columns, err = uintArg(key, val)
|
2023-11-23 03:28:56 +00:00
|
|
|
case "comma":
|
2024-01-03 00:54:30 +00:00
|
|
|
comma, err = runeArg(key, val)
|
2024-06-20 11:02:23 +01:00
|
|
|
case "comment":
|
|
|
|
|
comment, err = runeArg(key, val)
|
2023-11-23 03:28:56 +00:00
|
|
|
default:
|
|
|
|
|
return nil, fmt.Errorf("csv: unknown %q parameter", key)
|
|
|
|
|
}
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-09-21 11:40:16 +01:00
|
|
|
done.Add(key)
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (filename == "") == (data == "") {
|
2024-07-04 15:28:49 +01:00
|
|
|
return nil, util.ErrorString(`csv: must specify either "filename" or "data" but not both`)
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
2024-10-07 13:22:31 +01:00
|
|
|
t := &table{
|
2024-06-20 11:02:23 +01:00
|
|
|
fsys: fsys,
|
|
|
|
|
name: filename,
|
|
|
|
|
data: data,
|
|
|
|
|
comma: comma,
|
|
|
|
|
comment: comment,
|
|
|
|
|
header: header,
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
2023-11-29 00:46:27 +00:00
|
|
|
if schema == "" {
|
|
|
|
|
var row []string
|
|
|
|
|
if header || columns < 0 {
|
2024-10-07 13:22:31 +01:00
|
|
|
csv, c, err := t.newReader()
|
2024-01-03 00:54:30 +00:00
|
|
|
defer c.Close()
|
2023-12-19 00:13:51 +00:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
row, err = csv.Read()
|
2023-11-29 00:46:27 +00:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
schema = getSchema(header, columns, row)
|
2024-06-17 23:44:37 +01:00
|
|
|
} else {
|
2024-10-07 13:22:31 +01:00
|
|
|
t.typs, err = getColumnAffinities(schema)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
2024-01-08 19:23:32 +00:00
|
|
|
err = db.DeclareVTab(schema)
|
2024-10-07 13:22:31 +01:00
|
|
|
if err == nil {
|
|
|
|
|
err = db.VTabConfig(sqlite3.VTAB_DIRECTONLY)
|
2023-11-23 09:54:18 +00:00
|
|
|
}
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-10-07 13:22:31 +01:00
|
|
|
return t, nil
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
2024-07-08 12:06:57 +01:00
|
|
|
return sqlite3.CreateModule(db, "csv", declare, declare)
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type table struct {
|
2024-06-20 11:02:23 +01:00
|
|
|
fsys fs.FS
|
|
|
|
|
name string
|
|
|
|
|
data string
|
|
|
|
|
typs []affinity
|
|
|
|
|
comma rune
|
|
|
|
|
comment rune
|
|
|
|
|
header bool
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (t *table) BestIndex(idx *sqlite3.IndexInfo) error {
|
|
|
|
|
idx.EstimatedCost = 1e6
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (t *table) Open() (sqlite3.VTabCursor, error) {
|
|
|
|
|
return &cursor{table: t}, nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-23 09:54:18 +00:00
|
|
|
func (t *table) Rename(new string) error {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-19 00:13:51 +00:00
|
|
|
func (t *table) Integrity(schema, table string, flags int) error {
|
|
|
|
|
if flags&1 != 0 {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
2024-01-03 00:54:30 +00:00
|
|
|
csv, c, err := t.newReader()
|
2023-12-19 00:13:51 +00:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
2023-11-23 09:54:18 +00:00
|
|
|
}
|
2024-01-03 00:54:30 +00:00
|
|
|
defer c.Close()
|
2023-12-19 00:13:51 +00:00
|
|
|
_, err = csv.ReadAll()
|
2023-11-23 09:54:18 +00:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-19 00:13:51 +00:00
|
|
|
func (t *table) newReader() (*csv.Reader, io.Closer, error) {
|
|
|
|
|
var r io.Reader
|
|
|
|
|
var c io.Closer
|
|
|
|
|
if t.name != "" {
|
|
|
|
|
f, err := t.fsys.Open(t.name)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, f, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
buf := bufio.NewReader(f)
|
|
|
|
|
bom, err := buf.Peek(3)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, f, err
|
|
|
|
|
}
|
|
|
|
|
if string(bom) == "\xEF\xBB\xBF" {
|
|
|
|
|
buf.Discard(3)
|
2023-11-27 23:35:43 +00:00
|
|
|
}
|
2023-12-19 00:13:51 +00:00
|
|
|
|
|
|
|
|
r = buf
|
|
|
|
|
c = f
|
|
|
|
|
} else {
|
|
|
|
|
r = strings.NewReader(t.data)
|
|
|
|
|
c = io.NopCloser(r)
|
2023-11-27 23:35:43 +00:00
|
|
|
}
|
2023-12-19 00:13:51 +00:00
|
|
|
|
|
|
|
|
csv := csv.NewReader(r)
|
2023-11-23 03:28:56 +00:00
|
|
|
csv.ReuseRecord = true
|
|
|
|
|
csv.Comma = t.comma
|
2024-06-20 11:02:23 +01:00
|
|
|
csv.Comment = t.comment
|
2023-12-19 00:13:51 +00:00
|
|
|
return csv, c, nil
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type cursor struct {
|
2024-01-03 00:54:30 +00:00
|
|
|
table *table
|
|
|
|
|
closer io.Closer
|
|
|
|
|
csv *csv.Reader
|
|
|
|
|
row []string
|
|
|
|
|
rowID int64
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
|
2024-01-03 00:54:30 +00:00
|
|
|
func (c *cursor) Close() (err error) {
|
|
|
|
|
if c.closer != nil {
|
|
|
|
|
err = c.closer.Close()
|
|
|
|
|
c.closer = nil
|
|
|
|
|
}
|
|
|
|
|
return err
|
2023-12-19 00:13:51 +00:00
|
|
|
}
|
|
|
|
|
|
2023-11-23 03:28:56 +00:00
|
|
|
func (c *cursor) Filter(idxNum int, idxStr string, arg ...sqlite3.Value) error {
|
2024-01-03 00:54:30 +00:00
|
|
|
err := c.Close()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c.csv, c.closer, err = c.table.newReader()
|
2023-12-19 00:13:51 +00:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2023-11-23 03:28:56 +00:00
|
|
|
if c.table.header {
|
2025-01-17 14:40:12 +00:00
|
|
|
err = c.Next() // skip header
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
c.rowID = 0
|
|
|
|
|
return c.Next()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *cursor) Next() (err error) {
|
|
|
|
|
c.rowID++
|
|
|
|
|
c.row, err = c.csv.Read()
|
|
|
|
|
if err != io.EOF {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *cursor) EOF() bool {
|
|
|
|
|
return c.row == nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *cursor) RowID() (int64, error) {
|
|
|
|
|
return c.rowID, nil
|
|
|
|
|
}
|
|
|
|
|
|
2024-07-26 12:25:15 +01:00
|
|
|
func (c *cursor) Column(ctx sqlite3.Context, col int) error {
|
2023-11-23 03:28:56 +00:00
|
|
|
if col < len(c.row) {
|
2024-06-20 11:02:23 +01:00
|
|
|
typ := text
|
2024-06-17 23:44:37 +01:00
|
|
|
if col < len(c.table.typs) {
|
|
|
|
|
typ = c.table.typs[col]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
txt := c.row[col]
|
2024-06-20 11:02:23 +01:00
|
|
|
if txt == "" && typ != text {
|
2024-06-17 23:44:37 +01:00
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch typ {
|
|
|
|
|
case numeric, integer:
|
|
|
|
|
if strings.TrimLeft(txt, "+-0123456789") == "" {
|
|
|
|
|
if i, err := strconv.ParseInt(txt, 10, 64); err == nil {
|
|
|
|
|
ctx.ResultInt64(i)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case real:
|
|
|
|
|
if strings.TrimLeft(txt, "+-.0123456789Ee") == "" {
|
|
|
|
|
if f, err := strconv.ParseFloat(txt, 64); err == nil {
|
|
|
|
|
ctx.ResultFloat(f)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
2024-06-20 11:02:23 +01:00
|
|
|
default:
|
2024-06-17 23:44:37 +01:00
|
|
|
}
|
2024-06-20 11:02:23 +01:00
|
|
|
ctx.ResultText(txt)
|
2023-11-23 03:28:56 +00:00
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|