From fc3a993c3ecd8816bc0cbe187245e4120842d9c2 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Sat, 21 Dec 2024 09:26:55 +0000 Subject: [PATCH] Parquet vtab. --- ext/parquet/go.mod | 23 ++++++++++++++++ ext/parquet/go.sum | 32 ++++++++++++++++++++++ ext/parquet/parquet.go | 62 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 ext/parquet/go.mod create mode 100644 ext/parquet/go.sum create mode 100644 ext/parquet/parquet.go diff --git a/ext/parquet/go.mod b/ext/parquet/go.mod new file mode 100644 index 0000000..cacbfc0 --- /dev/null +++ b/ext/parquet/go.mod @@ -0,0 +1,23 @@ +module github.com/ncruces/go-sqlite3/ext/parquet + +go 1.22 + +toolchain go1.23.0 + +require ( + github.com/ncruces/go-sqlite3 v0.21.0 + github.com/parquet-go/parquet-go v0.24.0 +) + +require ( + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/ncruces/julianday v1.0.0 // indirect + github.com/olekukonko/tablewriter v0.0.5 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/tetratelabs/wazero v1.8.2 // indirect + golang.org/x/sys v0.28.0 // indirect +) diff --git a/ext/parquet/go.sum b/ext/parquet/go.sum new file mode 100644 index 0000000..0de8cce --- /dev/null +++ b/ext/parquet/go.sum @@ -0,0 +1,32 @@ +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= 
+github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/ncruces/go-sqlite3 v0.21.0 h1:EwKFoy1hHEopN4sFZarmi+McXdbCcbTuLixhEayXVbQ= +github.com/ncruces/go-sqlite3 v0.21.0/go.mod h1:zxMOaSG5kFYVFK4xQa0pdwIszqxqJ0W0BxBgwdrNjuA= +github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= +github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4= +github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.21.0 
h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
diff --git a/ext/parquet/parquet.go b/ext/parquet/parquet.go
new file mode 100644
index 0000000..4fef086
--- /dev/null
+++ b/ext/parquet/parquet.go
@@ -0,0 +1,98 @@
+// Package parquet implements a virtual table module for Parquet files.
+package parquet
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/ncruces/go-sqlite3"
+	"github.com/ncruces/go-sqlite3/internal/util"
+	"github.com/ncruces/go-sqlite3/util/osutil"
+	"github.com/ncruces/go-sqlite3/util/sql3util"
+	"github.com/parquet-go/parquet-go"
+)
+
+// Register registers the "parquet" virtual table module with db.
+//
+// The first module argument names the Parquet file to read; it is passed
+// through sql3util.Unquote before being opened. The table is declared with
+// one column per field of the file's schema.
+func Register(db *sqlite3.Conn) error {
+	declare := func(db *sqlite3.Conn, _, _, _ string, arg ...string) (*table, error) {
+		if len(arg) == 0 {
+			return nil, util.ErrorString(`parquet: must specify a filename`)
+		}
+
+		name := sql3util.Unquote(arg[0])
+		file, err := osutil.OpenFile(name, os.O_RDONLY, 0)
+		if err != nil {
+			return nil, err
+		}
+		// The returned table keeps no reference to the file, which is only
+		// needed here to read the schema, so release it on every path.
+		defer file.Close()
+
+		info, err := file.Stat()
+		if err != nil {
+			return nil, err
+		}
+
+		// Use OpenFile rather than NewReader so that a file that is not valid
+		// Parquet is reported as an error to the caller.
+		pf, err := parquet.OpenFile(file, info.Size())
+		if err != nil {
+			return nil, fmt.Errorf("parquet: %q: %w", name, err)
+		}
+
+		if err := db.DeclareVTab(declareSQL(pf.Schema())); err != nil {
+			return nil, err
+		}
+		return &table{}, nil
+	}
+
+	return sqlite3.CreateModule(db, "parquet", declare, declare)
+}
+
+// declareSQL builds the CREATE TABLE statement passed to DeclareVTab,
+// with one column per field returned by schema.Fields.
+func declareSQL(schema *parquet.Schema) string {
+	var b strings.Builder
+	b.WriteString("CREATE TABLE x(")
+	for i, field := range schema.Fields() {
+		if i > 0 {
+			b.WriteByte(',')
+		}
+		b.WriteString(sqlite3.QuoteIdentifier(field.Name()))
+		b.WriteByte(' ')
+		b.WriteString(columnType(field))
+	}
+	b.WriteString(");")
+	return b.String()
+}
+
+// columnType returns the declared SQL type for field,
+// or the empty string when no type is declared for it.
+func columnType(field parquet.Field) string {
+	// Kind is only queried on leaf nodes; nested groups are left untyped.
+	if !field.Leaf() {
+		return ""
+	}
+	switch field.Type().Kind() {
+	case parquet.Boolean:
+		return "BOOLEAN"
+	case parquet.Int32, parquet.Int64, parquet.Int96:
+		return "INTEGER"
+	case parquet.Float, parquet.Double:
+		return "REAL"
+	case parquet.ByteArray, parquet.FixedLenByteArray:
+		return "TEXT"
+	}
+	return ""
+}
+
+// table is the virtual table value returned by the module constructor.
+// NOTE(review): it has no fields or methods yet; confirm it satisfies the
+// constraint sqlite3.CreateModule places on its constructors' result type.
+type table struct {
+}