From 81e7a94ca49a1b182841dec79691f840bad9cc66 Mon Sep 17 00:00:00 2001 From: Nuno Cruces Date: Mon, 4 Nov 2024 19:30:10 +0000 Subject: [PATCH] Optimize regexp. --- ext/regexp/regexp.go | 30 +++++++++++++++++++++++++----- ext/regexp/regexp_test.go | 23 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/ext/regexp/regexp.go b/ext/regexp/regexp.go index 1d0aa30..8a1867f 100644 --- a/ext/regexp/regexp.go +++ b/ext/regexp/regexp.go @@ -1,11 +1,11 @@ // Package regexp provides additional regular expression functions. // // It provides the following Unicode aware functions: -// - regexp_like(), -// - regexp_count(), -// - regexp_instr(), -// - regexp_substr(), -// - regexp_replace(), +// - regexp_like(text, pattern), +// - regexp_count(text, pattern [, start]), +// - regexp_instr(text, pattern [, start [, N [, endoption [, subexpr ]]]]), +// - regexp_substr(text, pattern [, start [, N [, subexpr ]]]), +// - regexp_replace(text, pattern, replacement [, start [, N ]]), // - and a REGEXP operator. // // The implementation uses Go [regexp/syntax] for regular expressions. 
// GlobPrefix returns a GLOB for a regular expression
// appropriate to take advantage of the [LIKE optimization]
// in a query such as:
//
//	SELECT column WHERE column GLOB :glob_prefix AND column REGEXP :regexp
//
// The returned GLOB never rejects a text that re matches, so adding it
// to the query does not change the result set. Regular expressions are
// unanchored by default, so a literal prefix is only produced when every
// match of re is pinned to the start of the text (a leading ^ without the
// m flag, or \A). For any other expression the result is "*".
// An expression of the form ^literal$ yields the literal itself,
// without a trailing "*".
//
// The source of re (re.String()) is parsed again with [syntax.Perl],
// the flags used by [regexp.Compile] and [regexp.MustCompile].
// A re built with [regexp.CompilePOSIX], where ^ is a line anchor,
// is not supported.
//
// [LIKE optimization]: https://sqlite.org/optoverview.html#the_like_optimization
func GlobPrefix(re *regexp.Regexp) string {
	// Matches every text: the safe answer whenever nothing better is known.
	const matchAny = "*"

	// *regexp.Regexp does not expose its program, and LiteralPrefix
	// describes how a match begins, not how the text begins.
	// Rebuild the program from the source to inspect it.
	parsed, err := syntax.Parse(re.String(), syntax.Perl)
	if err != nil {
		return matchAny
	}
	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		return matchAny
	}

	inst := &prog.Inst[prog.Start]
	var asserts syntax.EmptyOp

	// Collect the empty-width assertions every match must satisfy
	// before it consumes any input.
anchors:
	for {
		switch inst.Op {
		case syntax.InstCapture, syntax.InstNop:
			// Consumes no input and constrains nothing.
		case syntax.InstEmptyWidth:
			asserts |= syntax.EmptyOp(inst.Arg)
		default:
			break anchors
		}
		inst = &prog.Inst[inst.Out]
	}
	if asserts&syntax.EmptyBeginText == 0 {
		// A match may begin anywhere in the text.
		return matchAny
	}

	// Gather the runes every match must start with. Skipping assertions
	// here can only make the GLOB more permissive, never less.
	var glob strings.Builder
literal:
	for {
		switch inst.Op {
		case syntax.InstCapture, syntax.InstNop:
			// Consumes no input and constrains nothing.
		case syntax.InstEmptyWidth:
			asserts |= syntax.EmptyOp(inst.Arg)
		case syntax.InstRune, syntax.InstRune1:
			// Stop at anything but a single, case-sensitive rune.
			if len(inst.Rune) != 1 || syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
				break literal
			}
			r := inst.Rune[0]
			// GLOB metacharacters can't be taken literally, and
			// U+FFFD in a regexp also matches invalid UTF-8.
			if r == utf8.RuneError || strings.ContainsRune("*?[", r) {
				break literal
			}
			glob.WriteRune(r)
		default:
			break literal
		}
		inst = &prog.Inst[inst.Out]
	}

	// Anchored at both ends with nothing but literal runes in between:
	// the expression only matches the literal itself.
	if inst.Op == syntax.InstMatch && asserts&syntax.EmptyEndText != 0 {
		return glob.String()
	}
	glob.WriteByte('*')
	return glob.String()
}

// TestGlobPrefix checks that GlobPrefix only produces a literal prefix
// for expressions anchored to the start of the text.
func TestGlobPrefix(t *testing.T) {
	tests := []struct {
		re   string
		want string
	}{
		// Unanchored: a match may start anywhere in the text.
		{``, "*"},
		{`a`, "*"},
		{`a*`, "*"},
		{`ab+`, "*"},
		{`^a|b`, "*"},
		{`(?m)^ab`, "*"},
		// Anchored, but without a usable literal prefix.
		{`^`, "*"},
		{`^a*`, "*"},
		{`(?i)^ab`, "*"},
		// Anchored with a literal prefix.
		{`^a`, "a*"},
		{`^a+`, "a*"},
		{`^ab*`, "a*"},
		{`^ab+`, "ab*"},
		{`^a\?b`, "a*"},
		{`^(ab)c`, "abc*"},
		{`\Aab`, "ab*"},
		// Anchored at both ends: the literal is the only match.
		{`^$`, ""},
		{`^a$`, "a"},
	}
	for _, tt := range tests {
		t.Run(tt.re, func(t *testing.T) {
			if got := GlobPrefix(regexp.MustCompile(tt.re)); got != tt.want {
				t.Errorf("GlobPrefix() = %v, want %v", got, tt.want)
			}
		})
	}
}