Optimize regexp.

This commit is contained in:
Nuno Cruces
2024-11-04 19:30:10 +00:00
parent 034b9a3b4d
commit 81e7a94ca4
2 changed files with 48 additions and 5 deletions

View File

@@ -1,11 +1,11 @@
// Package regexp provides additional regular expression functions.
//
// It provides the following Unicode-aware functions:
// - regexp_like(),
// - regexp_count(),
// - regexp_instr(),
// - regexp_substr(),
// - regexp_replace(),
// - regexp_like(text, pattern),
// - regexp_count(text, pattern [, start]),
// - regexp_instr(text, pattern [, start [, N [, endoption [, subexpr ]]]]),
// - regexp_substr(text, pattern [, start [, N [, subexpr ]]]),
// - regexp_replace(text, pattern, replacement [, start [, N ]]),
// - and a REGEXP operator.
//
// The implementation uses Go [regexp/syntax] for regular expressions.
@@ -16,6 +16,7 @@ package regexp
import (
	"errors"
	"regexp"
	"regexp/syntax"
	"strings"
	"unicode/utf8"

	"github.com/ncruces/go-sqlite3"
)
@@ -42,6 +43,25 @@ func Register(db *sqlite3.Conn) error {
db.CreateFunction("regexp_replace", 5, flags, regexReplace))
}
// GlobPrefix returns a GLOB pattern for a regular expression,
// appropriate to take advantage of the [LIKE optimization]
// in a query such as:
//
//	SELECT column WHERE column GLOB :glob_prefix AND column REGEXP :regexp
//
// The returned pattern matches every text that re can match,
// so adding the GLOB term never removes rows from the result.
// Go regular expressions match anywhere in the text unless anchored,
// so a literal prefix is only usable when re is anchored to the
// beginning of the text (^ outside multi-line mode, or \A).
// For any other expression GlobPrefix returns "*", which matches everything.
// When the expression is a plain literal anchored at both ends,
// the result has no wildcard and matches that literal exactly.
//
// The expression is re-parsed from re.String() using Perl syntax,
// so re is expected to come from [regexp.Compile] or [regexp.MustCompile].
//
// [LIKE optimization]: https://sqlite.org/optoverview.html#the_like_optimization
func GlobPrefix(re *regexp.Regexp) string {
	const matchAll = "*"

	parsed, err := syntax.Parse(re.String(), syntax.Perl)
	if err != nil {
		return matchAll
	}
	prog, err := syntax.Compile(parsed.Simplify())
	if err != nil {
		return matchAll
	}

	// next resolves pc to the first instruction that is neither a no-op
	// nor a capture marker; those always succeed without consuming input.
	next := func(pc uint32) *syntax.Inst {
		inst := &prog.Inst[pc]
		for inst.Op == syntax.InstNop || inst.Op == syntax.InstCapture {
			inst = &prog.Inst[inst.Out]
		}
		return inst
	}

	// Collect the assertions that must all hold where a match starts.
	// Unless one of them pins the match to the beginning of the text,
	// a match may start anywhere and no prefix can be promised.
	inst := next(uint32(prog.Start))
	var leading syntax.EmptyOp
	for inst.Op == syntax.InstEmptyWidth {
		leading |= syntax.EmptyOp(inst.Arg)
		inst = next(inst.Out)
	}
	if leading&syntax.EmptyBeginText == 0 {
		return matchAll
	}

	// Gather the case-sensitive literal runes every match must start with.
	// Stop at GLOB metacharacters, which cannot be copied verbatim,
	// and at U+FFFD, which the regexp engine also matches against
	// invalid UTF-8 in the text.
	var glob strings.Builder
	for inst.Op == syntax.InstRune1 {
		r := inst.Rune[0]
		if r == utf8.RuneError || strings.ContainsRune("*?[", r) {
			break
		}
		glob.WriteRune(r)
		inst = next(inst.Out)
	}

	// A literal immediately followed by an end-of-text assertion
	// and the end of the program can only match that exact text.
	if inst.Op == syntax.InstEmptyWidth &&
		syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText != 0 &&
		next(inst.Out).Op == syntax.InstMatch {
		return glob.String()
	}

	glob.WriteString(matchAll)
	return glob.String()
}
func load(ctx sqlite3.Context, i int, expr string) (*regexp.Regexp, error) {
re, ok := ctx.GetAuxData(i).(*regexp.Regexp)
if !ok {

View File

@@ -2,6 +2,7 @@ package regexp
import (
"database/sql"
"regexp"
"testing"
"github.com/ncruces/go-sqlite3/driver"
@@ -101,3 +102,25 @@ func TestRegister_errors(t *testing.T) {
}
}
}
// TestGlobPrefix checks that the GLOB derived from an expression never
// excludes a text the expression can match: unanchored expressions must
// yield "*", and anchored ones the longest safe literal prefix.
func TestGlobPrefix(t *testing.T) {
	tests := []struct {
		re   string
		want string
	}{
		// Unanchored expressions can match anywhere in the text.
		{``, "*"},
		{`a`, "*"},
		{`a*`, "*"},
		{`a+`, "*"},
		{`ab+`, "*"},
		{`(?m)^ab`, "*"},
		// Anchored at the beginning of the text.
		{`^`, "*"},
		{`^a`, "a*"},
		{`^a*`, "*"},
		{`^a+`, "a*"},
		{`^ab*`, "a*"},
		{`^ab+`, "ab*"},
		{`\Aab`, "ab*"},
		{`^(?i)ab`, "*"},
		// GLOB metacharacters end the prefix.
		{`^a\?b`, "a*"},
		{`^a\?b$`, "a*"},
		// Anchored at both ends: the GLOB is an exact match.
		{`^ab$`, "ab"},
	}
	for _, tt := range tests {
		t.Run(tt.re, func(t *testing.T) {
			got := GlobPrefix(regexp.MustCompile(tt.re))
			if got != tt.want {
				t.Errorf("GlobPrefix(%q) = %q, want %q", tt.re, got, tt.want)
			}
		})
	}
}