diff --git a/ext/unicode/unicode.go b/ext/unicode/unicode.go index 113fc23..2955965 100644 --- a/ext/unicode/unicode.go +++ b/ext/unicode/unicode.go @@ -1,9 +1,17 @@ -// Package unicode provides a replacement for the SQLite ICU extension. +// Package unicode provides an alternative to the SQLite ICU extension. // // Provides Unicode aware: // - upper and lower functions, -// - LIKE and REGEX operators, +// - LIKE and REGEXP operators, // - collation sequences. +// +// This package is not 100% compatible with the ICU extension: +// - upper and lower use [strings.ToUpper], [strings.ToLower] and [cases]; +// - the LIKE operator follows [strings.EqualFold] rules; +// - the REGEXP operator uses Go [regexp/syntax]; +// - collation sequences use [collate]. +// +// Expect subtle differences (e.g.) in the handling of Turkish case folding. package unicode import ( @@ -20,7 +28,7 @@ import ( ) // Register registers Unicode aware functions for a database connection. -func Register(db sqlite3.Conn) { +func Register(db *sqlite3.Conn) { flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS db.CreateFunction("like", 2, flags, like) @@ -58,7 +66,7 @@ func upper(ctx sqlite3.Context, arg ...sqlite3.Value) { } cs, ok := ctx.GetAuxData(1).(cases.Caser) if !ok { - t, err := language.Parse(arg[0].Text()) + t, err := language.Parse(arg[1].Text()) if err != nil { ctx.ResultError(err) return @@ -77,7 +85,7 @@ func lower(ctx sqlite3.Context, arg ...sqlite3.Value) { } cs, ok := ctx.GetAuxData(1).(cases.Caser) if !ok { - t, err := language.Parse(arg[0].Text()) + t, err := language.Parse(arg[1].Text()) if err != nil { ctx.ResultError(err) return @@ -137,7 +145,8 @@ func like2regex(pattern string, escape rune) string { var re strings.Builder start := 0 literal := false - re.WriteString(`(?is)`) // case insensitive, . matches any character + re.Grow(len(pattern) + 10) + re.WriteString(`(?is)\A`) // case insensitive, . 
matches any character for i, r := range pattern { if start < 0 { start = i @@ -164,5 +173,6 @@ func like2regex(pattern string, escape rune) string { if start >= 0 { re.WriteString(regexp.QuoteMeta(pattern[start:])) } + re.WriteString(`\z`) return re.String() } diff --git a/ext/unicode/unicode_test.go b/ext/unicode/unicode_test.go index 1679c80..6d9b085 100644 --- a/ext/unicode/unicode_test.go +++ b/ext/unicode/unicode_test.go @@ -1,25 +1,134 @@ package unicode -import "testing" +import ( + "errors" + "testing" + + "github.com/ncruces/go-sqlite3" + _ "github.com/ncruces/go-sqlite3/embed" +) + +func TestRegister(t *testing.T) { + t.Parallel() + + db, err := sqlite3.Open(":memory:") + if err != nil { + t.Fatal(err) + } + defer db.Close() + + exec := func(fn string) string { + stmt, _, err := db.Prepare(`SELECT ` + fn) + if err != nil { + t.Fatal(err) + } + defer stmt.Close() + + if stmt.Step() { + return stmt.ColumnText(0) + } + t.Fatal(stmt.Err()) + return "" + } + + Register(db) + + tests := []struct { + test string + want string + }{ + {`upper('hello')`, "HELLO"}, + {`lower('HELLO')`, "hello"}, + {`upper('привет')`, "ПРИВЕТ"}, + {`lower('ПРИВЕТ')`, "привет"}, + {`upper('istanbul')`, "ISTANBUL"}, + {`upper('istanbul', 'tr-TR')`, "İSTANBUL"}, + {`lower('Dünyanın İlk Borsası', 'tr-TR')`, "dünyanın ilk borsası"}, + {`upper('Dünyanın İlk Borsası', 'tr-TR')`, "DÜNYANIN İLK BORSASI"}, + {`'Hello' REGEXP 'ell'`, "1"}, + {`'Hello' REGEXP 'el.'`, "1"}, + {`'Hello' LIKE 'hel_'`, "0"}, + {`'Hello' LIKE 'hel%'`, "1"}, + {`'Hello' LIKE 'h_llo'`, "1"}, + {`'Hello' LIKE 'hello'`, "1"}, + {`'Привет' LIKE 'ПРИВЕТ'`, "1"}, + {`'100%' LIKE '100|%' ESCAPE '|'`, "1"}, + } + + for _, tt := range tests { + t.Run(tt.test, func(t *testing.T) { + if got := exec(tt.test); got != tt.want { + t.Errorf("exec(%q) = %q, want %q", tt.test, got, tt.want) + } + }) + } +} + +func TestRegister_error(t *testing.T) { + t.Parallel() + + db, err := sqlite3.Open(":memory:") + if err != nil { + t.Fatal(err) 
+ } + defer db.Close() + + Register(db) + + err = db.Exec(`SELECT upper('hello', 'enUS')`) + if err == nil { + t.Error("want error") + } + if !errors.Is(err, sqlite3.ERROR) { + t.Errorf("got %v, want sqlite3.ERROR", err) + } + + err = db.Exec(`SELECT lower('hello', 'enUS')`) + if err == nil { + t.Error("want error") + } + if !errors.Is(err, sqlite3.ERROR) { + t.Errorf("got %v, want sqlite3.ERROR", err) + } + + err = db.Exec(`SELECT 'hello' REGEXP '\'`) + if err == nil { + t.Error("want error") + } + if !errors.Is(err, sqlite3.ERROR) { + t.Errorf("got %v, want sqlite3.ERROR", err) + } + + err = db.Exec(`SELECT 'hello' LIKE 'HELLO' ESCAPE '\\' `) + if err == nil { + t.Error("want error") + } + if !errors.Is(err, sqlite3.ERROR) { + t.Errorf("got %v, want sqlite3.ERROR", err) + } +} func Test_like2regex(t *testing.T) { + const prefix = `(?is)\A` + const sufix = `\z` tests := []struct { pattern string escape rune want string }{ - {`a`, -1, `(?is)a`}, - {`a.`, -1, `(?is)a\.`}, - {`a%`, -1, `(?is)a.*`}, - {`a\`, -1, `(?is)a\\`}, - {`a_b`, -1, `(?is)a.b`}, - {`a|b`, '|', `(?is)ab`}, - {`a|_`, '|', `(?is)a_`}, + {`a`, -1, `a`}, + {`a.`, -1, `a\.`}, + {`a%`, -1, `a.*`}, + {`a\`, -1, `a\\`}, + {`a_b`, -1, `a.b`}, + {`a|b`, '|', `ab`}, + {`a|_`, '|', `a_`}, } for _, tt := range tests { t.Run(tt.pattern, func(t *testing.T) { - if got := like2regex(tt.pattern, tt.escape); got != tt.want { - t.Errorf("like2regex() = %v, want %v", got, tt.want) + want := prefix + tt.want + sufix + if got := like2regex(tt.pattern, tt.escape); got != want { + t.Errorf("like2regex() = %q, want %q", got, want) } }) }