Unicode tests, fixes.

This commit is contained in:
Nuno Cruces
2023-07-12 13:29:14 +01:00
parent 72db90efdf
commit ff0cb6fb88
2 changed files with 135 additions and 16 deletions

View File

@@ -1,9 +1,17 @@
// Package unicode provides a replacement for the SQLite ICU extension.
// Package unicode provides an alternative to the SQLite ICU extension.
//
// Provides Unicode aware:
// - upper and lower functions,
// - LIKE and REGEX operators,
// - LIKE and REGEXP operators,
// - collation sequences.
//
// This package is not 100% compatible with the ICU extension:
// - upper and lower use [strings.ToUpper], [strings.ToLower] and [cases];
// - the LIKE operator follows [strings.EqualFold] rules;
// - the REGEXP operator uses Go [regex/syntax];
// - collation sequences use [collate].
//
// Expect subtle differences (e.g.) in the handling of Turkish case folding.
package unicode
import (
@@ -20,7 +28,7 @@ import (
)
// Register registers Unicode aware functions for a database connection.
func Register(db sqlite3.Conn) {
func Register(db *sqlite3.Conn) {
flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
db.CreateFunction("like", 2, flags, like)
@@ -58,7 +66,7 @@ func upper(ctx sqlite3.Context, arg ...sqlite3.Value) {
}
cs, ok := ctx.GetAuxData(1).(cases.Caser)
if !ok {
t, err := language.Parse(arg[0].Text())
t, err := language.Parse(arg[1].Text())
if err != nil {
ctx.ResultError(err)
return
@@ -77,7 +85,7 @@ func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
}
cs, ok := ctx.GetAuxData(1).(cases.Caser)
if !ok {
t, err := language.Parse(arg[0].Text())
t, err := language.Parse(arg[1].Text())
if err != nil {
ctx.ResultError(err)
return
@@ -137,7 +145,8 @@ func like2regex(pattern string, escape rune) string {
var re strings.Builder
start := 0
literal := false
re.WriteString(`(?is)`) // case insensitive, . matches any character
re.Grow(len(pattern) + 10)
re.WriteString(`(?is)\A`) // case insensitive, . matches any character
for i, r := range pattern {
if start < 0 {
start = i
@@ -164,5 +173,6 @@ func like2regex(pattern string, escape rune) string {
if start >= 0 {
re.WriteString(regexp.QuoteMeta(pattern[start:]))
}
re.WriteString(`\z`)
return re.String()
}

View File

@@ -1,25 +1,134 @@
package unicode
import "testing"
import (
"errors"
"testing"
"github.com/ncruces/go-sqlite3"
_ "github.com/ncruces/go-sqlite3/embed"
)
func TestRegister(t *testing.T) {
t.Parallel()
db, err := sqlite3.Open(":memory:")
if err != nil {
t.Fatal(err)
}
defer db.Close()
exec := func(fn string) string {
stmt, _, err := db.Prepare(`SELECT ` + fn)
if err != nil {
t.Fatal(err)
}
defer stmt.Close()
if stmt.Step() {
return stmt.ColumnText(0)
}
t.Fatal(stmt.Err())
return ""
}
Register(db)
tests := []struct {
test string
want string
}{
{`upper('hello')`, "HELLO"},
{`lower('HELLO')`, "hello"},
{`upper('привет')`, "ПРИВЕТ"},
{`lower('ПРИВЕТ')`, "привет"},
{`upper('istanbul')`, "ISTANBUL"},
{`upper('istanbul', 'tr-TR')`, "İSTANBUL"},
{`lower('Dünyanın İlk Borsası', 'tr-TR')`, "dünyanın ilk borsası"},
{`upper('Dünyanın İlk Borsası', 'tr-TR')`, "DÜNYANIN İLK BORSASI"},
{`'Hello' REGEXP 'ell'`, "1"},
{`'Hello' REGEXP 'el.'`, "1"},
{`'Hello' LIKE 'hel_'`, "0"},
{`'Hello' LIKE 'hel%'`, "1"},
{`'Hello' LIKE 'h_llo'`, "1"},
{`'Hello' LIKE 'hello'`, "1"},
{`'Привет' LIKE 'ПРИВЕТ'`, "1"},
{`'100%' LIKE '100|%' ESCAPE '|'`, "1"},
}
for _, tt := range tests {
t.Run(tt.test, func(t *testing.T) {
if got := exec(tt.test); got != tt.want {
t.Errorf("exec(%q) = %q, want %q", tt.test, got, tt.want)
}
})
}
}
func TestRegister_error(t *testing.T) {
t.Parallel()
db, err := sqlite3.Open(":memory:")
if err != nil {
t.Fatal(err)
}
defer db.Close()
Register(db)
err = db.Exec(`SELECT upper('hello', 'enUS')`)
if err == nil {
t.Error("want error")
}
if !errors.Is(err, sqlite3.ERROR) {
t.Errorf("got %v, want sqlite3.ERROR", err)
}
err = db.Exec(`SELECT lower('hello', 'enUS')`)
if err == nil {
t.Error("want error")
}
if !errors.Is(err, sqlite3.ERROR) {
t.Errorf("got %v, want sqlite3.ERROR", err)
}
err = db.Exec(`SELECT 'hello' REGEXP '\'`)
if err == nil {
t.Error("want error")
}
if !errors.Is(err, sqlite3.ERROR) {
t.Errorf("got %v, want sqlite3.ERROR", err)
}
err = db.Exec(`SELECT 'hello' LIKE 'HELLO' ESCAPE '\\' `)
if err == nil {
t.Error("want error")
}
if !errors.Is(err, sqlite3.ERROR) {
t.Errorf("got %v, want sqlite3.ERROR", err)
}
}
func Test_like2regex(t *testing.T) {
const prefix = `(?is)\A`
const sufix = `\z`
tests := []struct {
pattern string
escape rune
want string
}{
{`a`, -1, `(?is)a`},
{`a.`, -1, `(?is)a\.`},
{`a%`, -1, `(?is)a.*`},
{`a\`, -1, `(?is)a\\`},
{`a_b`, -1, `(?is)a.b`},
{`a|b`, '|', `(?is)ab`},
{`a|_`, '|', `(?is)a_`},
{`a`, -1, `a`},
{`a.`, -1, `a\.`},
{`a%`, -1, `a.*`},
{`a\`, -1, `a\\`},
{`a_b`, -1, `a.b`},
{`a|b`, '|', `ab`},
{`a|_`, '|', `a_`},
}
for _, tt := range tests {
t.Run(tt.pattern, func(t *testing.T) {
if got := like2regex(tt.pattern, tt.escape); got != tt.want {
t.Errorf("like2regex() = %v, want %v", got, tt.want)
want := prefix + tt.want + sufix
if got := like2regex(tt.pattern, tt.escape); got != want {
t.Errorf("like2regex() = %q, want %q", got, want)
}
})
}