2023-07-12 13:29:14 +01:00
|
|
|
// Package unicode provides an alternative to the SQLite ICU extension.
|
2023-07-11 13:38:29 +01:00
|
|
|
//
|
2023-08-31 16:30:52 +01:00
|
|
|
// Like the [ICU extension], it provides Unicode aware:
|
|
|
|
|
// - upper() and lower() functions,
|
2023-07-12 13:29:14 +01:00
|
|
|
// - LIKE and REGEXP operators,
|
2023-07-11 13:38:29 +01:00
|
|
|
// - collation sequences.
|
2023-07-12 13:29:14 +01:00
|
|
|
//
|
2023-08-31 16:30:52 +01:00
|
|
|
// The implementation is not 100% compatible with the [ICU extension]:
|
|
|
|
|
// - upper() and lower() use [bytes.ToUpper], [bytes.ToLower] and [cases];
|
2023-07-12 13:29:14 +01:00
|
|
|
// - the LIKE operator follows [strings.EqualFold] rules;
|
2023-12-19 10:53:58 -05:00
|
|
|
// - the REGEXP operator uses Go [regexp/syntax];
|
2023-07-12 13:29:14 +01:00
|
|
|
// - collation sequences use [collate].
|
|
|
|
|
//
|
|
|
|
|
// Expect subtle differences, for example in the handling of Turkish case folding.
|
2023-08-31 16:30:52 +01:00
|
|
|
//
|
|
|
|
|
// [ICU extension]: https://sqlite.org/src/dir/ext/icu
|
2023-07-11 13:38:29 +01:00
|
|
|
package unicode
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
|
|
|
|
"unicode/utf8"
|
|
|
|
|
|
|
|
|
|
"github.com/ncruces/go-sqlite3"
|
|
|
|
|
"github.com/ncruces/go-sqlite3/internal/util"
|
|
|
|
|
"golang.org/x/text/cases"
|
|
|
|
|
"golang.org/x/text/collate"
|
|
|
|
|
"golang.org/x/text/language"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Register registers Unicode aware functions for a database connection.
|
2023-07-12 13:29:14 +01:00
|
|
|
func Register(db *sqlite3.Conn) {
|
2023-07-11 13:38:29 +01:00
|
|
|
flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
|
|
|
|
|
|
|
|
|
|
db.CreateFunction("like", 2, flags, like)
|
|
|
|
|
db.CreateFunction("like", 3, flags, like)
|
|
|
|
|
db.CreateFunction("upper", 1, flags, upper)
|
|
|
|
|
db.CreateFunction("upper", 2, flags, upper)
|
|
|
|
|
db.CreateFunction("lower", 1, flags, lower)
|
|
|
|
|
db.CreateFunction("lower", 2, flags, lower)
|
|
|
|
|
db.CreateFunction("regexp", 2, flags, regex)
|
|
|
|
|
db.CreateFunction("icu_load_collation", 2, sqlite3.DIRECTONLY,
|
|
|
|
|
func(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
|
|
|
|
name := arg[1].Text()
|
|
|
|
|
if name == "" {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-31 16:30:52 +01:00
|
|
|
err := RegisterCollation(db, arg[0].Text(), name)
|
2023-07-11 13:38:29 +01:00
|
|
|
if err != nil {
|
|
|
|
|
ctx.ResultError(err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-31 16:30:52 +01:00
|
|
|
// RegisterCollation registers a Unicode collation sequence for a database connection.
|
|
|
|
|
func RegisterCollation(db *sqlite3.Conn, locale, name string) error {
|
|
|
|
|
tag, err := language.Parse(locale)
|
2023-08-10 13:39:52 +01:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return db.CreateCollation(name, collate.New(tag).Compare)
|
|
|
|
|
}
|
|
|
|
|
|
2023-07-11 13:38:29 +01:00
|
|
|
func upper(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
|
|
|
|
if len(arg) == 1 {
|
2023-11-10 13:42:11 +00:00
|
|
|
ctx.ResultRawText(bytes.ToUpper(arg[0].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
cs, ok := ctx.GetAuxData(1).(cases.Caser)
|
|
|
|
|
if !ok {
|
2023-07-12 13:29:14 +01:00
|
|
|
t, err := language.Parse(arg[1].Text())
|
2023-07-11 13:38:29 +01:00
|
|
|
if err != nil {
|
|
|
|
|
ctx.ResultError(err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
c := cases.Upper(t)
|
|
|
|
|
ctx.SetAuxData(1, c)
|
|
|
|
|
cs = c
|
|
|
|
|
}
|
2023-11-10 13:42:11 +00:00
|
|
|
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
|
|
|
|
if len(arg) == 1 {
|
2023-11-10 13:42:11 +00:00
|
|
|
ctx.ResultRawText(bytes.ToLower(arg[0].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
cs, ok := ctx.GetAuxData(1).(cases.Caser)
|
|
|
|
|
if !ok {
|
2023-07-12 13:29:14 +01:00
|
|
|
t, err := language.Parse(arg[1].Text())
|
2023-07-11 13:38:29 +01:00
|
|
|
if err != nil {
|
|
|
|
|
ctx.ResultError(err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
c := cases.Lower(t)
|
|
|
|
|
ctx.SetAuxData(1, c)
|
|
|
|
|
cs = c
|
|
|
|
|
}
|
2023-11-10 13:42:11 +00:00
|
|
|
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func regex(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
|
|
|
|
re, ok := ctx.GetAuxData(0).(*regexp.Regexp)
|
|
|
|
|
if !ok {
|
|
|
|
|
r, err := regexp.Compile(arg[0].Text())
|
|
|
|
|
if err != nil {
|
|
|
|
|
ctx.ResultError(err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
re = r
|
|
|
|
|
ctx.SetAuxData(0, re)
|
|
|
|
|
}
|
2023-11-27 14:57:04 +00:00
|
|
|
ctx.ResultBool(re.Match(arg[1].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func like(ctx sqlite3.Context, arg ...sqlite3.Value) {
|
|
|
|
|
escape := rune(-1)
|
|
|
|
|
if len(arg) == 3 {
|
|
|
|
|
var size int
|
2023-11-27 14:57:04 +00:00
|
|
|
b := arg[2].RawText()
|
2023-07-11 13:38:29 +01:00
|
|
|
escape, size = utf8.DecodeRune(b)
|
|
|
|
|
if size != len(b) {
|
|
|
|
|
ctx.ResultError(util.ErrorString("ESCAPE expression must be a single character"))
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type likeData struct {
|
|
|
|
|
*regexp.Regexp
|
|
|
|
|
escape rune
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
re, ok := ctx.GetAuxData(0).(likeData)
|
|
|
|
|
if !ok || re.escape != escape {
|
2023-07-26 02:02:39 +01:00
|
|
|
re = likeData{
|
|
|
|
|
regexp.MustCompile(like2regex(arg[0].Text(), escape)),
|
|
|
|
|
escape,
|
2023-07-11 13:38:29 +01:00
|
|
|
}
|
|
|
|
|
ctx.SetAuxData(0, re)
|
|
|
|
|
}
|
2023-11-27 14:57:04 +00:00
|
|
|
ctx.ResultBool(re.Match(arg[1].RawText()))
|
2023-07-11 13:38:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// like2regex translates a SQL LIKE pattern into the source of an anchored,
// case insensitive Go regular expression.
//
// '_' becomes `.`, '%' becomes `.*`, and the character following escape is
// taken literally (the escape character itself is dropped). Everything else
// is copied through regexp.QuoteMeta. Pass an escape of -1 for no escape.
func like2regex(pattern string, escape rune) string {
	var out strings.Builder
	out.Grow(len(pattern) + 10)
	out.WriteString(`(?is)\A`) // case insensitive, . matches any character

	run := 0         // start of the pending literal run, or -1 if there is none
	escaped := false // the previous character was the escape character

	// flush quotes and emits the pending literal run, which ends before end.
	flush := func(end int) {
		out.WriteString(regexp.QuoteMeta(pattern[run:end]))
		run = -1
	}

	for pos, ch := range pattern {
		if run < 0 {
			run = pos
		}
		switch {
		case escaped:
			// Escaped character: it stays part of the literal run.
			escaped = false
		case ch == '_':
			flush(pos)
			out.WriteString(`.`)
		case ch == '%':
			flush(pos)
			out.WriteString(`.*`)
		case ch == escape:
			flush(pos)
			escaped = true
		}
	}
	if run >= 0 {
		out.WriteString(regexp.QuoteMeta(pattern[run:]))
	}

	out.WriteString(`\z`)
	return out.String()
}
|