Files
sqlite3/ext/unicode/unicode.go

183 lines
4.3 KiB
Go
Raw Normal View History

2023-07-12 13:29:14 +01:00
// Package unicode provides an alternative to the SQLite ICU extension.
2023-07-11 13:38:29 +01:00
//
2023-08-31 16:30:52 +01:00
// Like the [ICU extension], it provides Unicode aware:
// - upper() and lower() functions,
2023-07-12 13:29:14 +01:00
// - LIKE and REGEXP operators,
2023-07-11 13:38:29 +01:00
// - collation sequences.
2023-07-12 13:29:14 +01:00
//
2023-08-31 16:30:52 +01:00
// The implementation is not 100% compatible with the [ICU extension]:
// - upper() and lower() use [strings.ToUpper], [strings.ToLower] and [cases];
2023-07-12 13:29:14 +01:00
// - the LIKE operator follows [strings.EqualFold] rules;
2023-12-19 10:53:58 -05:00
// - the REGEXP operator uses Go [regexp/syntax];
2023-07-12 13:29:14 +01:00
// - collation sequences use [collate].
//
// Expect subtle differences, for example in the handling of Turkish case folding.
2023-08-31 16:30:52 +01:00
//
// [ICU extension]: https://sqlite.org/src/dir/ext/icu
2023-07-11 13:38:29 +01:00
package unicode
import (
"bytes"
2024-07-08 12:06:57 +01:00
"errors"
2023-07-11 13:38:29 +01:00
"regexp"
"strings"
"unicode/utf8"
"github.com/ncruces/go-sqlite3"
"github.com/ncruces/go-sqlite3/internal/util"
"golang.org/x/text/cases"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// Register registers Unicode aware functions for a database connection.
2024-07-08 12:06:57 +01:00
func Register(db *sqlite3.Conn) error {
2023-07-11 13:38:29 +01:00
flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
2024-07-08 12:06:57 +01:00
return errors.Join(
db.CreateFunction("like", 2, flags, like),
db.CreateFunction("like", 3, flags, like),
db.CreateFunction("upper", 1, flags, upper),
db.CreateFunction("upper", 2, flags, upper),
db.CreateFunction("lower", 1, flags, lower),
db.CreateFunction("lower", 2, flags, lower),
db.CreateFunction("regexp", 2, flags, regex),
db.CreateFunction("icu_load_collation", 2, sqlite3.DIRECTONLY,
func(ctx sqlite3.Context, arg ...sqlite3.Value) {
name := arg[1].Text()
if name == "" {
return
}
2023-07-11 13:38:29 +01:00
2024-07-08 12:06:57 +01:00
err := RegisterCollation(db, arg[0].Text(), name)
if err != nil {
ctx.ResultError(err)
2024-07-26 13:29:24 +01:00
return // notest
2024-07-08 12:06:57 +01:00
}
}))
2023-07-11 13:38:29 +01:00
}
2023-08-31 16:30:52 +01:00
// RegisterCollation registers a Unicode collation sequence for a database connection.
func RegisterCollation(db *sqlite3.Conn, locale, name string) error {
tag, err := language.Parse(locale)
2023-08-10 13:39:52 +01:00
if err != nil {
return err
}
return db.CreateCollation(name, collate.New(tag).Compare)
}
2023-07-11 13:38:29 +01:00
func upper(ctx sqlite3.Context, arg ...sqlite3.Value) {
if len(arg) == 1 {
2023-11-10 13:42:11 +00:00
ctx.ResultRawText(bytes.ToUpper(arg[0].RawText()))
2023-07-11 13:38:29 +01:00
return
}
cs, ok := ctx.GetAuxData(1).(cases.Caser)
if !ok {
2023-07-12 13:29:14 +01:00
t, err := language.Parse(arg[1].Text())
2023-07-11 13:38:29 +01:00
if err != nil {
ctx.ResultError(err)
2024-07-26 13:29:24 +01:00
return // notest
2023-07-11 13:38:29 +01:00
}
c := cases.Upper(t)
ctx.SetAuxData(1, c)
cs = c
}
2023-11-10 13:42:11 +00:00
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
2023-07-11 13:38:29 +01:00
}
func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
if len(arg) == 1 {
2023-11-10 13:42:11 +00:00
ctx.ResultRawText(bytes.ToLower(arg[0].RawText()))
2023-07-11 13:38:29 +01:00
return
}
cs, ok := ctx.GetAuxData(1).(cases.Caser)
if !ok {
2023-07-12 13:29:14 +01:00
t, err := language.Parse(arg[1].Text())
2023-07-11 13:38:29 +01:00
if err != nil {
ctx.ResultError(err)
2024-07-26 13:29:24 +01:00
return // notest
2023-07-11 13:38:29 +01:00
}
c := cases.Lower(t)
ctx.SetAuxData(1, c)
cs = c
}
2023-11-10 13:42:11 +00:00
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
2023-07-11 13:38:29 +01:00
}
func regex(ctx sqlite3.Context, arg ...sqlite3.Value) {
re, ok := ctx.GetAuxData(0).(*regexp.Regexp)
if !ok {
r, err := regexp.Compile(arg[0].Text())
if err != nil {
ctx.ResultError(err)
2024-07-26 13:29:24 +01:00
return // notest
2023-07-11 13:38:29 +01:00
}
re = r
2024-07-05 00:12:26 +01:00
ctx.SetAuxData(0, r)
2023-07-11 13:38:29 +01:00
}
2023-11-27 14:57:04 +00:00
ctx.ResultBool(re.Match(arg[1].RawText()))
2023-07-11 13:38:29 +01:00
}
func like(ctx sqlite3.Context, arg ...sqlite3.Value) {
escape := rune(-1)
if len(arg) == 3 {
var size int
2023-11-27 14:57:04 +00:00
b := arg[2].RawText()
2023-07-11 13:38:29 +01:00
escape, size = utf8.DecodeRune(b)
if size != len(b) {
ctx.ResultError(util.ErrorString("ESCAPE expression must be a single character"))
return
}
}
type likeData struct {
*regexp.Regexp
escape rune
}
re, ok := ctx.GetAuxData(0).(likeData)
if !ok || re.escape != escape {
2023-07-26 02:02:39 +01:00
re = likeData{
regexp.MustCompile(like2regex(arg[0].Text(), escape)),
escape,
2023-07-11 13:38:29 +01:00
}
ctx.SetAuxData(0, re)
}
2023-11-27 14:57:04 +00:00
ctx.ResultBool(re.Match(arg[1].RawText()))
2023-07-11 13:38:29 +01:00
}
// like2regex translates a LIKE pattern into the source of an anchored,
// case insensitive regular expression.
//
// '_' becomes `.`, '%' becomes `.*`, and the rune following escape is
// taken literally (the escape rune itself is dropped). Everything else is
// copied through [regexp.QuoteMeta]. '_' and '%' take priority over escape
// when escape is one of those runes. Pass a negative escape for none.
func like2regex(pattern string, escape rune) string {
	var out strings.Builder
	out.Grow(len(pattern) + 10)
	out.WriteString(`(?is)\A`) // case insensitive, . matches any character

	pending := 0      // start of the literal run not yet written, -1 if none
	skipNext := false // previous rune was the escape: keep this one literal

	for pos, ch := range pattern {
		if pending < 0 {
			pending = pos
		}
		if skipNext {
			skipNext = false
			continue
		}

		wildcard := ""
		switch {
		case ch == '_':
			wildcard = `.`
		case ch == '%':
			wildcard = `.*`
		case ch == escape:
			skipNext = true
		default:
			// Ordinary rune: it stays part of the pending literal run.
			continue
		}

		// Flush the literal run that precedes this special rune.
		out.WriteString(regexp.QuoteMeta(pattern[pending:pos]))
		out.WriteString(wildcard)
		pending = -1
	}

	if pending >= 0 {
		out.WriteString(regexp.QuoteMeta(pattern[pending:]))
	}
	out.WriteString(`\z`)
	return out.String()
}