Unaccent, title case.

This commit is contained in:
Nuno Cruces
2024-09-29 20:01:14 +01:00
parent 5b0a063bfe
commit 505c6640c2
2 changed files with 43 additions and 0 deletions

View File

@@ -5,6 +5,10 @@
// - LIKE and REGEXP operators,
// - collation sequences.
//
// It also provides, from PostgreSQL:
// - unaccent(),
// - initcap().
//
// The implementation is not 100% compatible with the [ICU extension]:
// - upper() and lower() use [strings.ToUpper], [strings.ToLower] and [cases];
// - the LIKE operator follows [strings.EqualFold] rules;
@@ -21,6 +25,7 @@ import (
"errors"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/ncruces/go-sqlite3"
@@ -28,6 +33,9 @@ import (
"golang.org/x/text/cases"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// Register registers Unicode aware functions for a database connection.
@@ -41,6 +49,9 @@ func Register(db *sqlite3.Conn) error {
db.CreateFunction("lower", 1, flags, lower),
db.CreateFunction("lower", 2, flags, lower),
db.CreateFunction("regexp", 2, flags, regex),
db.CreateFunction("initcap", 1, flags, initcap),
db.CreateFunction("initcap", 2, flags, initcap),
db.CreateFunction("unaccent", 1, flags, unaccent),
db.CreateFunction("icu_load_collation", 2, sqlite3.DIRECTONLY,
func(ctx sqlite3.Context, arg ...sqlite3.Value) {
name := arg[1].Text()
@@ -112,6 +123,35 @@ func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
}
func initcap(ctx sqlite3.Context, arg ...sqlite3.Value) {
if len(arg) == 1 {
ctx.ResultRawText(bytes.Title(arg[0].RawText()))
return
}
cs, ok := ctx.GetAuxData(1).(cases.Caser)
if !ok {
t, err := language.Parse(arg[1].Text())
if err != nil {
ctx.ResultError(err)
return // notest
}
c := cases.Title(t)
ctx.SetAuxData(1, c)
cs = c
}
ctx.ResultRawText(cs.Bytes(arg[0].RawText()))
}
func unaccent(ctx sqlite3.Context, arg ...sqlite3.Value) {
unaccent := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
res, _, err := transform.Bytes(unaccent, arg[0].RawText())
if err != nil {
ctx.ResultError(err)
} else {
ctx.ResultRawText(res)
}
}
func regex(ctx sqlite3.Context, arg ...sqlite3.Value) {
re, ok := ctx.GetAuxData(0).(*regexp.Regexp)
if !ok {

View File

@@ -47,6 +47,9 @@ func TestRegister(t *testing.T) {
{`upper('istanbul', 'tr-TR')`, "İSTANBUL"},
{`lower('Dünyanın İlk Borsası', 'tr-TR')`, "dünyanın ilk borsası"},
{`upper('Dünyanın İlk Borsası', 'tr-TR')`, "DÜNYANIN İLK BORSASI"},
{`initcap('Kad je hladno Marko nosi džemper')`, "Kad Je Hladno Marko Nosi Džemper"},
{`initcap('Kad je hladno Marko nosi džemper', 'hr-HR')`, "Kad Je Hladno Marko Nosi Džemper"},
{`unaccent('Hôtel')`, "Hotel"},
{`'Hello' REGEXP 'ell'`, "1"},
{`'Hello' REGEXP 'el.'`, "1"},
{`'Hello' LIKE 'hel_'`, "0"},