1 Commit

Author        SHA1         Message                                 Date
Fabio Bozzo   d3e2ac07fc   fix(selector): tokenize utf-8 support   2024-10-22 11:12:42 +02:00
3 changed files with 55 additions and 58 deletions


@@ -38,13 +38,20 @@ func matchStatement(statement Statement, node ipld.Node) bool {
 	switch statement.Kind() {
 	case KindEqual:
 		if s, ok := statement.(equality); ok {
-			one, _, err := s.selector.Select(node)
+			one, many, err := s.selector.Select(node)
 			if err != nil {
 				return false
 			}
 			if one != nil {
 				return datamodel.DeepEqual(s.value, one)
 			}
+			if many != nil {
+				for _, n := range many {
+					if eq := datamodel.DeepEqual(s.value, n); eq {
+						return true
+					}
+				}
+			}
 			return false
 		}
@@ -121,41 +128,16 @@ func matchStatement(statement Statement, node ipld.Node) bool {
 	case KindAll:
 		if s, ok := statement.(quantifier); ok {
-			one, many, err := s.selector.Select(node)
-			if err != nil {
+			_, many, err := s.selector.Select(node)
+			if err != nil || many == nil {
 				return false
 			}
-			if one != nil {
-				it := one.ListIterator()
-				if it != nil {
-					for !it.Done() {
-						_, v, err := it.Next()
-						if err != nil {
-							return false
-						}
-						ok := matchStatement(s.statement, v)
-						if !ok {
-							return false
-						}
-					}
-				} else {
-					ok := matchStatement(s.statement, one)
-					if !ok {
-						return false
-					}
-				}
-			}
-			if len(many) > 0 {
-				for _, n := range many {
-					ok := matchStatement(s.statement, n)
-					if !ok {
-						return false
-					}
-				}
+			for _, n := range many {
+				ok := matchStatement(s.statement, n)
+				if !ok {
+					return false
+				}
 			}
 			return true
 		}
 	case KindAny:
@@ -164,29 +146,13 @@ func matchStatement(statement Statement, node ipld.Node) bool {
 			if err != nil {
 				return false
 			}
 			if one != nil {
-				it := one.ListIterator()
-				if it != nil {
-					for !it.Done() {
-						_, v, err := it.Next()
-						if err != nil {
-							return false
-						}
-						ok := matchStatement(s.statement, v)
-						if ok {
-							return true
-						}
-					}
-				} else {
-					ok := matchStatement(s.statement, one)
-					if ok {
-						return true
-					}
-				}
+				ok := matchStatement(s.statement, one)
+				if ok {
+					return true
+				}
 			}
-			if len(many) > 0 {
+			if many != nil {
 				for _, n := range many {
 					ok := matchStatement(s.statement, n)
 					if ok {

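With this change the KindAll and KindAny branches no longer special-case a single result or walk a list iterator: they simply quantify over the slice the selector returns. The underlying pattern is plain all/any over a slice. A minimal standalone sketch of that pattern follows; the helper names matchAll/matchAny and the predicate signature are illustrative assumptions, not part of the selector package.

// Minimal sketch of the quantifier pattern used above: "all" fails on the
// first non-matching element, "any" succeeds on the first match.
// matchAll/matchAny and the predicate signature are illustrative only.
package main

import "fmt"

func matchAll[T any](items []T, match func(T) bool) bool {
	for _, item := range items {
		if !match(item) {
			return false // KindAll: one failure rejects the statement
		}
	}
	return true // vacuously true for an empty slice; the code above guards many == nil first
}

func matchAny[T any](items []T, match func(T) bool) bool {
	for _, item := range items {
		if match(item) {
			return true // KindAny: one success is enough
		}
	}
	return false
}

func main() {
	nums := []int{2, 4, 6}
	isEven := func(n int) bool { return n%2 == 0 }
	fmt.Println(matchAll(nums, isEven))                            // true
	fmt.Println(matchAny(nums, func(n int) bool { return n > 5 })) // true
}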

@@ -5,6 +5,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 )
 
 var (
@@ -102,10 +103,10 @@ func tokenize(str string) []string {
 	ctx := ""
 	for col < len(str) {
-		char := string(str[col])
-		if char == "\"" && string(str[col-1]) != "\\" {
-			col++
+		char, size := utf8.DecodeRuneInString(str[col:])
+		if char == '"' && (col == 0 || str[col-1] != '\\') {
+			col += size
 			if ctx == "\"" {
 				ctx = ""
 			} else {
@@ -115,17 +116,17 @@ func tokenize(str string) []string {
 		}
 		if ctx == "\"" {
-			col++
+			col += size
 			continue
 		}
-		if char == "." || char == "[" {
+		if char == '.' || char == '[' {
 			if ofs < col {
 				toks = append(toks, str[ofs:col])
 			}
 			ofs = col
 		}
-		col++
+		col += size
 	}
 	if ofs < col && ctx != "\"" {

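The tokenizer bug being fixed here is byte-wise iteration: str[col] yields a single byte, so any multi-byte UTF-8 character in a selector gets split and mis-compared. utf8.DecodeRuneInString instead returns the full rune together with its width in bytes, which is what the loop now uses to advance col. A small standalone demo of the difference, not taken from the repository:

// Demo of the failure mode fixed above: indexing a Go string by byte splits
// multi-byte UTF-8 characters, while utf8.DecodeRuneInString returns the
// whole rune together with the number of bytes to advance by.
package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	s := ".こんにちは"

	// Byte indexing: s[1] is only the first of the three bytes of "こ",
	// and converting that single byte to a string produces mojibake, not "こ".
	fmt.Println(string(s[1]))

	// Rune decoding: the full character and its byte width (3 for "こ").
	r, size := utf8.DecodeRuneInString(s[1:])
	fmt.Println(string(r), size) // こ 3
}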

@@ -0,0 +1,30 @@
+package selector
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestTokenizeUTF8(t *testing.T) {
+	t.Run("simple UTF-8", func(t *testing.T) {
+		str := ".こんにちは[0]"
+		expected := []string{".", "こんにちは", "[0]"}
+		actual := tokenize(str)
+		require.Equal(t, expected, actual)
+	})
+
+	t.Run("UTF-8 with quotes", func(t *testing.T) {
+		str := ".こんにちは[\"привет\"]"
+		expected := []string{".", "こんにちは", "[\"привет\"]"}
+		actual := tokenize(str)
+		require.Equal(t, expected, actual)
+	})
+
+	t.Run("UTF-8 with escaped quotes", func(t *testing.T) {
+		str := ".こんにちは[\"привет \\\"мир\\\"\"]"
+		expected := []string{".", "こんにちは", "[\"привет \\\"мир\\\"\"]"}
+		actual := tokenize(str)
+		require.Equal(t, expected, actual)
+	})
+}
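
The new cases can be run on their own with the standard Go tooling, for example:

	go test -run TestTokenizeUTF8 ./...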