From a25bfbaf45d61bc18d397b235ae66797d72d960b Mon Sep 17 00:00:00 2001 From: Fabio Bozzo Date: Fri, 29 Nov 2024 19:32:31 +0100 Subject: [PATCH 1/4] fix: extended field names --- pkg/policy/selector/parsing.go | 16 ++++++++++- pkg/policy/selector/parsing_test.go | 44 +++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/pkg/policy/selector/parsing.go b/pkg/policy/selector/parsing.go index 05ab092..fd42897 100644 --- a/pkg/policy/selector/parsing.go +++ b/pkg/policy/selector/parsing.go @@ -11,7 +11,21 @@ import ( var ( indexRegex = regexp.MustCompile(`^-?\d+$`) sliceRegex = regexp.MustCompile(`^((\-?\d+:\-?\d*)|(\-?\d*:\-?\d+))$`) - fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_-]*?$`) + + // According to ECMAScript 2024, identifiers can include: + // - Unicode letters + // - $, _ + // - Unicode combining marks + // - Unicode digits + // - Unicode connector punctuation + // Additional characters allowed for compatibility: + // - hyphen (-) + // \p{L} - any kind of letter from any language + // \p{Mn}\p{Mc} - combining marks and spacing combining marks + // \p{Nd} - decimal numbers + // \p{Pc} - connector punctuation (like underscore) + // \p{Sm}\p{So} - math symbols and other symbols + fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_\p{L}][a-zA-Z$_\p{L}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Sm}\p{So}-]*?$`) ) func Parse(str string) (Selector, error) { diff --git a/pkg/policy/selector/parsing_test.go b/pkg/policy/selector/parsing_test.go index b84ad52..01d0b88 100644 --- a/pkg/policy/selector/parsing_test.go +++ b/pkg/policy/selector/parsing_test.go @@ -572,4 +572,48 @@ func TestParse(t *testing.T) { _, err := Parse(".[foo]") require.Error(t, err) }) + + t.Run("extended field names", func(t *testing.T) { + validFields := []string{ + ".basic", + ".user_name", + ".user-name", + ".userName$special", + ".αβγ", // Greek letters + ".użytkownik", // Polish characters + ".用户", // Chinese characters + ".사용자", // Korean characters + "._private", + ".number123", + ".camelCase", + ".snake_case", + ".kebab-case", + ".mixed_kebab-case", + ".with$dollar", + ".MIXED_Case_123", + ".unicode⌘", + } + + for _, field := range validFields { + sel, err := Parse(field) + require.NoError(t, err, "field: %s", field) + require.NotNil(t, sel) + } + + invalidFields := []string{ + ".123number", // Can't start with digit + ".@special", // @ not allowed + ".space name", // No spaces + ".#hashtag", // No # + ".name!", // No ! + ".{brackets}", // No brackets + ".name/with/slashes", // No slashes + } + + for _, field := range invalidFields { + sel, err := Parse(field) + require.Error(t, err, "field: %s", field) + require.Nil(t, sel) + } + }) } From 117a75e2c4e28562ddd22943d77fdc6896214a54 Mon Sep 17 00:00:00 2001 From: Fabio Bozzo Date: Fri, 29 Nov 2024 19:36:00 +0100 Subject: [PATCH 2/4] cleanup comments --- pkg/policy/selector/parsing.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/policy/selector/parsing.go b/pkg/policy/selector/parsing.go index fd42897..251d8f3 100644 --- a/pkg/policy/selector/parsing.go +++ b/pkg/policy/selector/parsing.go @@ -12,13 +12,11 @@ var ( indexRegex = regexp.MustCompile(`^-?\d+$`) sliceRegex = regexp.MustCompile(`^((\-?\d+:\-?\d*)|(\-?\d*:\-?\d+))$`) - // According to ECMAScript 2024, identifiers can include: // - Unicode letters - // - $, _ // - Unicode combining marks // - Unicode digits // - Unicode connector punctuation - // Additional characters allowed for compatibility: + // - $, _ // - hyphen (-) // \p{L} - any kind of letter from any language // \p{Mn}\p{Mc} - combining marks and spacing combining marks From 15751c7362988dcfa686a4f0bbdf555a436d69bf Mon Sep 17 00:00:00 2001 From: Fabio Bozzo Date: Mon, 2 Dec 2024 18:30:41 +0100 Subject: [PATCH 3/4] regex to be more restrictive and consistent --- pkg/policy/selector/parsing.go | 3 +-- pkg/policy/selector/parsing_test.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/policy/selector/parsing.go b/pkg/policy/selector/parsing.go index 251d8f3..2e758af 100644 --- a/pkg/policy/selector/parsing.go +++ b/pkg/policy/selector/parsing.go @@ -22,8 +22,7 @@ var ( // \p{Mn}\p{Mc} - combining marks and spacing combining marks // \p{Nd} - decimal numbers // \p{Pc} - connector punctuation (like underscore) - // \p{Sm}\p{So} - math symbols and other symbols - fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_\p{L}][a-zA-Z$_\p{L}\p{Mn}\p{Mc}\p{Nd}\p{Pc}\p{Sm}\p{So}-]*?$`) + fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_\p{L}][a-zA-Z$_\p{L}\p{Mn}\p{Mc}\p{Nd}\p{Pc}-]*?$`) ) func Parse(str string) (Selector, error) { diff --git a/pkg/policy/selector/parsing_test.go b/pkg/policy/selector/parsing_test.go index 01d0b88..b7ff22d 100644 --- a/pkg/policy/selector/parsing_test.go +++ b/pkg/policy/selector/parsing_test.go @@ -591,7 +591,7 @@ func TestParse(t *testing.T) { ".mixed_kebab-case", ".with$dollar", ".MIXED_Case_123", - ".unicode⌘", + ".unicodeø", } for _, field := range validFields { From 3688ccea019316b939421518b8033e84b3016d07 Mon Sep 17 00:00:00 2001 From: Fabio Bozzo Date: Mon, 2 Dec 2024 19:18:01 +0100 Subject: [PATCH 4/4] fieldRegex to be more restrictive and consistent --- pkg/policy/selector/parsing.go | 21 ++++++++++----------- pkg/policy/selector/parsing_test.go | 1 + 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/policy/selector/parsing.go b/pkg/policy/selector/parsing.go index e6deab8..13be0a1 100644 --- a/pkg/policy/selector/parsing.go +++ b/pkg/policy/selector/parsing.go @@ -14,17 +14,16 @@ var ( indexRegex = regexp.MustCompile(`^-?\d+$`) sliceRegex = regexp.MustCompile(`^((\-?\d+:\-?\d*)|(\-?\d*:\-?\d+))$`) - // - Unicode letters - // - Unicode combining marks - // - Unicode digits - // - Unicode connector punctuation - // - $, _ - // - hyphen (-) - // \p{L} - any kind of letter from any language - // \p{Mn}\p{Mc} - combining marks and spacing combining marks - // \p{Nd} - decimal numbers - // \p{Pc} - connector punctuation (like underscore) - fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_\p{L}][a-zA-Z$_\p{L}\p{Mn}\p{Mc}\p{Nd}\p{Pc}-]*?$`) + // Field name requirements: + // - Must start with ASCII letter, Unicode letter, or underscore + // - Can contain: + // - ASCII letters (a-z, A-Z) + // - ASCII digits (0-9) + // - Unicode letters (\p{L}) + // - Dollar sign ($) + // - Underscore (_) + // - Hyphen (-) + fieldRegex = regexp.MustCompile(`^\.[a-zA-Z_\p{L}][a-zA-Z0-9$_\p{L}\-]*$`) ) func Parse(str string) (Selector, error) { diff --git a/pkg/policy/selector/parsing_test.go b/pkg/policy/selector/parsing_test.go index e58e9b2..6c8caf4 100644 --- a/pkg/policy/selector/parsing_test.go +++ b/pkg/policy/selector/parsing_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/stretchr/testify/require" + "github.com/ucan-wg/go-ucan/pkg/policy/limits" )