diff --git a/base256emoji.go b/base256emoji.go new file mode 100644 index 0000000..3111615 --- /dev/null +++ b/base256emoji.go @@ -0,0 +1,94 @@ +package multibase + +import ( + "strconv" + "strings" + "unicode/utf8" +) + +var base256emojiTable = [256]rune{ + // Curated list, this is just a list of things that *somwhat* are related to our comunity + '๐Ÿš€', '๐Ÿช', 'โ˜„', '๐Ÿ›ฐ', '๐ŸŒŒ', // Space + '๐ŸŒ‘', '๐ŸŒ’', '๐ŸŒ“', '๐ŸŒ”', '๐ŸŒ•', '๐ŸŒ–', '๐ŸŒ—', '๐ŸŒ˜', // Moon + '๐ŸŒ', '๐ŸŒ', '๐ŸŒŽ', // Our Home, for now (earth) + 'โ˜‰', 'โ˜€', // Our Garden, for now (sol) + '๐Ÿ’ป', '๐Ÿ–ฅ', '๐Ÿ’พ', '๐Ÿ’ฟ', // Computer + // The rest is completed from https://home.unicode.org/emoji/emoji-frequency/ at the time of creation (december 2021) (the data is from 2019), most used first until we reach 256. + // We exclude modifier based emojies (such as flags) as they are bigger than one single codepoint. + // Some other emojies were removed adhoc for various reasons. + '๐Ÿ˜‚', 'โค', '๐Ÿ˜', '๐Ÿคฃ', '๐Ÿ˜Š', '๐Ÿ™', '๐Ÿ’•', '๐Ÿ˜ญ', '๐Ÿ˜˜', '๐Ÿ‘', + '๐Ÿ˜…', '๐Ÿ‘', '๐Ÿ˜', '๐Ÿ”ฅ', '๐Ÿฅฐ', '๐Ÿ’”', '๐Ÿ’–', '๐Ÿ’™', '๐Ÿ˜ข', '๐Ÿค”', + '๐Ÿ˜†', '๐Ÿ™„', '๐Ÿ’ช', '๐Ÿ˜‰', 'โ˜บ', '๐Ÿ‘Œ', '๐Ÿค—', '๐Ÿ’œ', '๐Ÿ˜”', '๐Ÿ˜Ž', + '๐Ÿ˜‡', '๐ŸŒน', '๐Ÿคฆ', '๐ŸŽ‰', '๐Ÿ’ž', 'โœŒ', 'โœจ', '๐Ÿคท', '๐Ÿ˜ฑ', '๐Ÿ˜Œ', + '๐ŸŒธ', '๐Ÿ™Œ', '๐Ÿ˜‹', '๐Ÿ’—', '๐Ÿ’š', '๐Ÿ˜', '๐Ÿ’›', '๐Ÿ™‚', '๐Ÿ’“', '๐Ÿคฉ', + '๐Ÿ˜„', '๐Ÿ˜€', '๐Ÿ–ค', '๐Ÿ˜ƒ', '๐Ÿ’ฏ', '๐Ÿ™ˆ', '๐Ÿ‘‡', '๐ŸŽถ', '๐Ÿ˜’', '๐Ÿคญ', + 'โฃ', '๐Ÿ˜œ', '๐Ÿ’‹', '๐Ÿ‘€', '๐Ÿ˜ช', '๐Ÿ˜‘', '๐Ÿ’ฅ', '๐Ÿ™‹', '๐Ÿ˜ž', '๐Ÿ˜ฉ', + '๐Ÿ˜ก', '๐Ÿคช', '๐Ÿ‘Š', '๐Ÿฅณ', '๐Ÿ˜ฅ', '๐Ÿคค', '๐Ÿ‘‰', '๐Ÿ’ƒ', '๐Ÿ˜ณ', 'โœ‹', + '๐Ÿ˜š', '๐Ÿ˜', '๐Ÿ˜ด', '๐ŸŒŸ', '๐Ÿ˜ฌ', '๐Ÿ™ƒ', '๐Ÿ€', '๐ŸŒท', '๐Ÿ˜ป', '๐Ÿ˜“', + 'โญ', 'โœ…', '๐Ÿฅบ', '๐ŸŒˆ', '๐Ÿ˜ˆ', '๐Ÿค˜', '๐Ÿ’ฆ', 'โœ”', '๐Ÿ˜ฃ', '๐Ÿƒ', + '๐Ÿ’', 'โ˜น', '๐ŸŽŠ', '๐Ÿ’˜', '๐Ÿ˜ ', 'โ˜', '๐Ÿ˜•', '๐ŸŒบ', '๐ŸŽ‚', '๐ŸŒป', + '๐Ÿ˜', '๐Ÿ–•', '๐Ÿ’', '๐Ÿ™Š', '๐Ÿ˜น', '๐Ÿ—ฃ', '๐Ÿ’ซ', '๐Ÿ’€', '๐Ÿ‘‘', '๐ŸŽต', + '๐Ÿคž', '๐Ÿ˜›', '๐Ÿ”ด', '๐Ÿ˜ค', '๐ŸŒผ', '๐Ÿ˜ซ', 'โšฝ', '๐Ÿค™', 'โ˜•', '๐Ÿ†', + '๐Ÿคซ', '๐Ÿ‘ˆ', '๐Ÿ˜ฎ', '๐Ÿ™†', '๐Ÿป', '๐Ÿƒ', '๐Ÿถ', '๐Ÿ’', '๐Ÿ˜ฒ', '๐ŸŒฟ', + '๐Ÿงก', '๐ŸŽ', 'โšก', '๐ŸŒž', '๐ŸŽˆ', 'โŒ', 'โœŠ', '๐Ÿ‘‹', '๐Ÿ˜ฐ', '๐Ÿคจ', + '๐Ÿ˜ถ', '๐Ÿค', '๐Ÿšถ', '๐Ÿ’ฐ', '๐Ÿ“', '๐Ÿ’ข', '๐ŸคŸ', '๐Ÿ™', '๐Ÿšจ', '๐Ÿ’จ', + '๐Ÿคฌ', 'โœˆ', '๐ŸŽ€', '๐Ÿบ', '๐Ÿค“', '๐Ÿ˜™', '๐Ÿ’Ÿ', '๐ŸŒฑ', '๐Ÿ˜–', '๐Ÿ‘ถ', + '๐Ÿฅด', 'โ–ถ', 'โžก', 'โ“', '๐Ÿ’Ž', '๐Ÿ’ธ', 'โฌ‡', '๐Ÿ˜จ', '๐ŸŒš', '๐Ÿฆ‹', + '๐Ÿ˜ท', '๐Ÿ•บ', 'โš ', '๐Ÿ™…', '๐Ÿ˜Ÿ', '๐Ÿ˜ต', '๐Ÿ‘Ž', '๐Ÿคฒ', '๐Ÿค ', '๐Ÿคง', + '๐Ÿ“Œ', '๐Ÿ”ต', '๐Ÿ’…', '๐Ÿง', '๐Ÿพ', '๐Ÿ’', '๐Ÿ˜—', '๐Ÿค‘', '๐ŸŒŠ', '๐Ÿคฏ', + '๐Ÿท', 'โ˜Ž', '๐Ÿ’ง', '๐Ÿ˜ฏ', '๐Ÿ’†', '๐Ÿ‘†', '๐ŸŽค', '๐Ÿ™‡', '๐Ÿ‘', 'โ„', + '๐ŸŒด', '๐Ÿ’ฃ', '๐Ÿธ', '๐Ÿ’Œ', '๐Ÿ“', '๐Ÿฅ€', '๐Ÿคข', '๐Ÿ‘…', '๐Ÿ’ก', '๐Ÿ’ฉ', + '๐Ÿ‘', '๐Ÿ“ธ', '๐Ÿ‘ป', '๐Ÿค', '๐Ÿคฎ', '๐ŸŽผ', '๐Ÿฅต', '๐Ÿšฉ', '๐ŸŽ', '๐ŸŠ', + '๐Ÿ‘ผ', '๐Ÿ’', '๐Ÿ“ฃ', '๐Ÿฅ‚', +} + +var base256emojiReverseTable map[rune]byte + +func init() { + base256emojiReverseTable = make(map[rune]byte, len(base256emojiTable)) + for i, v := range base256emojiTable { + base256emojiReverseTable[v] = byte(i) + } +} + +func base256emojiEncode(in []byte) string { + var l int + for _, v := range in { + l += utf8.RuneLen(base256emojiTable[v]) + } + var out strings.Builder + out.Grow(l) + for _, v := range in { + out.WriteRune(base256emojiTable[v]) + } + return out.String() +} + +type base256emojiCorruptInputError struct { + index int + char rune +} + +func (e base256emojiCorruptInputError) Error() string { + return "illegal base256emoji data at input byte " + strconv.FormatInt(int64(e.index), 10) + ", char: '" + string(e.char) + "'" +} + +func (e base256emojiCorruptInputError) String() string { + return e.Error() +} + +func base256emojiDecode(in string) ([]byte, error) { + out := make([]byte, utf8.RuneCountInString(in)) + var stri int + for i := 0; len(in) > 0; i++ { + r, n := utf8.DecodeRuneInString(in) + in = in[n:] + var ok bool + out[i], ok = base256emojiReverseTable[r] + if !ok { + return nil, base256emojiCorruptInputError{stri, r} + } + stri += n + } + return out, nil +} diff --git a/base256emoji_test.go b/base256emoji_test.go new file mode 100644 index 0000000..95177a3 --- /dev/null +++ b/base256emoji_test.go @@ -0,0 +1,26 @@ +package multibase + +import "testing" + +func TestBase256EmojiAlphabet(t *testing.T) { + var c uint + for _, v := range base256emojiTable { + if v != rune(0) { + c++ + } + } + if c != 256 { + t.Errorf("Base256Emoji count is wrong, expected 256, got %d.", c) + } +} + +func TestBase256EmojiUniq(t *testing.T) { + m := make(map[rune]struct{}, len(base256emojiTable)) + for i, v := range base256emojiTable { + _, ok := m[v] + if ok { + t.Errorf("Base256Emoji duplicate %s at index %d.", string(v), i) + } + m[v] = struct{}{} + } +} diff --git a/encoder.go b/encoder.go index dd4fd44..09664a3 100644 --- a/encoder.go +++ b/encoder.go @@ -2,6 +2,7 @@ package multibase import ( "fmt" + "unicode/utf8" ) // Encoder is a multibase encoding that is verified to be supported and @@ -36,8 +37,9 @@ func EncoderByName(str string) (Encoder, error) { var ok bool if len(str) == 0 { return Encoder{-1}, fmt.Errorf("empty multibase encoding") - } else if len(str) == 1 { - base = Encoding(str[0]) + } else if utf8.RuneCountInString(str) == 1 { + r, _ := utf8.DecodeRuneInString(str) + base = Encoding(r) _, ok = EncodingToStr[base] } else { base, ok = Encodings[str] diff --git a/encoder_test.go b/encoder_test.go index 3db2c13..7f50647 100644 --- a/encoder_test.go +++ b/encoder_test.go @@ -2,6 +2,7 @@ package multibase import ( "testing" + "unicode/utf8" ) func TestInvalidCode(t *testing.T) { @@ -43,9 +44,10 @@ func TestEncoder(t *testing.T) { } // Test that an encoder can be created from the single letter // prefix - _, err = EncoderByName(str[0:1]) + r, _ := utf8.DecodeRuneInString(str) + _, err = EncoderByName(string(r)) if err != nil { - t.Fatalf("EncoderByName(%s) failed: %v", str[0:1], err) + t.Fatalf("EncoderByName(%s) failed: %v", string(r), err) } } } diff --git a/multibase.go b/multibase.go index 92975c2..8122a0a 100644 --- a/multibase.go +++ b/multibase.go @@ -4,6 +4,7 @@ import ( "encoding/base64" "encoding/hex" "fmt" + "unicode/utf8" b58 "github.com/mr-tron/base58/base58" b32 "github.com/multiformats/go-base32" @@ -38,31 +39,33 @@ const ( Base64url = 'u' Base64pad = 'M' Base64urlPad = 'U' + Base256Emoji = '๐Ÿš€' ) // EncodingToStr is a map of the supported encoding, unsupported encoding // specified in standard are left out var EncodingToStr = map[Encoding]string{ - 0x00: "identity", - '0': "base2", - 'f': "base16", - 'F': "base16upper", - 'b': "base32", - 'B': "base32upper", - 'c': "base32pad", - 'C': "base32padupper", - 'v': "base32hex", - 'V': "base32hexupper", - 't': "base32hexpad", - 'T': "base32hexpadupper", - 'k': "base36", - 'K': "base36upper", - 'z': "base58btc", - 'Z': "base58flickr", - 'm': "base64", - 'u': "base64url", - 'M': "base64pad", - 'U': "base64urlpad", + 0x00: "identity", + '0': "base2", + 'f': "base16", + 'F': "base16upper", + 'b': "base32", + 'B': "base32upper", + 'c': "base32pad", + 'C': "base32padupper", + 'v': "base32hex", + 'V': "base32hexupper", + 't': "base32hexpad", + 'T': "base32hexpadupper", + 'k': "base36", + 'K': "base36upper", + 'z': "base58btc", + 'Z': "base58flickr", + 'm': "base64", + 'u': "base64url", + 'M': "base64pad", + 'U': "base64urlpad", + Base256Emoji: "base256emoji", } var Encodings = map[string]Encoding{} @@ -123,6 +126,8 @@ func Encode(base Encoding, data []byte) (string, error) { return string(Base64url) + base64.RawURLEncoding.EncodeToString(data), nil case Base64: return string(Base64) + base64.RawStdEncoding.EncodeToString(data), nil + case Base256Emoji: + return string(Base256Emoji) + base256emojiEncode(data), nil default: return "", ErrUnsupportedEncoding } @@ -135,7 +140,8 @@ func Decode(data string) (Encoding, []byte, error) { return 0, nil, fmt.Errorf("cannot decode multibase for zero length string") } - enc := Encoding(data[0]) + r, _ := utf8.DecodeRuneInString(data) + enc := Encoding(r) switch enc { case Identity: @@ -179,6 +185,9 @@ func Decode(data string) (Encoding, []byte, error) { case Base64url: bytes, err := base64.RawURLEncoding.DecodeString(data[1:]) return Base64url, bytes, err + case Base256Emoji: + bytes, err := base256emojiDecode(data[4:]) + return Base256Emoji, bytes, err default: return -1, nil, ErrUnsupportedEncoding } diff --git a/multibase_test.go b/multibase_test.go index 4f938d5..2e5acf2 100644 --- a/multibase_test.go +++ b/multibase_test.go @@ -44,6 +44,7 @@ var encodedSamples = map[Encoding]string{ Base64url: "uRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE", Base64pad: "MRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=", Base64urlPad: "URGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=", + Base256Emoji: "๐Ÿš€๐Ÿ’›โœ‹๐Ÿ’ƒโœ‹๐Ÿ˜ป๐Ÿ˜ˆ๐Ÿฅบ๐Ÿคค๐Ÿ€๐ŸŒŸ๐Ÿ’โœ‹๐Ÿ˜…โœ‹๐Ÿ’ฆโœ‹๐Ÿฅบ๐Ÿƒ๐Ÿ˜ˆ๐Ÿ˜ด๐ŸŒŸ๐Ÿ˜ป๐Ÿ˜๐Ÿ‘๐Ÿ‘๐Ÿ‘", } func testEncode(t *testing.T, encoding Encoding, bytes []byte, expected string) { diff --git a/spec b/spec index a4b4a4e..cffd1aa 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit a4b4a4e5e43a1ef4c052dc1c7a15354662b46ca1 +Subproject commit cffd1aa30832875d568495a7cad77f512f50b9e7