feat: add UTF-8 support and base256emoji
This includes fixes for UTF-8 handling as well as the base256emoji encoding (an encoding which actually uses UTF-8).
This commit is contained in:
94
base256emoji.go
Normal file
94
base256emoji.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package multibase
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var base256emojiTable = [256]rune{
|
||||
// Curated list: this is just a list of things that are *somewhat* related to our community.
|
||||
'🚀', '🪐', '☄', '🛰', '🌌', // Space
|
||||
'🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘', // Moon
|
||||
'🌍', '🌏', '🌎', // Our Home, for now (earth)
|
||||
'☉', '☀', // Our Garden, for now (sol)
|
||||
'💻', '🖥', '💾', '💿', // Computer
|
||||
// The rest is completed from https://home.unicode.org/emoji/emoji-frequency/ at the time of creation (december 2021) (the data is from 2019), most used first until we reach 256.
|
||||
// We exclude modifier-based emojis (such as flags) as they are bigger than one single codepoint.
|
||||
// Some other emojis were removed ad hoc for various reasons.
|
||||
'😂', '❤', '😍', '🤣', '😊', '🙏', '💕', '😭', '😘', '👍',
|
||||
'😅', '👏', '😁', '🔥', '🥰', '💔', '💖', '💙', '😢', '🤔',
|
||||
'😆', '🙄', '💪', '😉', '☺', '👌', '🤗', '💜', '😔', '😎',
|
||||
'😇', '🌹', '🤦', '🎉', '💞', '✌', '✨', '🤷', '😱', '😌',
|
||||
'🌸', '🙌', '😋', '💗', '💚', '😏', '💛', '🙂', '💓', '🤩',
|
||||
'😄', '😀', '🖤', '😃', '💯', '🙈', '👇', '🎶', '😒', '🤭',
|
||||
'❣', '😜', '💋', '👀', '😪', '😑', '💥', '🙋', '😞', '😩',
|
||||
'😡', '🤪', '👊', '🥳', '😥', '🤤', '👉', '💃', '😳', '✋',
|
||||
'😚', '😝', '😴', '🌟', '😬', '🙃', '🍀', '🌷', '😻', '😓',
|
||||
'⭐', '✅', '🥺', '🌈', '😈', '🤘', '💦', '✔', '😣', '🏃',
|
||||
'💐', '☹', '🎊', '💘', '😠', '☝', '😕', '🌺', '🎂', '🌻',
|
||||
'😐', '🖕', '💝', '🙊', '😹', '🗣', '💫', '💀', '👑', '🎵',
|
||||
'🤞', '😛', '🔴', '😤', '🌼', '😫', '⚽', '🤙', '☕', '🏆',
|
||||
'🤫', '👈', '😮', '🙆', '🍻', '🍃', '🐶', '💁', '😲', '🌿',
|
||||
'🧡', '🎁', '⚡', '🌞', '🎈', '❌', '✊', '👋', '😰', '🤨',
|
||||
'😶', '🤝', '🚶', '💰', '🍓', '💢', '🤟', '🙁', '🚨', '💨',
|
||||
'🤬', '✈', '🎀', '🍺', '🤓', '😙', '💟', '🌱', '😖', '👶',
|
||||
'🥴', '▶', '➡', '❓', '💎', '💸', '⬇', '😨', '🌚', '🦋',
|
||||
'😷', '🕺', '⚠', '🙅', '😟', '😵', '👎', '🤲', '🤠', '🤧',
|
||||
'📌', '🔵', '💅', '🧐', '🐾', '🍒', '😗', '🤑', '🌊', '🤯',
|
||||
'🐷', '☎', '💧', '😯', '💆', '👆', '🎤', '🙇', '🍑', '❄',
|
||||
'🌴', '💣', '🐸', '💌', '📍', '🥀', '🤢', '👅', '💡', '💩',
|
||||
'👐', '📸', '👻', '🤐', '🤮', '🎼', '🥵', '🚩', '🍎', '🍊',
|
||||
'👼', '💍', '📣', '🥂',
|
||||
}
|
||||
|
||||
var base256emojiReverseTable map[rune]byte
|
||||
|
||||
func init() {
|
||||
base256emojiReverseTable = make(map[rune]byte, len(base256emojiTable))
|
||||
for i, v := range base256emojiTable {
|
||||
base256emojiReverseTable[v] = byte(i)
|
||||
}
|
||||
}
|
||||
|
||||
func base256emojiEncode(in []byte) string {
|
||||
var l int
|
||||
for _, v := range in {
|
||||
l += utf8.RuneLen(base256emojiTable[v])
|
||||
}
|
||||
var out strings.Builder
|
||||
out.Grow(l)
|
||||
for _, v := range in {
|
||||
out.WriteRune(base256emojiTable[v])
|
||||
}
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// base256emojiCorruptInputError is returned by base256emojiDecode when it
// meets a rune that has no entry in the reverse lookup table.
type base256emojiCorruptInputError struct {
	// index is the byte offset, within the string given to the decoder,
	// at which the offending rune starts.
	index int
	// char is the rune that could not be mapped back to a byte.
	char rune
}

// Error implements the error interface, reporting the byte position and the
// offending character.
func (e base256emojiCorruptInputError) Error() string {
	position := strconv.Itoa(e.index)
	offender := string(e.char)
	return "illegal base256emoji data at input byte " + position + ", char: '" + offender + "'"
}

// String returns the same text as Error.
func (e base256emojiCorruptInputError) String() string {
	return e.Error()
}
|
||||
|
||||
func base256emojiDecode(in string) ([]byte, error) {
|
||||
out := make([]byte, utf8.RuneCountInString(in))
|
||||
var stri int
|
||||
for i := 0; len(in) > 0; i++ {
|
||||
r, n := utf8.DecodeRuneInString(in)
|
||||
in = in[n:]
|
||||
var ok bool
|
||||
out[i], ok = base256emojiReverseTable[r]
|
||||
if !ok {
|
||||
return nil, base256emojiCorruptInputError{stri, r}
|
||||
}
|
||||
stri += n
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
26
base256emoji_test.go
Normal file
26
base256emoji_test.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package multibase
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestBase256EmojiAlphabet(t *testing.T) {
|
||||
var c uint
|
||||
for _, v := range base256emojiTable {
|
||||
if v != rune(0) {
|
||||
c++
|
||||
}
|
||||
}
|
||||
if c != 256 {
|
||||
t.Errorf("Base256Emoji count is wrong, expected 256, got %d.", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBase256EmojiUniq(t *testing.T) {
|
||||
m := make(map[rune]struct{}, len(base256emojiTable))
|
||||
for i, v := range base256emojiTable {
|
||||
_, ok := m[v]
|
||||
if ok {
|
||||
t.Errorf("Base256Emoji duplicate %s at index %d.", string(v), i)
|
||||
}
|
||||
m[v] = struct{}{}
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package multibase
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Encoder is a multibase encoding that is verified to be supported and
|
||||
@@ -36,8 +37,9 @@ func EncoderByName(str string) (Encoder, error) {
|
||||
var ok bool
|
||||
if len(str) == 0 {
|
||||
return Encoder{-1}, fmt.Errorf("empty multibase encoding")
|
||||
} else if len(str) == 1 {
|
||||
base = Encoding(str[0])
|
||||
} else if utf8.RuneCountInString(str) == 1 {
|
||||
r, _ := utf8.DecodeRuneInString(str)
|
||||
base = Encoding(r)
|
||||
_, ok = EncodingToStr[base]
|
||||
} else {
|
||||
base, ok = Encodings[str]
|
||||
|
||||
@@ -2,6 +2,7 @@ package multibase
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestInvalidCode(t *testing.T) {
|
||||
@@ -43,9 +44,10 @@ func TestEncoder(t *testing.T) {
|
||||
}
|
||||
// Test that an encoder can be created from the single letter
|
||||
// prefix
|
||||
_, err = EncoderByName(str[0:1])
|
||||
r, _ := utf8.DecodeRuneInString(str)
|
||||
_, err = EncoderByName(string(r))
|
||||
if err != nil {
|
||||
t.Fatalf("EncoderByName(%s) failed: %v", str[0:1], err)
|
||||
t.Fatalf("EncoderByName(%s) failed: %v", string(r), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
51
multibase.go
51
multibase.go
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
|
||||
b58 "github.com/mr-tron/base58/base58"
|
||||
b32 "github.com/multiformats/go-base32"
|
||||
@@ -38,31 +39,33 @@ const (
|
||||
Base64url = 'u'
|
||||
Base64pad = 'M'
|
||||
Base64urlPad = 'U'
|
||||
Base256Emoji = '🚀'
|
||||
)
|
||||
|
||||
// EncodingToStr is a map of the supported encoding, unsupported encoding
|
||||
// specified in standard are left out
|
||||
var EncodingToStr = map[Encoding]string{
|
||||
0x00: "identity",
|
||||
'0': "base2",
|
||||
'f': "base16",
|
||||
'F': "base16upper",
|
||||
'b': "base32",
|
||||
'B': "base32upper",
|
||||
'c': "base32pad",
|
||||
'C': "base32padupper",
|
||||
'v': "base32hex",
|
||||
'V': "base32hexupper",
|
||||
't': "base32hexpad",
|
||||
'T': "base32hexpadupper",
|
||||
'k': "base36",
|
||||
'K': "base36upper",
|
||||
'z': "base58btc",
|
||||
'Z': "base58flickr",
|
||||
'm': "base64",
|
||||
'u': "base64url",
|
||||
'M': "base64pad",
|
||||
'U': "base64urlpad",
|
||||
0x00: "identity",
|
||||
'0': "base2",
|
||||
'f': "base16",
|
||||
'F': "base16upper",
|
||||
'b': "base32",
|
||||
'B': "base32upper",
|
||||
'c': "base32pad",
|
||||
'C': "base32padupper",
|
||||
'v': "base32hex",
|
||||
'V': "base32hexupper",
|
||||
't': "base32hexpad",
|
||||
'T': "base32hexpadupper",
|
||||
'k': "base36",
|
||||
'K': "base36upper",
|
||||
'z': "base58btc",
|
||||
'Z': "base58flickr",
|
||||
'm': "base64",
|
||||
'u': "base64url",
|
||||
'M': "base64pad",
|
||||
'U': "base64urlpad",
|
||||
Base256Emoji: "base256emoji",
|
||||
}
|
||||
|
||||
var Encodings = map[string]Encoding{}
|
||||
@@ -123,6 +126,8 @@ func Encode(base Encoding, data []byte) (string, error) {
|
||||
return string(Base64url) + base64.RawURLEncoding.EncodeToString(data), nil
|
||||
case Base64:
|
||||
return string(Base64) + base64.RawStdEncoding.EncodeToString(data), nil
|
||||
case Base256Emoji:
|
||||
return string(Base256Emoji) + base256emojiEncode(data), nil
|
||||
default:
|
||||
return "", ErrUnsupportedEncoding
|
||||
}
|
||||
@@ -135,7 +140,8 @@ func Decode(data string) (Encoding, []byte, error) {
|
||||
return 0, nil, fmt.Errorf("cannot decode multibase for zero length string")
|
||||
}
|
||||
|
||||
enc := Encoding(data[0])
|
||||
r, _ := utf8.DecodeRuneInString(data)
|
||||
enc := Encoding(r)
|
||||
|
||||
switch enc {
|
||||
case Identity:
|
||||
@@ -179,6 +185,9 @@ func Decode(data string) (Encoding, []byte, error) {
|
||||
case Base64url:
|
||||
bytes, err := base64.RawURLEncoding.DecodeString(data[1:])
|
||||
return Base64url, bytes, err
|
||||
case Base256Emoji:
|
||||
bytes, err := base256emojiDecode(data[4:])
|
||||
return Base256Emoji, bytes, err
|
||||
default:
|
||||
return -1, nil, ErrUnsupportedEncoding
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ var encodedSamples = map[Encoding]string{
|
||||
Base64url: "uRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE",
|
||||
Base64pad: "MRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=",
|
||||
Base64urlPad: "URGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchISE=",
|
||||
Base256Emoji: "🚀💛✋💃✋😻😈🥺🤤🍀🌟💐✋😅✋💦✋🥺🏃😈😴🌟😻😝👏👏👏",
|
||||
}
|
||||
|
||||
func testEncode(t *testing.T, encoding Encoding, bytes []byte, expected string) {
|
||||
|
||||
2
spec
2
spec
Submodule spec updated: a4b4a4e5e4...cffd1aa308
Reference in New Issue
Block a user