feat: use CIDs as their byte representation instead of a struct

This commit is contained in:
dignifiedquire
2018-05-03 15:12:25 +02:00
committed by Kevin Atkinson
parent 6ddb575a8d
commit e153340e5a
6 changed files with 127 additions and 117 deletions

View File

@@ -5,7 +5,7 @@ import (
)
type Builder interface {
Sum(data []byte) (*Cid, error)
Sum(data []byte) (Cid, error)
GetCodec() uint64
WithCodec(uint64) Builder
}
@@ -33,7 +33,7 @@ func (p Prefix) WithCodec(c uint64) Builder {
return p
}
func (p V0Builder) Sum(data []byte) (*Cid, error) {
func (p V0Builder) Sum(data []byte) (Cid, error) {
hash, err := mh.Sum(data, mh.SHA2_256, -1)
if err != nil {
return nil, err
@@ -52,7 +52,7 @@ func (p V0Builder) WithCodec(c uint64) Builder {
return V1Builder{Codec: c, MhType: mh.SHA2_256}
}
func (p V1Builder) Sum(data []byte) (*Cid, error) {
func (p V1Builder) Sum(data []byte) (Cid, error) {
mhLen := p.MhLength
if mhLen <= 0 {
mhLen = -1

146
cid.go
View File

@@ -132,36 +132,53 @@ var CodecToStr = map[uint64]string{
// They exist to allow IPFS to work with Cids while keeping
// compatibility with the plain-multihash format used in IPFS.
// NewCidV1 should be used preferentially.
func NewCidV0(mhash mh.Multihash) *Cid {
return &Cid{
version: 0,
codec: DagProtobuf,
hash: mhash,
}
// NewCidV0 wraps the given multihash in a version-0 Cid. The codec is
// always DagProtobuf for this version.
func NewCidV0(mhash mh.Multihash) Cid {
	const cidVersion = 0
	return newCid(cidVersion, DagProtobuf, mhash)
}
// NewCidV1 returns a new Cid using the given multicodec-packed
// content type.
func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid {
return &Cid{
version: 1,
codec: codecType,
hash: mhash,
// NewCidV1 wraps the given multihash in a version-1 Cid tagged with
// codecType as its multicodec content type.
func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
	const cidVersion = 1
	return newCid(cidVersion, codecType, mhash)
}
// newCid serializes its arguments into the Cid byte layout: the version as
// a uvarint, then the codec as a uvarint, then the raw multihash bytes.
func newCid(version, codecType uint64, mhash mh.Multihash) Cid {
	// Reserve room for two maximum-length uvarints in front of the hash;
	// whatever the varints do not use is trimmed off by the final slice.
	out := make([]byte, 2*binary.MaxVarintLen64+len(mhash))

	off := binary.PutUvarint(out, version)
	off += binary.PutUvarint(out[off:], codecType)

	if copied := copy(out[off:], mhash); copied != len(mhash) {
		panic("copy hash length is inconsistent")
	}
	return Cid(out[:off+len(mhash)])
}
// Cid represents a self-describing content addressed
// identifier. It is formed by a Version, a Codec (which indicates
// a multicodec-packed content type) and a Multihash.
type Cid struct {
version uint64
codec uint64
hash mh.Multihash
// Cid is stored directly as its binary encoding.
//
// Byte layout: [version, codec, multihash]
// - version uvarint
// - codec uvarint
// - hash mh.Multihash
type Cid []byte

// version decodes the leading uvarint of the Cid. The decode status is
// discarded, so an empty, truncated or overflowing varint reads as 0.
func (c Cid) version() uint64 {
	v, _ := binary.Uvarint(c)
	return v
}

// codec decodes the second uvarint of the Cid, i.e. the one that follows
// the version. It returns 0 when either varint cannot be decoded.
func (c Cid) codec() uint64 {
	_, n := binary.Uvarint(c)
	// binary.Uvarint reports n < 0 on overflow and n == 0 when the buffer is
	// too short. A negative n must never be used as a slice index, so bail
	// out before touching c[n:].
	if n <= 0 {
		return 0
	}
	codec, _ := binary.Uvarint(c[n:])
	return codec
}
// Parse is a short-hand function to perform Decode, Cast etc... on
// a generic interface{} type.
func Parse(v interface{}) (*Cid, error) {
func Parse(v interface{}) (Cid, error) {
switch v2 := v.(type) {
case string:
if strings.Contains(v2, "/ipfs/") {
@@ -172,7 +189,7 @@ func Parse(v interface{}) (*Cid, error) {
return Cast(v2)
case mh.Multihash:
return NewCidV0(v2), nil
case *Cid:
case Cid:
return v2, nil
default:
return nil, fmt.Errorf("can't parse %+v as Cid", v2)
@@ -191,7 +208,7 @@ func Parse(v interface{}) (*Cid, error) {
// Decode will also detect and parse CidV0 strings. Strings
// starting with "Qm" are considered CidV0 and treated directly
// as B58-encoded multihashes.
func Decode(v string) (*Cid, error) {
func Decode(v string) (Cid, error) {
if len(v) < 2 {
return nil, ErrCidTooShort
}
@@ -257,18 +274,14 @@ func uvError(read int) error {
//
// Please use decode when parsing a regular Cid string, as Cast does not
// expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
func Cast(data []byte) (*Cid, error) {
func Cast(data []byte) (Cid, error) {
if len(data) == 34 && data[0] == 18 && data[1] == 32 {
h, err := mh.Cast(data)
if err != nil {
return nil, err
}
return &Cid{
codec: DagProtobuf,
version: 0,
hash: h,
}, nil
return NewCidV0(h), nil
}
vers, n := binary.Uvarint(data)
@@ -280,7 +293,7 @@ func Cast(data []byte) (*Cid, error) {
return nil, fmt.Errorf("invalid cid version number: %d", vers)
}
codec, cn := binary.Uvarint(data[n:])
_, cn := binary.Uvarint(data[n:])
if err := uvError(cn); err != nil {
return nil, err
}
@@ -291,25 +304,21 @@ func Cast(data []byte) (*Cid, error) {
return nil, err
}
return &Cid{
version: vers,
codec: codec,
hash: h,
}, nil
return Cid(data[0 : n+cn+len(h)]), nil
}
// Type returns the multicodec-packed content type of a Cid.
func (c *Cid) Type() uint64 {
return c.codec
// Type reports the codec value decoded from the Cid bytes.
func (c Cid) Type() uint64 {
	contentType := c.codec()
	return contentType
}
// String returns the default string representation of a
// Cid. Currently, Base58 is used as the encoding for the
// multibase string.
func (c *Cid) String() string {
switch c.version {
func (c Cid) String() string {
switch c.version() {
case 0:
return c.hash.B58String()
return c.Hash().B58String()
case 1:
mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1())
if err != nil {
@@ -324,13 +333,13 @@ func (c *Cid) String() string {
// StringOfBase returns the string representation of a Cid
// encoded in the selected base.
func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) {
switch c.version {
func (c Cid) StringOfBase(base mbase.Encoding) (string, error) {
switch c.version() {
case 0:
if base != mbase.Base58BTC {
return "", ErrInvalidEncoding
}
return c.hash.B58String(), nil
return c.Hash().B58String(), nil
case 1:
return mbase.Encode(base, c.bytesV1())
default:
@@ -341,10 +350,10 @@ func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) {
// Encode return the string representation of a Cid in a given base
// when applicable. Version 0 Cid's are always in Base58 as they do
// not take a multibase prefix.
func (c *Cid) Encode(base mbase.Encoder) string {
switch c.version {
func (c Cid) Encode(base mbase.Encoder) string {
switch c.version() {
case 0:
return c.hash.B58String()
return c.Hash().B58String()
case 1:
return base.Encode(c.bytesV1())
default:
@@ -353,15 +362,20 @@ func (c *Cid) Encode(base mbase.Encoder) string {
}
// Hash returns the multihash contained by a Cid.
func (c *Cid) Hash() mh.Multihash {
return c.hash
// Hash returns the multihash portion of the Cid: everything that follows the
// version and codec uvarints. It returns nil when either varint overflows,
// instead of slicing with a negative offset.
func (c Cid) Hash() mh.Multihash {
	// skip version length
	_, n1 := binary.Uvarint(c)
	if n1 < 0 {
		// binary.Uvarint signals overflow with a negative count.
		return nil
	}
	// skip codec length
	_, n2 := binary.Uvarint(c[n1:])
	if n2 < 0 {
		return nil
	}
	return mh.Multihash(c[n1+n2:])
}
// Bytes returns the byte representation of a Cid.
// The output of bytes can be parsed back into a Cid
// with Cast().
func (c *Cid) Bytes() []byte {
switch c.version {
func (c Cid) Bytes() []byte {
switch c.version() {
case 0:
return c.bytesV0()
case 1:
@@ -371,30 +385,19 @@ func (c *Cid) Bytes() []byte {
}
}
func (c *Cid) bytesV0() []byte {
return []byte(c.hash)
// bytesV0 returns the version-0 byte form, which is the multihash alone.
func (c Cid) bytesV0() []byte {
	digest := c.Hash()
	return []byte(digest)
}
func (c *Cid) bytesV1() []byte {
// two 8 bytes (max) numbers plus hash
buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash))
n := binary.PutUvarint(buf, c.version)
n += binary.PutUvarint(buf[n:], c.codec)
cn := copy(buf[n:], c.hash)
if cn != len(c.hash) {
panic("copy hash length is inconsistent")
}
return buf[:n+len(c.hash)]
// bytesV1 returns the version-1 byte form, which is the Cid's own backing
// slice converted to []byte.
func (c Cid) bytesV1() []byte {
	raw := []byte(c)
	return raw
}
// Equals checks that two Cids are the same.
// In order for two Cids to be considered equal, the
// Version, the Codec and the Multihash must match.
func (c *Cid) Equals(o *Cid) bool {
return c.codec == o.codec &&
c.version == o.version &&
bytes.Equal(c.hash, o.hash)
// Equals reports whether c and o consist of exactly the same bytes.
func (c Cid) Equals(o Cid) bool {
	lhs, rhs := []byte(c), []byte(o)
	return bytes.Equal(lhs, rhs)
}
// UnmarshalJSON parses the JSON representation of a Cid.
@@ -419,9 +422,8 @@ func (c *Cid) UnmarshalJSON(b []byte) error {
return err
}
c.version = out.version
c.hash = out.hash
c.codec = out.codec
*c = out[:]
return nil
}
@@ -436,26 +438,26 @@ func (c Cid) MarshalJSON() ([]byte, error) {
}
// KeyString casts the result of cid.Bytes() as a string, and returns it.
func (c *Cid) KeyString() string {
// KeyString converts the output of Bytes() to a string and returns it.
func (c Cid) KeyString() string {
	raw := c.Bytes()
	return string(raw)
}
// Loggable returns a Loggable (as defined by
// https://godoc.org/github.com/ipfs/go-log).
func (c *Cid) Loggable() map[string]interface{} {
// Loggable returns a single-entry map that holds the Cid under the "cid" key.
func (c Cid) Loggable() map[string]interface{} {
	fields := make(map[string]interface{}, 1)
	fields["cid"] = c
	return fields
}
// Prefix builds and returns a Prefix out of a Cid.
func (c *Cid) Prefix() Prefix {
dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error
// Prefix assembles a Prefix from the Cid: the multihash code and length come
// from decoding the hash, the version and codec from the Cid's own varints.
func (c Cid) Prefix() Prefix {
	// The decode error is deliberately dropped on the assumption that the
	// Cid carries a valid multihash.
	// NOTE(review): confirm what Decode returns on failure; the fields of
	// decoded are read unconditionally below.
	decoded, _ := mh.Decode(c.Hash())

	var p Prefix
	p.MhType = decoded.Code
	p.MhLength = decoded.Length
	p.Version = c.version()
	p.Codec = c.codec()
	return p
}
@@ -474,7 +476,7 @@ type Prefix struct {
// Sum uses the information in a prefix to perform a multihash.Sum()
// and return a newly constructed Cid with the resulting multihash.
func (p Prefix) Sum(data []byte) (*Cid, error) {
func (p Prefix) Sum(data []byte) (Cid, error) {
hash, err := mh.Sum(data, p.MhType, p.MhLength)
if err != nil {
return nil, err

View File

@@ -23,7 +23,7 @@ func Fuzz(data []byte) int {
if err != nil {
panic(err.Error())
}
cid2 := &Cid{}
cid2 := Cid{}
err = cid2.UnmarshalJSON(json)
if err != nil {
panic(err.Error())

View File

@@ -37,16 +37,16 @@ var tCodecs = map[uint64]string{
DecredTx: "decred-tx",
}
func assertEqual(t *testing.T, a, b *Cid) {
if a.codec != b.codec {
func assertEqual(t *testing.T, a, b Cid) {
if a.codec() != b.codec() {
t.Fatal("mismatch on type")
}
if a.version != b.version {
if a.version() != b.version() {
t.Fatal("mismatch on version")
}
if !bytes.Equal(a.hash, b.hash) {
if !bytes.Equal(a.Hash(), b.Hash()) {
t.Fatal("multihash mismatch")
}
}
@@ -77,11 +77,7 @@ func TestBasicMarshaling(t *testing.T) {
t.Fatal(err)
}
cid := &Cid{
codec: 7,
version: 1,
hash: h,
}
cid := newCid(1, 7, h)
data := cid.Bytes()
@@ -107,11 +103,7 @@ func TestBasesMarshaling(t *testing.T) {
t.Fatal(err)
}
cid := &Cid{
codec: 7,
version: 1,
hash: h,
}
cid := newCid(1, 7, h)
data := cid.Bytes()
@@ -179,12 +171,12 @@ func TestV0Handling(t *testing.T) {
t.Fatal(err)
}
if cid.version != 0 {
if cid.version() != 0 {
t.Fatal("should have gotten version 0 cid")
}
if cid.hash.B58String() != old {
t.Fatal("marshaling roundtrip failed")
if cid.Hash().B58String() != old {
t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old)
}
if cid.String() != old {
@@ -306,9 +298,7 @@ func TestPrefixRoundtrip(t *testing.T) {
func Test16BytesVarint(t *testing.T) {
data := []byte("this is some test content")
hash, _ := mh.Sum(data, mh.SHA2_256, -1)
c := NewCidV1(DagCBOR, hash)
c.codec = 1 << 63
c := newCid(1, 1<<63, hash)
_ = c.Bytes()
}
@@ -351,8 +341,8 @@ func TestParse(t *testing.T) {
if err != nil {
return err
}
if cid.version != 0 {
return fmt.Errorf("expected version 0, got %s", string(cid.version))
if cid.version() != 0 {
return fmt.Errorf("expected version 0, got %s", string(cid.version()))
}
actual := cid.Hash().B58String()
if actual != expected {
@@ -424,18 +414,18 @@ func TestJsonRoundTrip(t *testing.T) {
}
var actual Cid
err = json.Unmarshal(enc, &actual)
if !exp.Equals(&actual) {
if !exp.Equals(actual) {
t.Fatal("cids not equal for *Cid")
}
// Verify it works for a Cid.
enc, err = json.Marshal(*exp)
enc, err = json.Marshal(exp)
if err != nil {
t.Fatal(err)
}
var actual2 Cid
err = json.Unmarshal(enc, &actual2)
if !exp.Equals(&actual2) {
if !exp.Equals(actual2) {
t.Fatal("cids not equal for Cid")
}
}
@@ -444,7 +434,10 @@ func BenchmarkStringV1(b *testing.B) {
data := []byte("this is some test content")
hash, _ := mh.Sum(data, mh.SHA2_256, -1)
cid := NewCidV1(Raw, hash)
b.ReportAllocs()
b.ResetTimer()
count := 0
for i := 0; i < b.N; i++ {
count += len(cid.String())
@@ -453,3 +446,22 @@ func BenchmarkStringV1(b *testing.B) {
b.FailNow()
}
}
// BenchmarkBytesV1 exercises Bytes() and a direct []byte conversion on a
// version-1 Cid with allocation reporting enabled, to make sure returning
// the bytes does not allocate.
func BenchmarkBytesV1(b *testing.B) {
	payload := []byte("this is some test content")
	digest, _ := mh.Sum(payload, mh.SHA2_256, -1)
	c := NewCidV1(Raw, digest)

	b.ReportAllocs()
	b.ResetTimer()

	total := 0
	for iter := 0; iter < b.N; iter++ {
		total += len(c.Bytes()) + len([]byte(c))
	}

	// Both views are expected to be 36 bytes long on every iteration.
	if total != 36*2*b.N {
		b.FailNow()
	}
}

14
set.go
View File

@@ -12,18 +12,18 @@ func NewSet() *Set {
}
// Add puts a Cid in the Set.
func (s *Set) Add(c *Cid) {
// Add records the Cid in the set, keyed by the string form of its Bytes().
func (s *Set) Add(c Cid) {
	key := string(c.Bytes())
	s.set[key] = struct{}{}
}
// Has returns if the Set contains a given Cid.
func (s *Set) Has(c *Cid) bool {
// Has reports whether the string form of the Cid's Bytes() is a key in the set.
func (s *Set) Has(c Cid) bool {
	if _, present := s.set[string(c.Bytes())]; present {
		return true
	}
	return false
}
// Remove deletes a Cid from the Set.
func (s *Set) Remove(c *Cid) {
// Remove drops the entry keyed by the string form of the Cid's Bytes().
func (s *Set) Remove(c Cid) {
	key := string(c.Bytes())
	delete(s.set, key)
}
@@ -33,8 +33,8 @@ func (s *Set) Len() int {
}
// Keys returns the Cids in the set.
func (s *Set) Keys() []*Cid {
out := make([]*Cid, 0, len(s.set))
func (s *Set) Keys() []Cid {
out := make([]Cid, 0, len(s.set))
for k := range s.set {
c, _ := Cast([]byte(k))
out = append(out, c)
@@ -44,7 +44,7 @@ func (s *Set) Keys() []*Cid {
// Visit adds a Cid to the set only if it is
// not in it already.
func (s *Set) Visit(c *Cid) bool {
func (s *Set) Visit(c Cid) bool {
if !s.Has(c) {
s.Add(c)
return true
@@ -55,7 +55,7 @@ func (s *Set) Visit(c *Cid) bool {
// ForEach allows to run a custom function on each
// Cid in the set.
func (s *Set) ForEach(f func(c *Cid) error) error {
func (s *Set) ForEach(f func(c Cid) error) error {
for cs := range s.set {
c, _ := Cast([]byte(cs))
err := f(c)

View File

@@ -8,7 +8,7 @@ import (
mh "github.com/multiformats/go-multihash"
)
func makeRandomCid(t *testing.T) *Cid {
func makeRandomCid(t *testing.T) Cid {
p := make([]byte, 256)
_, err := rand.Read(p)
if err != nil {
@@ -20,11 +20,7 @@ func makeRandomCid(t *testing.T) *Cid {
t.Fatal(err)
}
cid := &Cid{
codec: 7,
version: 1,
hash: h,
}
cid := NewCidV1(7, h)
return cid
}
@@ -54,8 +50,8 @@ func TestSet(t *testing.T) {
t.Error("visit should return false")
}
foreach := []*Cid{}
foreachF := func(c *Cid) error {
foreach := []Cid{}
foreachF := func(c Cid) error {
foreach = append(foreach, c)
return nil
}
@@ -68,7 +64,7 @@ func TestSet(t *testing.T) {
t.Error("ForEach should have visited 1 element")
}
foreachErr := func(c *Cid) error {
foreachErr := func(c Cid) error {
return errors.New("test")
}