feat: use CIDs as their byte representation instead of a struct

This commit is contained in:
dignifiedquire
2018-05-03 15:12:25 +02:00
committed by Kevin Atkinson
parent 6ddb575a8d
commit e153340e5a
6 changed files with 127 additions and 117 deletions

View File

@@ -5,7 +5,7 @@ import (
) )
type Builder interface { type Builder interface {
Sum(data []byte) (*Cid, error) Sum(data []byte) (Cid, error)
GetCodec() uint64 GetCodec() uint64
WithCodec(uint64) Builder WithCodec(uint64) Builder
} }
@@ -33,7 +33,7 @@ func (p Prefix) WithCodec(c uint64) Builder {
return p return p
} }
func (p V0Builder) Sum(data []byte) (*Cid, error) { func (p V0Builder) Sum(data []byte) (Cid, error) {
hash, err := mh.Sum(data, mh.SHA2_256, -1) hash, err := mh.Sum(data, mh.SHA2_256, -1)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -52,7 +52,7 @@ func (p V0Builder) WithCodec(c uint64) Builder {
return V1Builder{Codec: c, MhType: mh.SHA2_256} return V1Builder{Codec: c, MhType: mh.SHA2_256}
} }
func (p V1Builder) Sum(data []byte) (*Cid, error) { func (p V1Builder) Sum(data []byte) (Cid, error) {
mhLen := p.MhLength mhLen := p.MhLength
if mhLen <= 0 { if mhLen <= 0 {
mhLen = -1 mhLen = -1

146
cid.go
View File

@@ -132,36 +132,53 @@ var CodecToStr = map[uint64]string{
// They exist to allow IPFS to work with Cids while keeping // They exist to allow IPFS to work with Cids while keeping
// compatibility with the plain-multihash format used in IPFS. // compatibility with the plain-multihash format used in IPFS.
// NewCidV1 should be used preferentially. // NewCidV1 should be used preferentially.
func NewCidV0(mhash mh.Multihash) *Cid { func NewCidV0(mhash mh.Multihash) Cid {
return &Cid{ return newCid(0, DagProtobuf, mhash)
version: 0,
codec: DagProtobuf,
hash: mhash,
}
} }
// NewCidV1 returns a new Cid using the given multicodec-packed // NewCidV1 returns a new Cid using the given multicodec-packed
// content type. // content type.
func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid { func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
return &Cid{ return newCid(1, codecType, mhash)
version: 1,
codec: codecType,
hash: mhash,
} }
func newCid(version, codecType uint64, mhash mh.Multihash) Cid {
hashlen := len(mhash)
// two 8 bytes (max) numbers plus hash
buf := make([]byte, 2*binary.MaxVarintLen64+hashlen)
n := binary.PutUvarint(buf, version)
n += binary.PutUvarint(buf[n:], codecType)
cn := copy(buf[n:], mhash)
if cn != hashlen {
panic("copy hash length is inconsistent")
}
return Cid(buf[:n+hashlen])
} }
// Cid represents a self-describing content addressed // Cid represents a self-describing content addressed
// identifier. It is formed by a Version, a Codec (which indicates // identifier. It is formed by a Version, a Codec (which indicates
// a multicodec-packed content type) and a Multihash. // a multicodec-packed content type) and a Multihash.
type Cid struct { // Byte layout: [version, codec, multihash]
version uint64 // - version uvarint
codec uint64 // - codec uvarint
hash mh.Multihash // - hash mh.Multihash
type Cid []byte

// version decodes the leading uvarint of the Cid's byte representation and
// returns it as the Cid version. The byte count reported by binary.Uvarint
// is discarded, so a decode failure is not signalled to the caller; in that
// case the value returned by binary.Uvarint (0) is passed through as-is.
func (c Cid) version() uint64 {
	ver, _ := binary.Uvarint(c)
	return ver
}
func (c Cid) codec() uint64 {
_, n := binary.Uvarint(c)
codec, _ := binary.Uvarint(c[n:])
return codec
} }
// Parse is a short-hand function to perform Decode, Cast etc... on // Parse is a short-hand function to perform Decode, Cast etc... on
// a generic interface{} type. // a generic interface{} type.
func Parse(v interface{}) (*Cid, error) { func Parse(v interface{}) (Cid, error) {
switch v2 := v.(type) { switch v2 := v.(type) {
case string: case string:
if strings.Contains(v2, "/ipfs/") { if strings.Contains(v2, "/ipfs/") {
@@ -172,7 +189,7 @@ func Parse(v interface{}) (*Cid, error) {
return Cast(v2) return Cast(v2)
case mh.Multihash: case mh.Multihash:
return NewCidV0(v2), nil return NewCidV0(v2), nil
case *Cid: case Cid:
return v2, nil return v2, nil
default: default:
return nil, fmt.Errorf("can't parse %+v as Cid", v2) return nil, fmt.Errorf("can't parse %+v as Cid", v2)
@@ -191,7 +208,7 @@ func Parse(v interface{}) (*Cid, error) {
// Decode will also detect and parse CidV0 strings. Strings // Decode will also detect and parse CidV0 strings. Strings
// starting with "Qm" are considered CidV0 and treated directly // starting with "Qm" are considered CidV0 and treated directly
// as B58-encoded multihashes. // as B58-encoded multihashes.
func Decode(v string) (*Cid, error) { func Decode(v string) (Cid, error) {
if len(v) < 2 { if len(v) < 2 {
return nil, ErrCidTooShort return nil, ErrCidTooShort
} }
@@ -257,18 +274,14 @@ func uvError(read int) error {
// //
// Please use decode when parsing a regular Cid string, as Cast does not // Please use decode when parsing a regular Cid string, as Cast does not
// expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). // expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
func Cast(data []byte) (*Cid, error) { func Cast(data []byte) (Cid, error) {
if len(data) == 34 && data[0] == 18 && data[1] == 32 { if len(data) == 34 && data[0] == 18 && data[1] == 32 {
h, err := mh.Cast(data) h, err := mh.Cast(data)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return &Cid{ return NewCidV0(h), nil
codec: DagProtobuf,
version: 0,
hash: h,
}, nil
} }
vers, n := binary.Uvarint(data) vers, n := binary.Uvarint(data)
@@ -280,7 +293,7 @@ func Cast(data []byte) (*Cid, error) {
return nil, fmt.Errorf("invalid cid version number: %d", vers) return nil, fmt.Errorf("invalid cid version number: %d", vers)
} }
codec, cn := binary.Uvarint(data[n:]) _, cn := binary.Uvarint(data[n:])
if err := uvError(cn); err != nil { if err := uvError(cn); err != nil {
return nil, err return nil, err
} }
@@ -291,25 +304,21 @@ func Cast(data []byte) (*Cid, error) {
return nil, err return nil, err
} }
return &Cid{ return Cid(data[0 : n+cn+len(h)]), nil
version: vers,
codec: codec,
hash: h,
}, nil
} }
// Type returns the multicodec-packed content type of a Cid. // Type returns the multicodec-packed content type of a Cid.
func (c *Cid) Type() uint64 { func (c Cid) Type() uint64 {
return c.codec return c.codec()
} }
// String returns the default string representation of a // String returns the default string representation of a
// Cid. Currently, Base58 is used as the encoding for the // Cid. Currently, Base58 is used as the encoding for the
// multibase string. // multibase string.
func (c *Cid) String() string { func (c Cid) String() string {
switch c.version { switch c.version() {
case 0: case 0:
return c.hash.B58String() return c.Hash().B58String()
case 1: case 1:
mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1()) mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1())
if err != nil { if err != nil {
@@ -324,13 +333,13 @@ func (c *Cid) String() string {
// StringOfBase returns the string representation of a Cid // StringOfBase returns the string representation of a Cid
// encoded in the selected base // encoded in the selected base
func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { func (c Cid) StringOfBase(base mbase.Encoding) (string, error) {
switch c.version { switch c.version() {
case 0: case 0:
if base != mbase.Base58BTC { if base != mbase.Base58BTC {
return "", ErrInvalidEncoding return "", ErrInvalidEncoding
} }
return c.hash.B58String(), nil return c.Hash().B58String(), nil
case 1: case 1:
return mbase.Encode(base, c.bytesV1()) return mbase.Encode(base, c.bytesV1())
default: default:
@@ -341,10 +350,10 @@ func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) {
// Encode returns the string representation of a Cid in a given base // Encode returns the string representation of a Cid in a given base
// when applicable. Version 0 Cid's are always in Base58 as they do // when applicable. Version 0 Cid's are always in Base58 as they do
// not take a multibase prefix. // not take a multibase prefix.
func (c *Cid) Encode(base mbase.Encoder) string { func (c Cid) Encode(base mbase.Encoder) string {
switch c.version { switch c.version() {
case 0: case 0:
return c.hash.B58String() return c.Hash().B58String()
case 1: case 1:
return base.Encode(c.bytesV1()) return base.Encode(c.bytesV1())
default: default:
@@ -353,15 +362,20 @@ func (c *Cid) Encode(base mbase.Encoder) string {
} }
// Hash returns the multihash contained by a Cid. // Hash returns the multihash contained by a Cid.
func (c *Cid) Hash() mh.Multihash { func (c Cid) Hash() mh.Multihash {
return c.hash // skip version length
_, n1 := binary.Uvarint(c)
// skip codec length
_, n2 := binary.Uvarint(c[n1:])
return mh.Multihash(c[n1+n2:])
} }
// Bytes returns the byte representation of a Cid. // Bytes returns the byte representation of a Cid.
// The output of bytes can be parsed back into a Cid // The output of bytes can be parsed back into a Cid
// with Cast(). // with Cast().
func (c *Cid) Bytes() []byte { func (c Cid) Bytes() []byte {
switch c.version { switch c.version() {
case 0: case 0:
return c.bytesV0() return c.bytesV0()
case 1: case 1:
@@ -371,30 +385,19 @@ func (c *Cid) Bytes() []byte {
} }
} }
func (c *Cid) bytesV0() []byte { func (c Cid) bytesV0() []byte {
return []byte(c.hash) return []byte(c.Hash())
} }
func (c *Cid) bytesV1() []byte { func (c Cid) bytesV1() []byte {
// two 8 bytes (max) numbers plus hash return []byte(c)
buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash))
n := binary.PutUvarint(buf, c.version)
n += binary.PutUvarint(buf[n:], c.codec)
cn := copy(buf[n:], c.hash)
if cn != len(c.hash) {
panic("copy hash length is inconsistent")
}
return buf[:n+len(c.hash)]
} }
// Equals checks that two Cids are the same. // Equals checks that two Cids are the same.
// In order for two Cids to be considered equal, the // In order for two Cids to be considered equal, the
// Version, the Codec and the Multihash must match. // Version, the Codec and the Multihash must match.
func (c *Cid) Equals(o *Cid) bool { func (c Cid) Equals(o Cid) bool {
return c.codec == o.codec && return bytes.Equal(c, o)
c.version == o.version &&
bytes.Equal(c.hash, o.hash)
} }
// UnmarshalJSON parses the JSON representation of a Cid. // UnmarshalJSON parses the JSON representation of a Cid.
@@ -419,9 +422,8 @@ func (c *Cid) UnmarshalJSON(b []byte) error {
return err return err
} }
c.version = out.version *c = out[:]
c.hash = out.hash
c.codec = out.codec
return nil return nil
} }
@@ -436,26 +438,26 @@ func (c Cid) MarshalJSON() ([]byte, error) {
} }
// KeyString casts the result of cid.Bytes() as a string, and returns it. // KeyString casts the result of cid.Bytes() as a string, and returns it.
func (c *Cid) KeyString() string { func (c Cid) KeyString() string {
return string(c.Bytes()) return string(c.Bytes())
} }
// Loggable returns a Loggable (as defined by // Loggable returns a Loggable (as defined by
// https://godoc.org/github.com/ipfs/go-log). // https://godoc.org/github.com/ipfs/go-log).
func (c *Cid) Loggable() map[string]interface{} { func (c Cid) Loggable() map[string]interface{} {
return map[string]interface{}{ return map[string]interface{}{
"cid": c, "cid": c,
} }
} }
// Prefix builds and returns a Prefix out of a Cid. // Prefix builds and returns a Prefix out of a Cid.
func (c *Cid) Prefix() Prefix { func (c Cid) Prefix() Prefix {
dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error dec, _ := mh.Decode(c.Hash()) // assuming we got a valid multiaddr, this will not error
return Prefix{ return Prefix{
MhType: dec.Code, MhType: dec.Code,
MhLength: dec.Length, MhLength: dec.Length,
Version: c.version, Version: c.version(),
Codec: c.codec, Codec: c.codec(),
} }
} }
@@ -474,7 +476,7 @@ type Prefix struct {
// Sum uses the information in a prefix to perform a multihash.Sum() // Sum uses the information in a prefix to perform a multihash.Sum()
// and return a newly constructed Cid with the resulting multihash. // and return a newly constructed Cid with the resulting multihash.
func (p Prefix) Sum(data []byte) (*Cid, error) { func (p Prefix) Sum(data []byte) (Cid, error) {
hash, err := mh.Sum(data, p.MhType, p.MhLength) hash, err := mh.Sum(data, p.MhType, p.MhLength)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@@ -23,7 +23,7 @@ func Fuzz(data []byte) int {
if err != nil { if err != nil {
panic(err.Error()) panic(err.Error())
} }
cid2 := &Cid{} cid2 := Cid{}
err = cid2.UnmarshalJSON(json) err = cid2.UnmarshalJSON(json)
if err != nil { if err != nil {
panic(err.Error()) panic(err.Error())

View File

@@ -37,16 +37,16 @@ var tCodecs = map[uint64]string{
DecredTx: "decred-tx", DecredTx: "decred-tx",
} }
func assertEqual(t *testing.T, a, b *Cid) { func assertEqual(t *testing.T, a, b Cid) {
if a.codec != b.codec { if a.codec() != b.codec() {
t.Fatal("mismatch on type") t.Fatal("mismatch on type")
} }
if a.version != b.version { if a.version() != b.version() {
t.Fatal("mismatch on version") t.Fatal("mismatch on version")
} }
if !bytes.Equal(a.hash, b.hash) { if !bytes.Equal(a.Hash(), b.Hash()) {
t.Fatal("multihash mismatch") t.Fatal("multihash mismatch")
} }
} }
@@ -77,11 +77,7 @@ func TestBasicMarshaling(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
cid := &Cid{ cid := newCid(1, 7, h)
codec: 7,
version: 1,
hash: h,
}
data := cid.Bytes() data := cid.Bytes()
@@ -107,11 +103,7 @@ func TestBasesMarshaling(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
cid := &Cid{ cid := newCid(1, 7, h)
codec: 7,
version: 1,
hash: h,
}
data := cid.Bytes() data := cid.Bytes()
@@ -179,12 +171,12 @@ func TestV0Handling(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if cid.version != 0 { if cid.version() != 0 {
t.Fatal("should have gotten version 0 cid") t.Fatal("should have gotten version 0 cid")
} }
if cid.hash.B58String() != old { if cid.Hash().B58String() != old {
t.Fatal("marshaling roundtrip failed") t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old)
} }
if cid.String() != old { if cid.String() != old {
@@ -306,9 +298,7 @@ func TestPrefixRoundtrip(t *testing.T) {
func Test16BytesVarint(t *testing.T) { func Test16BytesVarint(t *testing.T) {
data := []byte("this is some test content") data := []byte("this is some test content")
hash, _ := mh.Sum(data, mh.SHA2_256, -1) hash, _ := mh.Sum(data, mh.SHA2_256, -1)
c := NewCidV1(DagCBOR, hash) c := newCid(1, 1<<63, hash)
c.codec = 1 << 63
_ = c.Bytes() _ = c.Bytes()
} }
@@ -351,8 +341,8 @@ func TestParse(t *testing.T) {
if err != nil { if err != nil {
return err return err
} }
if cid.version != 0 { if cid.version() != 0 {
return fmt.Errorf("expected version 0, got %s", string(cid.version)) return fmt.Errorf("expected version 0, got %s", string(cid.version()))
} }
actual := cid.Hash().B58String() actual := cid.Hash().B58String()
if actual != expected { if actual != expected {
@@ -424,18 +414,18 @@ func TestJsonRoundTrip(t *testing.T) {
} }
var actual Cid var actual Cid
err = json.Unmarshal(enc, &actual) err = json.Unmarshal(enc, &actual)
if !exp.Equals(&actual) { if !exp.Equals(actual) {
t.Fatal("cids not equal for *Cid") t.Fatal("cids not equal for *Cid")
} }
// Verify it works for a Cid. // Verify it works for a Cid.
enc, err = json.Marshal(*exp) enc, err = json.Marshal(exp)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
var actual2 Cid var actual2 Cid
err = json.Unmarshal(enc, &actual2) err = json.Unmarshal(enc, &actual2)
if !exp.Equals(&actual2) { if !exp.Equals(actual2) {
t.Fatal("cids not equal for Cid") t.Fatal("cids not equal for Cid")
} }
} }
@@ -444,7 +434,10 @@ func BenchmarkStringV1(b *testing.B) {
data := []byte("this is some test content") data := []byte("this is some test content")
hash, _ := mh.Sum(data, mh.SHA2_256, -1) hash, _ := mh.Sum(data, mh.SHA2_256, -1)
cid := NewCidV1(Raw, hash) cid := NewCidV1(Raw, hash)
b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
count := 0 count := 0
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
count += len(cid.String()) count += len(cid.String())
@@ -453,3 +446,22 @@ func BenchmarkStringV1(b *testing.B) {
b.FailNow() b.FailNow()
} }
} }
// BenchmarkBytesV1 measures the cost of obtaining the raw bytes of a
// version 1 Cid, both through Bytes() and through a direct []byte
// conversion. Allocations are reported via b.ReportAllocs so that a change
// which makes returning the bytes allocate shows up in the benchmark output.
func BenchmarkBytesV1(b *testing.B) {
	// Expected length in bytes of the Cid under test.
	// NOTE(review): presumably 1 version byte + 1 codec byte + a 34-byte
	// sha2-256 multihash — confirm the Raw codec encodes as a single byte.
	const wantLen = 36

	data := []byte("this is some test content")
	hash, err := mh.Sum(data, mh.SHA2_256, -1)
	if err != nil {
		b.Fatalf("hashing benchmark input: %v", err)
	}
	cid := NewCidV1(Raw, hash)

	b.ReportAllocs()
	b.ResetTimer()

	// Accumulate the lengths so the calls in the loop have an observable
	// result that is checked after the timed region.
	count := 0
	for i := 0; i < b.N; i++ {
		count += len(cid.Bytes())
		count += len([]byte(cid))
	}
	if want := wantLen * 2 * b.N; count != want {
		b.Fatalf("unexpected total byte count: got %d, want %d", count, want)
	}
}

14
set.go
View File

@@ -12,18 +12,18 @@ func NewSet() *Set {
} }
// Add puts a Cid in the Set. // Add puts a Cid in the Set.
func (s *Set) Add(c *Cid) { func (s *Set) Add(c Cid) {
s.set[string(c.Bytes())] = struct{}{} s.set[string(c.Bytes())] = struct{}{}
} }
// Has returns if the Set contains a given Cid. // Has returns if the Set contains a given Cid.
func (s *Set) Has(c *Cid) bool { func (s *Set) Has(c Cid) bool {
_, ok := s.set[string(c.Bytes())] _, ok := s.set[string(c.Bytes())]
return ok return ok
} }
// Remove deletes a Cid from the Set. // Remove deletes a Cid from the Set.
func (s *Set) Remove(c *Cid) { func (s *Set) Remove(c Cid) {
delete(s.set, string(c.Bytes())) delete(s.set, string(c.Bytes()))
} }
@@ -33,8 +33,8 @@ func (s *Set) Len() int {
} }
// Keys returns the Cids in the set. // Keys returns the Cids in the set.
func (s *Set) Keys() []*Cid { func (s *Set) Keys() []Cid {
out := make([]*Cid, 0, len(s.set)) out := make([]Cid, 0, len(s.set))
for k := range s.set { for k := range s.set {
c, _ := Cast([]byte(k)) c, _ := Cast([]byte(k))
out = append(out, c) out = append(out, c)
@@ -44,7 +44,7 @@ func (s *Set) Keys() []*Cid {
// Visit adds a Cid to the set only if it is // Visit adds a Cid to the set only if it is
// not in it already. // not in it already.
func (s *Set) Visit(c *Cid) bool { func (s *Set) Visit(c Cid) bool {
if !s.Has(c) { if !s.Has(c) {
s.Add(c) s.Add(c)
return true return true
@@ -55,7 +55,7 @@ func (s *Set) Visit(c *Cid) bool {
// ForEach allows to run a custom function on each // ForEach allows to run a custom function on each
// Cid in the set. // Cid in the set.
func (s *Set) ForEach(f func(c *Cid) error) error { func (s *Set) ForEach(f func(c Cid) error) error {
for cs := range s.set { for cs := range s.set {
c, _ := Cast([]byte(cs)) c, _ := Cast([]byte(cs))
err := f(c) err := f(c)

View File

@@ -8,7 +8,7 @@ import (
mh "github.com/multiformats/go-multihash" mh "github.com/multiformats/go-multihash"
) )
func makeRandomCid(t *testing.T) *Cid { func makeRandomCid(t *testing.T) Cid {
p := make([]byte, 256) p := make([]byte, 256)
_, err := rand.Read(p) _, err := rand.Read(p)
if err != nil { if err != nil {
@@ -20,11 +20,7 @@ func makeRandomCid(t *testing.T) *Cid {
t.Fatal(err) t.Fatal(err)
} }
cid := &Cid{ cid := NewCidV1(7, h)
codec: 7,
version: 1,
hash: h,
}
return cid return cid
} }
@@ -54,8 +50,8 @@ func TestSet(t *testing.T) {
t.Error("visit should return false") t.Error("visit should return false")
} }
foreach := []*Cid{} foreach := []Cid{}
foreachF := func(c *Cid) error { foreachF := func(c Cid) error {
foreach = append(foreach, c) foreach = append(foreach, c)
return nil return nil
} }
@@ -68,7 +64,7 @@ func TestSet(t *testing.T) {
t.Error("ForEach should have visited 1 element") t.Error("ForEach should have visited 1 element")
} }
foreachErr := func(c *Cid) error { foreachErr := func(c Cid) error {
return errors.New("test") return errors.New("test")
} }