Open research dir; want to explore cid impl perf.

It's been discussed in several issues and PRs already that we might want to
explore various ways of implementing CIDs for maximum performance and
ease-of-use because they show up extremely often.  Current CIDs are pointers,
which generally speaking means you can't get one without a malloc; and also,
they're not particularly well-suited for use in map keys.

This branch is to attempt to consolidate all the proposals so far -- and do so
in a single branch which can be checked out and contains all the proposals at
once, because this will make it easy to do benchmarks and compare all of the
various ways we could implement this in one place (and also easier for humans
to track what the latest of each proposal is, since they're all in one place).

To start with: a Cid implementation backed by a string; and matching interface.

(I'm also taking this opportunity to be as minimalistic as possible in what
I port over into these experimental new Cid implementations.  This might not
last; but as long as all this work is to be done, it's a more convenient time
than usual to see what can be stripped down and still get work done.)

More to come.
This commit is contained in:
Eric Myhre
2018-08-24 10:53:52 +02:00
parent afcde25c66
commit ff25e9673c
2 changed files with 115 additions and 0 deletions

46
_rsrch/cidiface/cid.go Normal file
View File

@@ -0,0 +1,46 @@
package cid
import (
mh "github.com/multiformats/go-multihash"
)
// Cid represents a self-describing content adressed identifier.
//
// A CID is composed of:
//
// - a Version of the CID itself,
// - a Multicodec (indicates the encoding of the referenced content),
// - and a Multihash (which identifies the referenced content).
//
// (Note that the Multihash further contains its own version and hash type
// indicators.)
type Cid interface {
// n.b. 'yields' means "without copy", 'produces' means a malloc.
Version() uint64 // Yields the version prefix as a uint.
Multicodec() uint64 // Yields the multicodec as a uint.
Multihash() mh.Multihash // Yields the multihash segment.
String() string // Produces the CID formatted as b58 string.
Prefix() Prefix // Produces a tuple of non-content metadata.
// some change notes:
// - `KeyString() CidString` is gone because we're natively a map key now, you're welcome.
// - `StringOfBase(mbase.Encoding) (string, error)` is skipped, maybe it can come back but maybe it should be a formatter's job.
// - `Equals(o Cid) bool` is gone because it's now `==`, you're welcome.
// - `Bytes() []byte` is gone because I can't imagine where that should be used except again where a formatter should be involved.
}
// Prefix represents all the metadata of a Cid,
// that is, the Version, the Codec, the Multihash type
// and the Multihash length. It does not contains
// any actual content information.
// NOTE: The use -1 in MhLength to mean default length is deprecated,
// use the V0Builder or V1Builder structures instead
type Prefix struct {
Version uint64
Codec uint64
MhType uint64
MhLength int
}

View File

@@ -0,0 +1,69 @@
package cid
import (
"encoding/binary"
mbase "github.com/multiformats/go-multibase"
mh "github.com/multiformats/go-multihash"
)
var _ Cid = CidStr("")
// CidStr is a representation of a Cid as a string type containing binary.
//
// Using golang's string type is preferable over byte slices even for binary
// data because golang strings are immutable, usable as map keys,
// trivially comparable with built-in equals operators, etc.
type CidStr string
// EmptyCid is a constant for a zero/uninitialized/sentinelvalue cid;
// it is declared mainly for readability in checks for sentinel values.
const EmptyCid = CidStr("")
func (c CidStr) Version() uint64 {
bytes := []byte(c)
v, _ := binary.Uvarint(bytes)
return v
}
func (c CidStr) Multicodec() uint64 {
bytes := []byte(c)
_, n := binary.Uvarint(bytes) // skip version length
codec, _ := binary.Uvarint(bytes[n:])
return codec
}
func (c CidStr) Multihash() mh.Multihash {
bytes := []byte(c)
_, n1 := binary.Uvarint(bytes) // skip version length
_, n2 := binary.Uvarint(bytes[n1:]) // skip codec length
return mh.Multihash(bytes[n1+n2:]) // return slice of remainder
}
// String returns the default string representation of a Cid.
// Currently, Base58 is used as the encoding for the multibase string.
func (c CidStr) String() string {
switch c.Version() {
case 0:
return c.Multihash().B58String()
case 1:
mbstr, err := mbase.Encode(mbase.Base58BTC, []byte(c))
if err != nil {
panic("should not error with hardcoded mbase: " + err.Error())
}
return mbstr
default:
panic("not possible to reach this point")
}
}
// Prefix builds and returns a Prefix out of a Cid.
func (c CidStr) Prefix() Prefix {
dec, _ := mh.Decode(c.Multihash()) // assuming we got a valid multiaddr, this will not error
return Prefix{
MhType: dec.Code,
MhLength: dec.Length,
Version: c.Version(),
Codec: c.Multicodec(),
}
}