Open research dir; want to explore cid impl perf.
It's been discussed in several issues and PRs already that we might want to explore various ways of implementing CIDs for maximum performance and ease-of-use because they show up extremely often. Current CIDs are pointers, which generally speaking means you can't get one without a malloc; and also, they're not particularly well-suited for use in map keys. This branch is to attempt to consolidate all the proposals so far -- and do so in a single branch which can be checked out and contains all the proposals at once, because this will make it easy to do benchmarks and compare all of the various ways we could implement this in one place (and also easier for humans to track what the latest of each proposal is, since they're all in one place). To start with: a Cid implementation backed by a string; and matching interface. (I'm also taking this opportunity to be as minimalistic as possible in what I port over into these experimental new Cid implementations. This might not last; but as long as all this work is to be done, it's a more convenient time than usual to see what can be stripped down and still get work done.) More to come.
This commit is contained in:
46
_rsrch/cidiface/cid.go
Normal file
46
_rsrch/cidiface/cid.go
Normal file
@@ -0,0 +1,46 @@
|
||||
package cid
|
||||
|
||||
import (
|
||||
mh "github.com/multiformats/go-multihash"
|
||||
)
|
||||
|
||||
// Cid represents a self-describing content adressed identifier.
|
||||
//
|
||||
// A CID is composed of:
|
||||
//
|
||||
// - a Version of the CID itself,
|
||||
// - a Multicodec (indicates the encoding of the referenced content),
|
||||
// - and a Multihash (which identifies the referenced content).
|
||||
//
|
||||
// (Note that the Multihash further contains its own version and hash type
|
||||
// indicators.)
|
||||
type Cid interface {
|
||||
// n.b. 'yields' means "without copy", 'produces' means a malloc.
|
||||
|
||||
Version() uint64 // Yields the version prefix as a uint.
|
||||
Multicodec() uint64 // Yields the multicodec as a uint.
|
||||
Multihash() mh.Multihash // Yields the multihash segment.
|
||||
|
||||
String() string // Produces the CID formatted as b58 string.
|
||||
|
||||
Prefix() Prefix // Produces a tuple of non-content metadata.
|
||||
|
||||
// some change notes:
|
||||
// - `KeyString() CidString` is gone because we're natively a map key now, you're welcome.
|
||||
// - `StringOfBase(mbase.Encoding) (string, error)` is skipped, maybe it can come back but maybe it should be a formatter's job.
|
||||
// - `Equals(o Cid) bool` is gone because it's now `==`, you're welcome.
|
||||
// - `Bytes() []byte` is gone because I can't imagine where that should be used except again where a formatter should be involved.
|
||||
}
|
||||
|
||||
// Prefix holds all the metadata of a Cid and none of its content
// information: the Cid Version, the Codec, the Multihash type, and the
// Multihash length.
//
// NOTE: Using -1 in MhLength to mean "default length" is deprecated;
// use the V0Builder or V1Builder structures instead.
type Prefix struct {
	// Version, Codec and MhType are all carried as plain uint64 codes.
	Version, Codec, MhType uint64

	// MhLength is the Multihash length.
	MhLength int
}
|
||||
69
_rsrch/cidiface/cidString.go
Normal file
69
_rsrch/cidiface/cidString.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package cid
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
mbase "github.com/multiformats/go-multibase"
|
||||
mh "github.com/multiformats/go-multihash"
|
||||
)
|
||||
|
||||
// Compile-time assertion that CidStr satisfies the Cid interface.
var _ Cid = CidStr("")
|
||||
|
||||
// CidStr is a representation of a Cid as a string type containing binary.
//
// Using golang's string type is preferable over byte slices even for binary
// data because golang strings are immutable, usable as map keys,
// trivially comparable with built-in equals operators, etc.
//
// The binary layout read by the accessors is: a uvarint version, followed
// by a uvarint multicodec, followed by the multihash bytes.
type CidStr string

// EmptyCid is a constant for a zero/uninitialized/sentinel value cid;
// it is declared mainly for readability in checks for sentinel values.
const EmptyCid = CidStr("")

// Version returns the CID version, decoded from the leading uvarint.
//
// It returns 0 when no varint can be decoded, which covers EmptyCid,
// a truncated varint, and a varint that overflows 64 bits.
func (c CidStr) Version() uint64 {
	// binary.Uvarint reports a zero value on both failure modes, so the
	// byte count can be ignored here.
	v, _ := binary.Uvarint([]byte(c))
	return v
}

// Multicodec returns the multicodec code, decoded from the uvarint that
// follows the version varint.
//
// It returns 0 when either varint cannot be decoded. In particular, a
// version varint that overflows 64 bits yields 0 instead of panicking.
func (c CidStr) Multicodec() uint64 {
	raw := []byte(c)
	_, n := binary.Uvarint(raw) // n is the encoded length of the version
	if n <= 0 {
		// n < 0 signals overflow and must not be used as a slice offset;
		// n == 0 means the buffer holds no complete varint at all, so
		// there is no codec to read either.
		return 0
	}
	codec, _ := binary.Uvarint(raw[n:])
	return codec
}
|
||||
|
||||
func (c CidStr) Multihash() mh.Multihash {
|
||||
bytes := []byte(c)
|
||||
_, n1 := binary.Uvarint(bytes) // skip version length
|
||||
_, n2 := binary.Uvarint(bytes[n1:]) // skip codec length
|
||||
return mh.Multihash(bytes[n1+n2:]) // return slice of remainder
|
||||
}
|
||||
|
||||
// String returns the default string representation of a Cid.
|
||||
// Currently, Base58 is used as the encoding for the multibase string.
|
||||
func (c CidStr) String() string {
|
||||
switch c.Version() {
|
||||
case 0:
|
||||
return c.Multihash().B58String()
|
||||
case 1:
|
||||
mbstr, err := mbase.Encode(mbase.Base58BTC, []byte(c))
|
||||
if err != nil {
|
||||
panic("should not error with hardcoded mbase: " + err.Error())
|
||||
}
|
||||
return mbstr
|
||||
default:
|
||||
panic("not possible to reach this point")
|
||||
}
|
||||
}
|
||||
|
||||
// Prefix builds and returns a Prefix out of a Cid.
|
||||
func (c CidStr) Prefix() Prefix {
|
||||
dec, _ := mh.Decode(c.Multihash()) // assuming we got a valid multiaddr, this will not error
|
||||
return Prefix{
|
||||
MhType: dec.Code,
|
||||
MhLength: dec.Length,
|
||||
Version: c.Version(),
|
||||
Codec: c.Multicodec(),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user