diff --git a/_rsrch/cidiface/cid.go b/_rsrch/cidiface/cid.go index 8a888e1..cb4b871 100644 --- a/_rsrch/cidiface/cid.go +++ b/_rsrch/cidiface/cid.go @@ -22,6 +22,7 @@ type Cid interface { Multihash() mh.Multihash // Yields the multihash segment. String() string // Produces the CID formatted as b58 string. + Bytes() []byte // Produces the CID formatted as raw binary. Prefix() Prefix // Produces a tuple of non-content metadata. @@ -29,7 +30,8 @@ type Cid interface { // - `KeyString() CidString` is gone because we're natively a map key now, you're welcome. // - `StringOfBase(mbase.Encoding) (string, error)` is skipped, maybe it can come back but maybe it should be a formatter's job. // - `Equals(o Cid) bool` is gone because it's now `==`, you're welcome. - // - `Bytes() []byte` is gone because I can't imagine where that should be used except again where a formatter should be involved. + + // TODO: make a multi-return method for {v,mc,mh} decomposition. CidStr will be able to implement this more efficiently than if one makes a series of the individual getter calls. } // Prefix represents all the metadata of a Cid, diff --git a/_rsrch/cidiface/cidString.go b/_rsrch/cidiface/cidString.go index c0deb9b..af2014f 100644 --- a/_rsrch/cidiface/cidString.go +++ b/_rsrch/cidiface/cidString.go @@ -2,23 +2,32 @@ package cid import ( "encoding/binary" + "fmt" mbase "github.com/multiformats/go-multibase" mh "github.com/multiformats/go-multihash" ) +//================= +// def & accessors +//================= + var _ Cid = CidStr("") +var _ map[CidStr]struct{} = nil // Compile-time check that CidStr is usable as a map key. // CidStr is a representation of a Cid as a string type containing binary. // // Using golang's string type is preferable over byte slices even for binary // data because golang strings are immutable, usable as map keys, // trivially comparable with built-in equals operators, etc. 
+// +// Please do not cast strings or bytes into the CidStr type directly; +// use a parse method which validates the data and yields a CidStr. type CidStr string -// EmptyCid is a constant for a zero/uninitialized/sentinelvalue cid; +// EmptyCidStr is a constant for a zero/uninitialized/sentinel-value cid; // it is declared mainly for readability in checks for sentinel values. -const EmptyCid = CidStr("") +const EmptyCidStr = CidStr("") func (c CidStr) Version() uint64 { bytes := []byte(c) @@ -57,6 +66,21 @@ func (c CidStr) String() string { } } +// Bytes produces a raw binary format of the CID. +// +// (For CidStr, this method is only distinct from casting because of +// compatibility with v0 CIDs.) +func (c CidStr) Bytes() []byte { + switch c.Version() { + case 0: + return c.Multihash() + case 1: + return []byte(c) + default: + panic("not possible to reach this point") + } +} + // Prefix builds and returns a Prefix out of a Cid. func (c CidStr) Prefix() Prefix { dec, _ := mh.Decode(c.Multihash()) // assuming we got a valid multiaddr, this will not error @@ -67,3 +91,71 @@ func (c CidStr) Prefix() Prefix { Codec: c.Multicodec(), } } + +//================================== +// parsers & validators & factories +//================================== + +func newCidStr(version uint64, codecType uint64, mhash mh.Multihash) CidStr { + hashlen := len(mhash) + // room for two max-length uvarints (binary.MaxVarintLen64 bytes each) plus the hash + buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) + n := binary.PutUvarint(buf, version) + n += binary.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") + } + return CidStr(buf[:n+hashlen]) +} + +// CidStrParse takes a binary byte slice, parses it, and returns either +// a valid CidStr, or the zero CidStr and an error. +// +// For CidV1, the data buffer is in the form: +// +//     [version uvarint][codec uvarint][multihash] +// +// CidV0 is also supported. 
In particular, data buffers of exactly +// 34 bytes that start with bytes [18,32...] are considered +// binary multihashes. +// +// The multicodec is read as a uvarint, but its value is not checked +// against any table of known codecs; no further reification is performed. +// +// Multibase encoding should already have been unwrapped before parsing; +// if you have a multibase-enveloped string, use CidStrDecode instead. +// +// CidStrParse is the inverse of Cid.Bytes(). +func CidStrParse(data []byte) (CidStr, error) { + if len(data) == 34 && data[0] == 18 && data[1] == 32 { + h, err := mh.Cast(data) + if err != nil { + return EmptyCidStr, err + } + return newCidStr(0, DagProtobuf, h), nil + } + + vers, n := binary.Uvarint(data) + if err := uvError(n); err != nil { + return EmptyCidStr, err + } + + if vers != 0 && vers != 1 { + return EmptyCidStr, fmt.Errorf("invalid cid version number: %d", vers) + } + + _, cn := binary.Uvarint(data[n:]) + if err := uvError(cn); err != nil { + return EmptyCidStr, err + } + + rest := data[n+cn:] + h, err := mh.Cast(rest) + if err != nil { + return EmptyCidStr, err + } + + // REVIEW: if the data is longer than the mh.len expects, we silently ignore it? should we? + return CidStr(data[0 : n+cn+len(h)]), nil +} diff --git a/_rsrch/cidiface/cidStruct.go b/_rsrch/cidiface/cidStruct.go new file mode 100644 index 0000000..dcd154e --- /dev/null +++ b/_rsrch/cidiface/cidStruct.go @@ -0,0 +1,162 @@ +package cid + +import ( + "encoding/binary" + "fmt" + + mbase "github.com/multiformats/go-multibase" + mh "github.com/multiformats/go-multihash" +) + +//================= +// def & accessors +//================= + +var _ Cid = CidStruct{} + +//var _ map[CidStruct]struct{} = nil // Will not compile! See struct def docs. + +// CidStruct represents a CID in a struct format. +// +// This format complies with the exact same Cid interface as the CidStr +// implementation, but completely pre-parses the Cid metadata. 
+// CidStruct is a tad quicker in case of repeatedly accessed fields, +// but requires more reshuffling to parse and to serialize. +// CidStruct is not usable as a map key, because it contains a Multihash +// reference, which is a slice, and thus not "comparable" as a primitive. +// +// Beware of zero-valued CidStruct: it is difficult to distinguish an +// incorrectly-initialized "invalid" CidStruct from one representing a v0 cid. +type CidStruct struct { + version uint64 // CID version number. + codec uint64 // multicodec content-type code. + hash mh.Multihash // multihash segment. +} + +// EmptyCidStruct is a constant for a zero/uninitialized/sentinel-value cid; +// it is declared mainly for readability in checks for sentinel values. +// +// Note: it's not actually a const; the compiler does not allow const structs. +var EmptyCidStruct = CidStruct{} + +func (c CidStruct) Version() uint64 { + return c.version +} + +func (c CidStruct) Multicodec() uint64 { + return c.codec +} + +func (c CidStruct) Multihash() mh.Multihash { + return c.hash +} + +// String returns the default string representation of a Cid. +// Currently, Base58 is used as the encoding for the multibase string. +func (c CidStruct) String() string { + switch c.Version() { + case 0: + return c.Multihash().B58String() // v0: the multihash alone, no multibase envelope. + case 1: + mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes()) + if err != nil { + panic("should not error with hardcoded mbase: " + err.Error()) + } + return mbstr + default: + panic("not possible to reach this point") + } +} + +// Bytes produces a raw binary format of the CID. 
+func (c CidStruct) Bytes() []byte { + switch c.version { + case 0: + return []byte(c.hash) // v0 is the bare multihash; the conversion shares c.hash's backing array, it is not a copy. + case 1: + // room for two max-length uvarints (binary.MaxVarintLen64 bytes each) plus the hash + buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) + n := binary.PutUvarint(buf, c.version) + n += binary.PutUvarint(buf[n:], c.codec) + cn := copy(buf[n:], c.hash) + if cn != len(c.hash) { + panic("copy hash length is inconsistent") + } + return buf[:n+len(c.hash)] + default: + panic("not possible to reach this point") + } +} + +// Prefix builds and returns a Prefix out of a Cid. +func (c CidStruct) Prefix() Prefix { + dec, _ := mh.Decode(c.hash) // assuming we got a valid multihash, this will not error; NOTE(review): the error is discarded, so confirm a zero-valued CidStruct never reaches here + return Prefix{ + MhType: dec.Code, + MhLength: dec.Length, + Version: c.version, + Codec: c.codec, + } +} + +//================================== +// parsers & validators & factories +//================================== + +// CidStructParse takes a binary byte slice, parses it, and returns either +// a valid CidStruct, or the zero CidStruct and an error. +// +// For CidV1, the data buffer is in the form: +// +//     [version uvarint][codec uvarint][multihash] +// +// CidV0 is also supported. In particular, data buffers of exactly +// 34 bytes that start with bytes [18,32...] are considered +// binary multihashes. +// +// The multicodec is read as a uvarint, but its value is not checked +// against any table of known codecs; no further reification is performed. +// +// Multibase encoding should already have been unwrapped before parsing; +// if you have a multibase-enveloped string, use CidStructDecode instead. +// +// CidStructParse is the inverse of Cid.Bytes(). 
+func CidStructParse(data []byte) (CidStruct, error) { + if len(data) == 34 && data[0] == 18 && data[1] == 32 { // CidV0: the whole buffer is treated as a bare multihash. + h, err := mh.Cast(data) + if err != nil { + return EmptyCidStruct, err + } + return CidStruct{ + codec: DagProtobuf, // v0 carries no codec bytes; DagProtobuf is filled in. + version: 0, + hash: h, + }, nil + } + + vers, n := binary.Uvarint(data) // leading uvarint is the CID version. + if err := uvError(n); err != nil { + return EmptyCidStruct, err + } + + if vers != 0 && vers != 1 { + return EmptyCidStruct, fmt.Errorf("invalid cid version number: %d", vers) + } + + codec, cn := binary.Uvarint(data[n:]) // second uvarint is the multicodec; stored as-is, not validated. + if err := uvError(cn); err != nil { + return EmptyCidStruct, err + } + + rest := data[n+cn:] // everything after the two uvarints is handed to mh.Cast. + h, err := mh.Cast(rest) + if err != nil { + return EmptyCidStruct, err + } + + return CidStruct{ + version: vers, + codec: codec, + hash: h, // NOTE(review): h presumably still refers to the caller's data buffer; confirm whether a copy is needed. + }, nil +} diff --git a/_rsrch/cidiface/enums.go b/_rsrch/cidiface/enums.go new file mode 100644 index 0000000..53e3d47 --- /dev/null +++ b/_rsrch/cidiface/enums.go @@ -0,0 +1,76 @@ +package cid + +// These are multicodec-packed content types. 
They should match +// the codes described in the authoritative document: +// https://github.com/multiformats/multicodec/blob/master/table.csv +const ( + Raw = 0x55 + + DagProtobuf = 0x70 + DagCBOR = 0x71 + + GitRaw = 0x78 + + EthBlock = 0x90 + EthBlockList = 0x91 + EthTxTrie = 0x92 + EthTx = 0x93 + EthTxReceiptTrie = 0x94 + EthTxReceipt = 0x95 + EthStateTrie = 0x96 + EthAccountSnapshot = 0x97 + EthStorageTrie = 0x98 + BitcoinBlock = 0xb0 + BitcoinTx = 0xb1 + ZcashBlock = 0xc0 + ZcashTx = 0xc1 + DecredBlock = 0xe0 + DecredTx = 0xe1 +) + +// Codecs maps the name of a codec to its numeric code. +var Codecs = map[string]uint64{ + "v0": DagProtobuf, // alias: "v0" and "protobuf" both map to DagProtobuf. + "raw": Raw, + "protobuf": DagProtobuf, + "cbor": DagCBOR, + "git-raw": GitRaw, + "eth-block": EthBlock, + "eth-block-list": EthBlockList, + "eth-tx-trie": EthTxTrie, + "eth-tx": EthTx, + "eth-tx-receipt-trie": EthTxReceiptTrie, + "eth-tx-receipt": EthTxReceipt, + "eth-state-trie": EthStateTrie, + "eth-account-snapshot": EthAccountSnapshot, + "eth-storage-trie": EthStorageTrie, + "bitcoin-block": BitcoinBlock, + "bitcoin-tx": BitcoinTx, + "zcash-block": ZcashBlock, + "zcash-tx": ZcashTx, + "decred-block": DecredBlock, + "decred-tx": DecredTx, +} + +// CodecToStr maps the numeric codec to its name; the "v0" alias in Codecs has no reverse entry. +var CodecToStr = map[uint64]string{ + Raw: "raw", + DagProtobuf: "protobuf", + DagCBOR: "cbor", + GitRaw: "git-raw", + EthBlock: "eth-block", + EthBlockList: "eth-block-list", + EthTxTrie: "eth-tx-trie", + EthTx: "eth-tx", + EthTxReceiptTrie: "eth-tx-receipt-trie", + EthTxReceipt: "eth-tx-receipt", + EthStateTrie: "eth-state-trie", + EthAccountSnapshot: "eth-account-snapshot", + EthStorageTrie: "eth-storage-trie", + BitcoinBlock: "bitcoin-block", + BitcoinTx: "bitcoin-tx", + ZcashBlock: "zcash-block", + ZcashTx: "zcash-tx", + DecredBlock: "decred-block", + DecredTx: "decred-tx", +} diff --git a/_rsrch/cidiface/errors.go b/_rsrch/cidiface/errors.go new file mode 100644 index 0000000..588c62e --- /dev/null +++ b/_rsrch/cidiface/errors.go @@ 
-0,0 +1,24 @@ +package cid + +import ( + "errors" +) + +var ( + // ErrVarintBuffSmall means that a buffer passed to the cid parser was not + // long enough, or did not contain a valid cid. + ErrVarintBuffSmall = errors.New("reading varint: buffer too small") + + // ErrVarintTooBig means that the varint in the given cid did not fit + // in 64 bits. + ErrVarintTooBig = errors.New("reading varint: varint bigger than 64bits" + + " and not supported") + + // ErrCidTooShort means that the cid passed to decode was not long + // enough to be a valid Cid. + ErrCidTooShort = errors.New("cid too short") + + // ErrInvalidEncoding means that the selected encoding is not supported + // by this Cid version. + ErrInvalidEncoding = errors.New("invalid base encoding") +) diff --git a/_rsrch/cidiface/misc.go b/_rsrch/cidiface/misc.go new file mode 100644 index 0000000..9a4486a --- /dev/null +++ b/_rsrch/cidiface/misc.go @@ -0,0 +1,12 @@ +package cid + +func uvError(read int) error { + switch { + case read == 0: // binary.Uvarint reports 0 bytes read when the buffer is too small. + return ErrVarintBuffSmall + case read < 0: // a negative count from binary.Uvarint signals a value overflowing 64 bits. + return ErrVarintTooBig + default: + return nil + } +}