Files
cid/cid.go

530 lines
13 KiB
Go
Raw Normal View History

// Package cid implements the Content-IDentifiers specification
// (https://github.com/ipld/cid) in Go. CIDs are
// self-describing content-addressed identifiers useful for
// distributed information systems. CIDs are used in the IPFS
// (https://ipfs.io) project ecosystem.
//
// CIDs have two major versions. A CIDv0 corresponds to a multihash of type
// DagProtobuf, is deprecated and exists for compatibility reasons. Usually,
// CIDv1 should be used.
//
// A CIDv1 has four parts:
//
// <cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address>
//
// As shown above, the CID implementation relies heavily on Multiformats,
// particularly Multibase
// (https://github.com/multiformats/go-multibase), Multicodec
// (https://github.com/multiformats/multicodec) and Multihash
// implementations (https://github.com/multiformats/go-multihash).
2016-08-26 17:56:12 -07:00
package cid
import (
"bytes"
2016-08-26 17:56:12 -07:00
"encoding/binary"
2017-02-05 23:52:06 -08:00
"encoding/json"
2016-11-17 18:23:54 +01:00
"errors"
2016-08-30 10:04:50 -07:00
"fmt"
2016-11-17 14:40:43 +01:00
"strings"
2016-08-30 10:04:50 -07:00
2016-08-26 17:56:12 -07:00
mbase "github.com/multiformats/go-multibase"
2016-10-05 12:08:42 -07:00
mh "github.com/multiformats/go-multihash"
2016-08-26 17:56:12 -07:00
)
// UnsupportedVersionString just holds an error message: the placeholder
// text for a cid whose version is not supported.
const UnsupportedVersionString = "<unsupported cid version>"
var (
	// ErrVarintBuffSmall means that a buffer passed to the cid parser was not
	// long enough, or did not contain a valid cid
	ErrVarintBuffSmall = errors.New("reading varint: buffer too small")

	// ErrVarintTooBig means that the varint in the given cid does not fit
	// in 64 bits
	ErrVarintTooBig = errors.New("reading varint: varint bigger than 64bits" +
		" and not supported")

	// ErrCidTooShort means that the cid passed to decode was not long
	// enough to be a valid Cid
	ErrCidTooShort = errors.New("cid too short")

	// ErrInvalidEncoding means that selected encoding is not supported
	// by this Cid version
	ErrInvalidEncoding = errors.New("invalid base encoding")
)
// These are multicodec-packed content types. They should match
// the codes described in the authoritative document:
// https://github.com/multiformats/multicodec/blob/master/table.csv
const (
	Raw         = 0x55
	DagProtobuf = 0x70
	DagCBOR     = 0x71

	GitRaw = 0x78

	EthBlock           = 0x90
	EthBlockList       = 0x91
	EthTxTrie          = 0x92
	EthTx              = 0x93
	EthTxReceiptTrie   = 0x94
	EthTxReceipt       = 0x95
	EthStateTrie       = 0x96
	EthAccountSnapshot = 0x97
	EthStorageTrie     = 0x98
	BitcoinBlock       = 0xb0
	BitcoinTx          = 0xb1
	ZcashBlock         = 0xc0
	ZcashTx            = 0xc1
	DecredBlock        = 0xe0
	DecredTx           = 0xe1
)
2017-06-29 21:00:23 -04:00
// Codecs maps the name of a codec to its type
2017-06-30 10:00:21 -04:00
var Codecs = map[string]uint64{
2017-06-29 21:00:23 -04:00
"v0": DagProtobuf,
"raw": Raw,
"protobuf": DagProtobuf,
"cbor": DagCBOR,
"git-raw": GitRaw,
"eth-block": EthBlock,
"eth-block-list": EthBlockList,
"eth-tx-trie": EthTxTrie,
"eth-tx": EthTx,
"eth-tx-receipt-trie": EthTxReceiptTrie,
"eth-tx-receipt": EthTxReceipt,
"eth-state-trie": EthStateTrie,
"eth-account-snapshot": EthAccountSnapshot,
"eth-storage-trie": EthStorageTrie,
"bitcoin-block": BitcoinBlock,
"bitcoin-tx": BitcoinTx,
"zcash-block": ZcashBlock,
"zcash-tx": ZcashTx,
"decred-block": DecredBlock,
"decred-tx": DecredTx,
2017-06-29 21:00:23 -04:00
}
// CodecToStr maps the numeric codec to its name.
// DagProtobuf is named "protobuf" here; there is no "v0" entry.
var CodecToStr = map[uint64]string{
Raw: "raw",
DagProtobuf: "protobuf",
DagCBOR: "cbor",
GitRaw: "git-raw",
EthBlock: "eth-block",
EthBlockList: "eth-block-list",
EthTxTrie: "eth-tx-trie",
EthTx: "eth-tx",
EthTxReceiptTrie: "eth-tx-receipt-trie",
EthTxReceipt: "eth-tx-receipt",
EthStateTrie: "eth-state-trie",
EthAccountSnapshot: "eth-account-snapshot",
EthStorageTrie: "eth-storage-trie",
BitcoinBlock: "bitcoin-block",
BitcoinTx: "bitcoin-tx",
ZcashBlock: "zcash-block",
ZcashTx: "zcash-tx",
DecredBlock: "decred-block",
DecredTx: "decred-tx",
}
// NewCidV0 wraps a multihash in a Cid without adding any version or
// codec prefix: the Cid's bytes are exactly the multihash bytes.
// CidV0 exists so that IPFS can work with Cids while staying
// compatible with the plain-multihash format used in IPFS.
// Prefer NewCidV1 for new code.
func NewCidV0(mhash mh.Multihash) Cid {
	raw := string(mhash)
	return Cid{raw}
}
// NewCidV1 returns a new Cid using the given multicodec-packed
// content type and multihash.
//
// The resulting byte layout is <uvarint 1><uvarint codecType><mhash>.
func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
	// Worst case: two maximum-length uvarints (version and codec)
	// followed by the multihash itself.
	buf := make([]byte, 2*binary.MaxVarintLen64+len(mhash))

	n := binary.PutUvarint(buf, 1)
	n += binary.PutUvarint(buf[n:], codecType)
	// buf always has room for the whole hash, so copy cannot come up short.
	n += copy(buf[n:], mhash)

	return Cid{string(buf[:n])}
}
// Cid represents a self-describing content addressed
// identifier. It is formed by a Version, a Codec (which indicates
// a multicodec-packed content type) and a Multihash.
//
// The binary form is held in the embedded string.
// Byte layout for CidV1: [version, codec, multihash]
// - version uvarint
// - codec uvarint
// - hash mh.Multihash
// A CidV0 holds only the multihash bytes, with no version or codec prefix.
type Cid struct{ string }
// Nil is the zero-value Cid (empty underlying string). The functions in
// this file return it alongside a non-nil error.
var Nil = Cid{}
2016-08-26 17:56:12 -07:00
// Parse is a short-hand function to perform Decode, Cast etc... on
// a generic interface{} type.
func Parse(v interface{}) (Cid, error) {
2016-11-17 14:40:43 +01:00
switch v2 := v.(type) {
case string:
if strings.Contains(v2, "/ipfs/") {
return Decode(strings.Split(v2, "/ipfs/")[1])
}
return Decode(v2)
case []byte:
return Cast(v2)
case mh.Multihash:
return NewCidV0(v2), nil
case Cid:
2016-11-17 14:40:43 +01:00
return v2, nil
default:
2018-08-25 03:05:17 -04:00
return Nil, fmt.Errorf("can't parse %+v as Cid", v2)
2016-11-17 14:40:43 +01:00
}
}
// Decode parses a Cid-encoded string and returns a Cid object.
// For CidV1, a Cid-encoded string is primarily a multibase string:
//
// <multibase-type-code><base-encoded-string>
//
// The base-encoded string represents a:
//
// <version><codec-type><multihash>
//
// Decode will also detect and parse CidV0 strings. Strings
// starting with "Qm" are considered CidV0 and treated directly
// as B58-encoded multihashes.
func Decode(v string) (Cid, error) {
2016-10-24 17:30:53 -07:00
if len(v) < 2 {
2018-08-25 03:05:17 -04:00
return Nil, ErrCidTooShort
2016-10-24 17:30:53 -07:00
}
2016-08-30 10:04:50 -07:00
if len(v) == 46 && v[:2] == "Qm" {
hash, err := mh.FromB58String(v)
if err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
2016-08-30 10:04:50 -07:00
}
return NewCidV0(hash), nil
2016-08-30 10:04:50 -07:00
}
2016-08-26 17:56:12 -07:00
_, data, err := mbase.Decode(v)
if err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
2016-08-26 17:56:12 -07:00
}
return Cast(data)
}
2018-08-09 02:37:09 -04:00
// Extract the encoding from a Cid. If Decode on the same string did
// not return an error neither will this function.
func ExtractEncoding(v string) (mbase.Encoding, error) {
if len(v) < 2 {
return -1, ErrCidTooShort
2016-08-26 17:56:12 -07:00
}
2018-08-09 02:37:09 -04:00
if len(v) == 46 && v[:2] == "Qm" {
return mbase.Base58BTC, nil
}
encoding := mbase.Encoding(v[0])
// check encoding is valid
_, err := mbase.NewEncoder(encoding)
if err != nil {
return -1, err
}
2018-08-09 02:37:09 -04:00
return encoding, nil
2016-08-26 17:56:12 -07:00
}
2016-11-17 18:23:54 +01:00
// uvError translates the byte count reported by binary.Uvarint into an
// error: zero maps to ErrVarintBuffSmall, a negative count maps to
// ErrVarintTooBig, and a positive count means success (nil).
func uvError(read int) error {
	if read == 0 {
		return ErrVarintBuffSmall
	}
	if read < 0 {
		return ErrVarintTooBig
	}
	return nil
}
// Cast takes a Cid data slice, parses it and returns a Cid.
// For CidV1, the data buffer is in the form:
//
// <version><codec-type><multihash>
//
// CidV0 are also supported. In particular, data buffers starting
// with length 34 bytes, which starts with bytes [18,32...] are considered
// binary multihashes.
//
// Please use decode when parsing a regular Cid string, as Cast does not
// expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
func Cast(data []byte) (Cid, error) {
if len(data) == 34 && data[0] == 18 && data[1] == 32 {
h, err := mh.Cast(data)
if err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
}
return NewCidV0(h), nil
}
2016-08-26 17:56:12 -07:00
vers, n := binary.Uvarint(data)
2016-11-17 18:23:54 +01:00
if err := uvError(n); err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
2016-11-17 18:23:54 +01:00
}
if vers != 1 {
2018-08-25 03:05:17 -04:00
return Nil, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
}
_, cn := binary.Uvarint(data[n:])
2016-11-17 18:23:54 +01:00
if err := uvError(cn); err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
2016-11-17 18:23:54 +01:00
}
2016-08-26 17:56:12 -07:00
rest := data[n+cn:]
h, err := mh.Cast(rest)
if err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
2016-08-26 17:56:12 -07:00
}
return Cid{string(data[0 : n+cn+len(h)])}, nil
2016-08-26 17:56:12 -07:00
}
// Version reports the Cid version. A Cid whose binary form is exactly
// 34 bytes and begins with the bytes 18, 32 is version 0; everything
// else is reported as version 1.
func (c Cid) Version() uint64 {
	raw := c.string
	if len(raw) != 34 || raw[0] != 18 || raw[1] != 32 {
		return 1
	}
	return 0
}
// Type returns the multicodec-packed content type of a Cid.
// Version 0 Cids are always DagProtobuf; otherwise the codec is the
// second uvarint of the binary form, right after the version.
func (c Cid) Type() uint64 {
	if c.Version() == 0 {
		return DagProtobuf
	}
	raw := c.Bytes()
	_, versionLen := binary.Uvarint(raw)
	codecType, _ := binary.Uvarint(raw[versionLen:])
	return codecType
}
// String returns the default string representation of a
// Cid. Currently, Base58 is used as the encoding for the
// multibase string.
func (c Cid) String() string {
switch c.Version() {
2016-08-30 10:04:50 -07:00
case 0:
return c.Hash().B58String()
2016-08-30 10:04:50 -07:00
case 1:
mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes())
2016-08-30 10:04:50 -07:00
if err != nil {
panic("should not error with hardcoded mbase: " + err.Error())
}
return mbstr
default:
panic("not possible to reach this point")
2016-08-30 10:04:50 -07:00
}
}
2017-06-19 16:37:07 +02:00
// String returns the string representation of a Cid
// encoded is selected base
func (c Cid) StringOfBase(base mbase.Encoding) (string, error) {
switch c.Version() {
2017-06-19 16:37:07 +02:00
case 0:
if base != mbase.Base58BTC {
return "", ErrInvalidEncoding
}
return c.Hash().B58String(), nil
2017-06-19 16:37:07 +02:00
case 1:
return mbase.Encode(base, c.Bytes())
2017-06-19 16:37:07 +02:00
default:
panic("not possible to reach this point")
}
}
2018-08-01 15:34:58 -04:00
// Encode returns the string representation of a Cid in the given base
// when applicable. Version 0 Cids are always rendered in Base58, since
// they carry no multibase prefix, so the encoder is ignored for them.
func (c Cid) Encode(base mbase.Encoder) string {
	ver := c.Version()
	if ver == 0 {
		return c.Hash().B58String()
	}
	if ver == 1 {
		return base.Encode(c.Bytes())
	}
	panic("not possible to reach this point")
}
// Hash returns the multihash contained by a Cid.
// For version 0 that is the whole binary form; for version 1 it is
// whatever follows the version and codec uvarints.
func (c Cid) Hash() mh.Multihash {
	raw := c.Bytes()
	if c.Version() != 0 {
		// Only the encoded lengths matter here, not the decoded values.
		_, versionLen := binary.Uvarint(raw)
		_, codecLen := binary.Uvarint(raw[versionLen:])
		raw = raw[versionLen+codecLen:]
	}
	return mh.Multihash(raw)
}
// Bytes returns the binary form of a Cid as a byte slice converted
// from the underlying string. The result can be turned back into a
// Cid with Cast().
func (c Cid) Bytes() []byte {
	raw := []byte(c.string)
	return raw
}
2016-08-26 17:56:12 -07:00
// Equals checks that two Cids are the same.
// In order for two Cids to be considered equal, the
// Version, the Codec and the Multihash must match.
func (c Cid) Equals(o Cid) bool {
return c == o
2016-08-26 17:56:12 -07:00
}
// UnmarshalJSON parses the JSON representation of a Cid.
func (c *Cid) UnmarshalJSON(b []byte) error {
if len(b) < 2 {
return fmt.Errorf("invalid cid json blob")
}
2017-02-05 23:52:06 -08:00
obj := struct {
CidTarget string `json:"/"`
}{}
err := json.Unmarshal(b, &obj)
if err != nil {
return err
}
if obj.CidTarget == "" {
return fmt.Errorf("cid was incorrectly formatted")
}
out, err := Decode(obj.CidTarget)
if err != nil {
return err
}
*c = Cid{out.string[:]}
return nil
}
// MarshalJSON procudes a JSON representation of a Cid, which looks as follows:
//
// { "/": "<cid-string>" }
//
// Note that this formatting comes from the IPLD specification
// (https://github.com/ipld/specs/tree/master/ipld)
2018-07-11 11:20:53 -10:00
func (c Cid) MarshalJSON() ([]byte, error) {
2017-02-05 23:52:06 -08:00
return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil
}
2016-09-27 03:43:09 -07:00
// KeyString returns the binary representation of the Cid as a string
func (c Cid) KeyString() string {
return c.string
2016-09-27 03:43:09 -07:00
}
2016-09-27 06:14:46 -07:00
// Loggable returns a Loggable (as defined by
// https://godoc.org/github.com/ipfs/go-log).
func (c Cid) Loggable() map[string]interface{} {
2016-09-27 06:14:46 -07:00
return map[string]interface{}{
"cid": c,
}
}
// Prefix builds and returns a Prefix out of a Cid: its version, codec,
// and the type and length of its multihash.
func (c Cid) Prefix() Prefix {
	// The decode error is dropped on purpose: a valid Cid is assumed to
	// carry a valid multihash, in which case decoding does not fail.
	decoded, _ := mh.Decode(c.Hash())
	return Prefix{
		Version:  c.Version(),
		Codec:    c.Type(),
		MhType:   decoded.Code,
		MhLength: decoded.Length,
	}
}
// Prefix represents all the metadata of a Cid,
// that is, the Version, the Codec, the Multihash type
// and the Multihash length. It does not contain
// any actual content information.
// NOTE: The use of -1 in MhLength to mean default length is deprecated,
// use the V0Builder or V1Builder structures instead
type Prefix struct {
	Version  uint64
	Codec    uint64
	MhType   uint64
	MhLength int
}
// Sum uses the information in a prefix to perform a multihash.Sum()
// and return a newly constructed Cid with the resulting multihash.
func (p Prefix) Sum(data []byte) (Cid, error) {
hash, err := mh.Sum(data, p.MhType, p.MhLength)
if err != nil {
2018-08-25 03:05:17 -04:00
return Nil, err
}
switch p.Version {
case 0:
return NewCidV0(hash), nil
case 1:
return NewCidV1(p.Codec, hash), nil
default:
2018-08-25 03:05:17 -04:00
return Nil, fmt.Errorf("invalid cid version")
}
}
// Bytes returns a byte representation of a Prefix. It looks like:
//
// <version><codec><mh-type><mh-length>
func (p Prefix) Bytes() []byte {
2016-11-17 19:16:05 +01:00
buf := make([]byte, 4*binary.MaxVarintLen64)
n := binary.PutUvarint(buf, p.Version)
n += binary.PutUvarint(buf[n:], p.Codec)
n += binary.PutUvarint(buf[n:], uint64(p.MhType))
n += binary.PutUvarint(buf[n:], uint64(p.MhLength))
return buf[:n]
}
// PrefixFromBytes parses a Prefix-byte representation onto a
// Prefix.
func PrefixFromBytes(buf []byte) (Prefix, error) {
r := bytes.NewReader(buf)
vers, err := binary.ReadUvarint(r)
if err != nil {
return Prefix{}, err
}
codec, err := binary.ReadUvarint(r)
if err != nil {
return Prefix{}, err
}
mhtype, err := binary.ReadUvarint(r)
if err != nil {
return Prefix{}, err
}
mhlen, err := binary.ReadUvarint(r)
if err != nil {
return Prefix{}, err
}
return Prefix{
Version: vers,
Codec: codec,
2017-02-02 18:53:32 -08:00
MhType: mhtype,
MhLength: int(mhlen),
}, nil
}