Merge pull request #107 from ipfs/chore/base36_support

Chore/base36 support
gx-era file no longer relevant
2020-05-25 18:15:04 +02:00 · 2020-05-25 18:08:48 +02:00 · 2020-05-25 17:46:16 +02:00 · 2020-05-25 14:06:26 +02:00 · 2020-05-25 13:51:07 +02:00 · 2020-05-25 13:50:43 +02:00
33 changed files with 2561 additions and 144 deletions
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+ - name: Getting Help on IPFS
+   url: https://ipfs.io/help
+   about: All information about how and where to get help on IPFS.
+ - name: IPFS Official Forum
+   url: https://discuss.ipfs.io
+   about: Please post general questions, support requests, and discussions here.
--- a/.github/ISSUE_TEMPLATE/open_an_issue.md
+++ b/.github/ISSUE_TEMPLATE/open_an_issue.md
@@ -0,0 +1,19 @@
+---
+name: Open an issue
+about: Only for actionable issues relevant to this repository.
+title: ''
+labels: need/triage
+assignees: ''
+
+---
+<!--
+Hello! To ensure this issue is correctly addressed as soon as possible by the IPFS team, please try to make sure:
+
+- This issue is relevant to this repository's topic or codebase.
+
+- A clear description is provided. It should include as much relevant information as possible and a clear scope so that the issue is actionable.
+
+FOR GENERAL DISCUSSION, HELP OR QUESTIONS, please see the options at https://ipfs.io/help or head directly to https://discuss.ipfs.io.
+
+(you can delete this section after reading)
+-->
--- a/.github/config.yml
+++ b/.github/config.yml
@@ -0,0 +1,68 @@
+# Configuration for welcome - https://github.com/behaviorbot/welcome
+
+# Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome
+# Comment to be posted on first-time issues
+newIssueWelcomeComment: >
+  Thank you for submitting your first issue to this repository! A maintainer
+  will be here shortly to triage and review.
+
+  In the meantime, please double-check that you have provided all the
+  necessary information to make this process easy! Any information that can
+  help save additional round trips is useful! We currently aim to give
+  initial feedback within **two business days**. If this does not happen, feel
+  free to leave a comment.
+
+  Please keep an eye on how this issue will be labeled, as labels give an
+  overview of priorities, assignments and additional actions requested by the
+  maintainers:
+
+    - "Priority" labels will show how urgent this is for the team.
+    - "Status" labels will show if this is ready to be worked on, blocked, or in progress.
+    - "Need" labels will indicate if additional input or analysis is required.
+
+  Finally, remember to use https://discuss.ipfs.io if you just need general
+  support.
+
+# Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome
+# Comment to be posted on PRs from first-time contributors in your repository
+newPRWelcomeComment: >
+  Thank you for submitting this PR!
+
+  A maintainer will be here shortly to review it.
+
+  We are super grateful, but we are also overloaded! Help us by making sure
+  that:
+
+    * The context for this PR is clear, with relevant discussion, decisions
+      and stakeholders linked/mentioned.
+
+    * Your contribution itself is clear (code comments, self-review for the
+      rest) and in its best form. Follow the [code contribution
+      guidelines](https://github.com/ipfs/community/blob/master/CONTRIBUTING.md#code-contribution-guidelines)
+      if they apply.
+
+  Getting other community members to do a review would be great help too on
+  complex PRs (you can ask in the chats/forums). If you are unsure about
+  something, just leave us a comment.
+
+  Next steps:
+
+    * A maintainer will triage and assign priority to this PR, commenting on
+      any missing things and potentially assigning a reviewer for high
+      priority items.
+
+    * The PR gets reviewed, discussed and approved as needed.
+
+    * The PR is merged by maintainers when it has been approved and comments addressed.
+
+  We currently aim to provide initial feedback/triaging within **two business
+  days**. Please keep an eye on any labelling actions, as these will indicate
+  priorities and status of your contribution.
+
+  We are very grateful for your contribution!
+
+
+# Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge
+# Comment to be posted on pull requests merged by a first-time user
+# Currently disabled
+#firstPRMergeComment: ""
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+cid-fuzz.zip
--- a/.gx/lastpubver
+++ b/.gx/lastpubver
@@ -1 +0,0 @@
-0.3.0: QmfAjb1QYA9SS9TLVJBRZXEVriGaGrRZ3vJSajhLa52aYg
--- a/.travis.yml
+++ b/.travis.yml
@@ -0,0 +1,30 @@
+os:
+  - linux
+
+language: go
+
+go:
+  - 1.11.x
+
+env:
+  global:
+    - GOTFLAGS="-race"
+  matrix:
+    - BUILD_DEPTYPE=gomod
+
+
+# disable travis install
+install:
+  - true
+
+script:
+  - bash <(curl -s https://raw.githubusercontent.com/ipfs/ci-helpers/master/travis-ci/run-standard-tests.sh)
+
+
+cache:
+  directories:
+    - $GOPATH/pkg/mod
+    - $HOME/.cache/go-build
+
+notifications:
+  email: false
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Protocol Labs, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/5
+++ b/5
@@ -0,0 +1,5 @@
+all: deps
+
+deps:
+	go get github.com/mattn/goveralls
+	go get golang.org/x/tools/cmd/cover
--- a/README.md
+++ b/README.md
@@ -0,0 +1,108 @@
+go-cid
+==================
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
+[![](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+[![GoDoc](https://godoc.org/github.com/ipfs/go-cid?status.svg)](https://godoc.org/github.com/ipfs/go-cid)
+[![Coverage Status](https://coveralls.io/repos/github/ipfs/go-cid/badge.svg?branch=master)](https://coveralls.io/github/ipfs/go-cid?branch=master)
+[![Travis CI](https://travis-ci.org/ipfs/go-cid.svg?branch=master)](https://travis-ci.org/ipfs/go-cid)
+
+> A package to handle content IDs in Go.
+
+This is an implementation in Go of the [CID spec](https://github.com/ipld/cid).
+It is used in `go-ipfs` and related packages to refer to a typed hunk of data.
+
+## Lead Maintainer
+
+[Eric Myhre](https://github.com/warpfork)
+
+## Table of Contents
+
+- [Install](#install)
+- [Usage](#usage)
+- [API](#api)
+- [Contribute](#contribute)
+- [License](#license)
+
+## Install
+
+`go-cid` is a standard Go module which can be installed with:
+
+```sh
+go get github.com/ipfs/go-cid
+```
+
+## Usage
+
+### Running tests
+
+Run tests with `go test` from the directory root
+
+```sh
+go test
+```
+
+### Examples
+
+#### Parsing string input from users
+
+```go
+// Create a cid from a marshaled string
+c, err := cid.Decode("bafzbeigai3eoy2ccc7ybwjfz5r3rdxqrinwi4rwytly24tdbh6yk7zslrm")
+if err != nil {...}
+
+fmt.Println("Got CID: ", c)
+```
+
+#### Creating a CID from scratch
+
+```go
+// Create a cid manually by specifying the 'prefix' parameters
+pref := cid.Prefix{
+	Version: 1,
+	Codec: cid.Raw,
+	MhType: mh.SHA2_256,
+	MhLength: -1, // default length
+}
+
+// And then feed it some data
+c, err := pref.Sum([]byte("Hello World!"))
+if err != nil {...}
+
+fmt.Println("Created CID: ", c)
+```
+
+#### Check if two CIDs match
+
+```go
+// To test if two CIDs are equivalent, be sure to use the 'Equals' method:
+if c1.Equals(c2) {
+	fmt.Println("These two refer to the same exact data!")
+}
+```
+
+#### Check if some data matches a given CID
+
+```go
+// To check if some data matches a given cid,
+// get your CID's prefix, and use that to sum the data in question:
+other, err := c.Prefix().Sum(mydata)
+if err != nil {...}
+
+if !c.Equals(other) {
+	fmt.Println("This data is different.")
+}
+
+```
+
+## Contribute
+
+PRs are welcome!
+
+Small note: If editing the Readme, please conform to the [standard-readme](https://github.com/RichardLitt/standard-readme) specification.
+
+## License
+
+MIT © Jeromy Johnson
--- a/_rsrch/cidiface/README.md
+++ b/_rsrch/cidiface/README.md
@@ -0,0 +1,168 @@
+What golang Kinds work best to implement CIDs?
+==============================================
+
+There are many possible ways to implement CIDs.  This package explores them.
+
+### criteria
+
+There's a couple different criteria to consider:
+
+- We want the best performance when operating on the type (getters, mostly);
+- We want to minimize the number of memory allocations we need;
+- We want types which can be used as map keys, because this is common.
+
+The priority of these criteria is open to argument, but it's probably
+mapkeys > minalloc > anythingelse.
+(Mapkeys and minalloc are also quite entangled, since if we don't pick a
+representation that can work natively as a map key, we'll end up needing
+a `KeyRepr()` method which gives us something that does work as a map key,
+and that will almost certainly involve a malloc itself.)
+
+### options
+
+There are quite a few different ways to go:
+
+- Option A: CIDs as a struct; multihash as bytes.
+- Option B: CIDs as a string.
+- Option C: CIDs as an interface with multiple implementors.
+- Option D: CIDs as a struct; multihash also as a struct or string.
+- Option E: CIDs as a struct; content as strings plus offsets.
+- Option F: CIDs as a struct wrapping only a string.
+
+The current approach on the master branch is Option A.
+
+Option D is distinctive from Option A because multihash as bytes transitively
+causes the CID struct to be non-comparable and thus not suitable for map keys
+as per https://golang.org/ref/spec#KeyType .  (It's also a bit more work to
+pursue Option D because it's just a bigger splash radius of change; but also,
+something we might also want to do soon, because we *do* also have these same
+map-key-usability concerns with multihash alone.)
+
+Option E is distinctive from Option D because Option E would always maintain
+the binary format of the cid internally, and so could yield it again without
+malloc, while still potentially having faster access to components than
+Option B since it wouldn't need to re-parse varints to access later fields.
+
+Option F is actually a variation of Option B; it's distinctive from the other
+struct options because it is proposing *literally* `struct{ x string }` as
+the type, with no additional fields for components nor offsets.
+
+Option C is the avoid-choices choice, but note that interfaces are not free;
+since "minimize mallocs" is one of our major goals, we cannot use interfaces
+whimsically.
+
+Note there is no proposal for migrating to `type Cid []byte`, because that
+is generally considered to be strictly inferior to `type Cid string`.
+
+
+Discoveries
+-----------
+
+### using interfaces as map keys forgoes a lot of safety checks
+
+Using interfaces as map keys pushes a bunch of type checking to runtime.
+E.g., it's totally valid at compile time to push a type which is non-comparable
+into a map key; it will panic at *runtime* instead of failing at compile-time.
+
+There's also no way to define equality checks between implementors of the
+interface: golang will always use its innate concept of comparison for the
+concrete types.  This means it's effectively *never safe* to use two different
+concrete implementations of an interface in the same map; you may add elements
+which are semantically "equal" in your mind, and end up very confused later
+when both impls of the same "equal" object have been stored.
+
+### sentinel values are possible in any impl, but some are clearer than others
+
+When using `*Cid`, the nil value is a clear sentinel for 'invalid';
+when using `type Cid string`, the zero value is a clear sentinel;
+when using `type Cid struct` per Option A or D... the only valid check is
+for a nil multihash field, since version=0 and codec=0 are both valid values.
+When using `type Cid struct{string}` per Option F, zero is a clear sentinel.
+
+### usability as a map key is important
+
+We already covered this in the criteria section, but for clarity:
+
+- Option A: ❌
+- Option B: ✔
+- Option C: ~ (caveats, and depends on concrete impl)
+- Option D: ✔
+- Option E: ✔
+- Option F: ✔
+
+### living without offsets requires parsing
+
+Since CID (and multihash!) are defined using varints, they require parsing;
+we can't just jump into the string at a known offset in order to yield e.g.
+the multicodec number.
+
+In order to get to the 'meat' of the CID (the multihash content), we first
+must parse:
+
+- the CID version varint;
+- the multicodec varint;
+- the multihash type enum varint;
+- and the multihash length varint.
+
+Since there are many applications where we want to jump straight to the
+multihash content (for example, when doing CAS sharding -- see the
+[disclaimer](https://github.com/multiformats/multihash#disclaimers) about
+bias in leading bytes), this overhead may be interesting.
+
+Whether this overhead is significant is hard to say from microbenchmarking;
+it depends largely on usage patterns. If these traversals are a significant
+timesink, it would be an argument for Option D/E.
+If these traversals are *not* a significant timesink, we might be wiser
+to keep to Option B/F, because keeping a struct full of offsets will add several
+words of memory usage per CID, and we keep a *lot* of CIDs.
+
+### interfaces cause boxing which is a significant performance cost
+
+See `BenchmarkCidMap_CidStr` and friends.
+
+Long story short: using interfaces *anywhere* will cause the compiler to
+implicitly generate boxing and unboxing code (e.g. `runtime.convT2E`);
+this is both another function call, and more concerningly, results in
+large numbers of unbatchable memory allocations.
+
+Numbers without context are dangerous, but if you need one: 33%.
+It's a big deal.
+
+This means attempts to "use interfaces, but switch to concrete impls when
+performance is important" are a red herring: it doesn't work that way.
+
+This is not a general indictment against using interfaces -- but
+if a situation is at the scale where it's become important to mind whether
+or not pointers are a performance impact, then that situation also
+is one where you have to think twice before using interfaces.
+
+### struct wrappers can be used in place of typedefs with zero overhead
+
+See `TestSizeOf`.
+
+Using the `unsafe.Sizeof` feature to inspect what the Go runtime thinks,
+we can see that `type Foo string` and `type Foo struct{x string}` consume
+precisely the same amount of memory.
+
+This is interesting because it means we can choose between either
+type definition with no significant overhead anywhere we use it:
+thus, we can choose freely between Option B and Option F based on which
+we feel is more pleasant to work with.
+
+Option F (a struct wrapper) means we can prevent casting into our Cid type.
+Option B (typedef string) can be declared a `const`.
+Are there any other concerns that would separate the two choices?
+
+### one way or another: let's get rid of that star
+
+We should switch completely to handling `Cid` and remove `*Cid` completely.
+Regardless of whether we do this by migrating to interface, or string
+implementations, or simply structs with no pointers... once we get there,
+refactoring to any of the *others* can become a no-op from the perspective
+of any downstream code that uses CIDs.
+
+(This means all access via functions, never references to fields -- even if
+we were to use a struct implementation.  *Pretend* there's an interface,
+in other words.)
+
+There are probably `gofix` incantations which can help us with this migration.
--- a/_rsrch/cidiface/cid.go
+++ b/_rsrch/cidiface/cid.go
@@ -0,0 +1,48 @@
+package cid
+
+import (
+	mh "github.com/multiformats/go-multihash"
+)
+
+// Cid represents a self-describing content-addressed identifier.
+//
+// A CID is composed of:
+//
+//   - a Version of the CID itself,
+//   - a Multicodec (indicates the encoding of the referenced content),
+//   - and a Multihash (which identifies the referenced content).
+//
+// (Note that the Multihash further contains its own version and hash type
+// indicators.)
+type Cid interface {
+	// n.b. 'yields' means "without copy", 'produces' means a malloc.
+
+	Version() uint64         // Yields the version prefix as a uint.
+	Multicodec() uint64      // Yields the multicodec as a uint.
+	Multihash() mh.Multihash // Yields the multihash segment.
+
+	String() string // Produces the CID formatted as b58 string.
+	Bytes() []byte  // Produces the CID formatted as raw binary.
+
+	Prefix() Prefix // Produces a tuple of non-content metadata.
+
+	// some change notes:
+	// - `KeyString() CidString` is gone because we're natively a map key now, you're welcome.
+	// - `StringOfBase(mbase.Encoding) (string, error)` is skipped, maybe it can come back but maybe it should be a formatter's job.
+	// - `Equals(o Cid) bool` is gone because it's now `==`, you're welcome.
+
+	// TODO: make a multi-return method for {v,mc,mh} decomposition.  CidStr will be able to implement this more efficiently than if one makes a series of the individual getter calls.
+}
+
+// Prefix represents all the metadata of a Cid,
+// that is, the Version, the Codec, the Multihash type
+// and the Multihash length. It does not contain
+// any actual content information.
+// NOTE: The use of -1 in MhLength to mean default length is deprecated,
+//   use the V0Builder or V1Builder structures instead
+type Prefix struct {
+	Version  uint64
+	Codec    uint64
+	MhType   uint64
+	MhLength int
+}
--- a/_rsrch/cidiface/cidBoxingBench_test.go
+++ b/_rsrch/cidiface/cidBoxingBench_test.go
@@ -0,0 +1,71 @@
+package cid
+
+import (
+	"testing"
+)
+
+// BenchmarkCidMap_CidStr estimates how fast it is to insert primitives into a map
+// keyed by CidStr (concretely).
+//
+// We do 100 insertions into a fresh map on every pass of the b.N loop, to make
+// sure the map initialization doesn't dominate the results.
+//
+// Sample results on linux amd64 go1.11beta:
+//
+//   BenchmarkCidMap_CidStr-8          100000             16317 ns/op
+//   BenchmarkCidMap_CidIface-8        100000             20516 ns/op
+//
+// With benchmem on:
+//
+//   BenchmarkCidMap_CidStr-8          100000             15579 ns/op           11223 B/op        207 allocs/op
+//   BenchmarkCidMap_CidIface-8        100000             19500 ns/op           12824 B/op        307 allocs/op
+//   BenchmarkCidMap_StrPlusHax-8      200000             10451 ns/op            7589 B/op        202 allocs/op
+//
+// We can see here that the impact of interface boxing is significant:
+// it increases the time taken to do the inserts to 133%, largely because
+// the implied `runtime.convT2E` calls cause another malloc each.
+//
+// There are also significant allocations in both cases because
+// A) we cannot create a multihash without allocations since they are []byte;
+// B) the map has to be grown several times;
+// C) something I haven't quite put my finger on yet.
+// Ideally we'd drive those down further as well.
+//
+// Pre-allocating the map reduces allocs by a very small percentage by *count*,
+// but reduces the time taken by 66% overall (presumably because when a map
+// re-arranges itself, it involves more or less an O(n) copy of the content
+// in addition to the alloc itself).  This isn't topical to the question of
+// whether or not interfaces are a good idea; just for contextualizing.
+//
+func BenchmarkCidMap_CidStr(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		mp := map[CidStr]int{}
+		for x := 0; x < 100; x++ {
+			mp[NewCidStr(0, uint64(x), []byte{})] = x
+		}
+	}
+}
+
+// BenchmarkCidMap_CidIface is in the family of BenchmarkCidMap_CidStr:
+// it is identical except the map key type is declared as the Cid interface
+// (which forces every inserted CidStr to be boxed, changing performance).
+func BenchmarkCidMap_CidIface(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		mp := map[Cid]int{}
+		for x := 0; x < 100; x++ {
+			mp[NewCidStr(0, uint64(x), []byte{})] = x
+		}
+	}
+}
+
+// BenchmarkCidMap_CidStrAvoidMapGrowth is in the family of BenchmarkCidMap_CidStr:
+// it is identical except the map is created with a size hint of 100 (matching the
+// number of insertions), which removes some allocations (5, in practice, apparently).
+func BenchmarkCidMap_CidStrAvoidMapGrowth(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		mp := make(map[CidStr]int, 100)
+		for x := 0; x < 100; x++ {
+			mp[NewCidStr(0, uint64(x), []byte{})] = x
+		}
+	}
+}
--- a/_rsrch/cidiface/cidString.go
+++ b/_rsrch/cidiface/cidString.go
@@ -0,0 +1,161 @@
+package cid
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	mbase "github.com/multiformats/go-multibase"
+	mh "github.com/multiformats/go-multihash"
+)
+
+//=================
+// def & accessors
+//=================
+
+var _ Cid = CidStr("")
+var _ map[CidStr]struct{} = nil
+
+// CidStr is a representation of a Cid as a string type containing binary.
+//
+// Using golang's string type is preferable over byte slices even for binary
+// data because golang strings are immutable, usable as map keys,
+// trivially comparable with built-in equals operators, etc.
+//
+// Please do not cast strings or bytes into the CidStr type directly;
+// use a parse method which validates the data and yields a CidStr.
+type CidStr string
+
+// EmptyCidStr is a constant for a zero/uninitialized/sentinel-value cid;
+// it is declared mainly for readability in checks for sentinel values.
+const EmptyCidStr = CidStr("")
+
+func (c CidStr) Version() uint64 {
+	bytes := []byte(c)
+	v, _ := binary.Uvarint(bytes)
+	return v
+}
+
+func (c CidStr) Multicodec() uint64 {
+	bytes := []byte(c)
+	_, n := binary.Uvarint(bytes) // n is the encoded length of the version varint; skip past it
+	codec, _ := binary.Uvarint(bytes[n:])
+	return codec
+}
+
+func (c CidStr) Multihash() mh.Multihash {
+	bytes := []byte(c)
+	_, n1 := binary.Uvarint(bytes)      // n1: encoded length of the version varint
+	_, n2 := binary.Uvarint(bytes[n1:]) // n2: encoded length of the codec varint
+	return mh.Multihash(bytes[n1+n2:])  // everything after both varints is the multihash
+}
+
+// String returns the default string form of a Cid: the bare base58 multihash for
+// version 0, or a multibase Base58BTC encoding of the whole Cid for version 1; any other version panics.
+func (c CidStr) String() string {
+	switch c.Version() {
+	case 0:
+		return c.Multihash().B58String()
+	case 1:
+		mbstr, err := mbase.Encode(mbase.Base58BTC, []byte(c))
+		if err != nil {
+			panic("should not error with hardcoded mbase: " + err.Error())
+		}
+		return mbstr
+	default:
+		panic("not possible to reach this point")
+	}
+}
+
+// Bytes produces a raw binary format of the CID: only the multihash segment
+// for version 0, and the complete underlying byte string for version 1.
+// (For CidStr, this method is only distinct from casting because of
+// compatibility with v0 CIDs.)  Any other version panics.
+func (c CidStr) Bytes() []byte {
+	switch c.Version() {
+	case 0:
+		return c.Multihash()
+	case 1:
+		return []byte(c)
+	default:
+		panic("not possible to reach this point")
+	}
+}
+
+// Prefix builds and returns a Prefix out of a Cid.
+func (c CidStr) Prefix() Prefix {
+	dec, _ := mh.Decode(c.Multihash()) // assumes a valid multihash, so the error is dropped. NOTE(review): confirm dec cannot be nil here
+	return Prefix{
+		MhType:   dec.Code,
+		MhLength: dec.Length,
+		Version:  c.Version(),
+		Codec:    c.Multicodec(),
+	}
+}
+
+//==================================
+// parsers & validators & factories
+//==================================
+
+func NewCidStr(version uint64, codecType uint64, mhash mh.Multihash) CidStr {
+	hashlen := len(mhash)
+	// room for two maximum-length varints (version and codec) plus the hash
+	buf := make([]byte, 2*binary.MaxVarintLen64+hashlen)
+	n := binary.PutUvarint(buf, version)
+	n += binary.PutUvarint(buf[n:], codecType)
+	cn := copy(buf[n:], mhash)
+	if cn != hashlen {
+		panic("copy hash length is inconsistent")
+	}
+	return CidStr(buf[:n+hashlen])
+}
+
+// CidStrParse takes a binary byte slice, parses it, and returns either
+// a valid CidStr, or the zero CidStr and an error.
+//
+// For CidV1, the data buffer is in the form:
+//
+//     <version><codec-type><multihash>
+//
+// CidV0 are also supported. In particular, data buffers that are exactly
+// 34 bytes long and start with the bytes [18, 32, ...] are considered
+// bare binary multihashes.
+//
+// The multicodec varint's decoded length is checked with uvError, but its
+// value is not validated against any table of known codecs.
+//
+// Multibase encoding should already have been unwrapped before parsing;
+// if you have a multibase-enveloped string, use CidStrDecode instead.
+//
+// CidStrParse is the inverse of Cid.Bytes().
+func CidStrParse(data []byte) (CidStr, error) {
+	if len(data) == 34 && data[0] == 18 && data[1] == 32 {
+		h, err := mh.Cast(data)
+		if err != nil {
+			return EmptyCidStr, err
+		}
+		return NewCidStr(0, DagProtobuf, h), nil
+	}
+
+	vers, n := binary.Uvarint(data)
+	if err := uvError(n); err != nil {
+		return EmptyCidStr, err
+	}
+
+	if vers != 0 && vers != 1 {
+		return EmptyCidStr, fmt.Errorf("invalid cid version number: %d", vers)
+	}
+
+	_, cn := binary.Uvarint(data[n:])
+	if err := uvError(cn); err != nil {
+		return EmptyCidStr, err
+	}
+
+	rest := data[n+cn:]
+	h, err := mh.Cast(rest)
+	if err != nil {
+		return EmptyCidStr, err
+	}
+
+	// REVIEW: if the data is longer than the mh.len expects, we silently ignore it?  should we?
+	return CidStr(data[0 : n+cn+len(h)]), nil
+}
--- a/_rsrch/cidiface/cidStruct.go
+++ b/_rsrch/cidiface/cidStruct.go
@@ -0,0 +1,164 @@
+package cid
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	mbase "github.com/multiformats/go-multibase"
+	mh "github.com/multiformats/go-multihash"
+)
+
+//=================
+// def & accessors
+//=================
+
+var _ Cid = CidStruct{}
+
+//var _ map[CidStruct]struct{} = nil // Will not compile!  See struct def docs.
+//var _ map[Cid]struct{} = map[Cid]struct{}{CidStruct{}: struct{}{}} // Legal to compile...
+// but you'll get panics: "runtime error: hash of unhashable type cid.CidStruct"
+
+// CidStruct represents a CID in a struct format.
+//
+// This format complies with the exact same Cid interface as the CidStr
+// implementation, but completely pre-parses the Cid metadata.
+// CidStruct is a tad quicker in case of repeatedly accessed fields,
+// but requires more reshuffling to parse and to serialize.
+// CidStruct is not usable as a map key, because it contains a Multihash
+// reference, which is a slice, and thus not "comparable" as a primitive.
+//
+// Beware of zero-valued CidStruct: it is difficult to distinguish an
+// incorrectly-initialized "invalid" CidStruct from one representing a v0 cid.
+type CidStruct struct {
+	version uint64
+	codec   uint64
+	hash    mh.Multihash
+}
+
+// EmptyCidStruct is a constant for a zero/uninitialized/sentinel-value cid;
+// it is declared mainly for readability in checks for sentinel values.
+//
+// Note: it's not actually a const; the compiler does not allow const structs.
+var EmptyCidStruct = CidStruct{}
+
+func (c CidStruct) Version() uint64 {
+	return c.version
+}
+
+func (c CidStruct) Multicodec() uint64 {
+	return c.codec
+}
+
+func (c CidStruct) Multihash() mh.Multihash {
+	return c.hash
+}
+
+// String returns the default string form of a Cid: the bare base58 multihash for
+// version 0, or a multibase Base58BTC encoding of Bytes() for version 1; any other version panics.
+func (c CidStruct) String() string {
+	switch c.Version() {
+	case 0:
+		return c.Multihash().B58String()
+	case 1:
+		mbstr, err := mbase.Encode(mbase.Base58BTC, c.Bytes())
+		if err != nil {
+			panic("should not error with hardcoded mbase: " + err.Error())
+		}
+		return mbstr
+	default:
+		panic("not possible to reach this point")
+	}
+}
+
+// Bytes produces a raw binary format of the CID.
+func (c CidStruct) Bytes() []byte {
+	switch c.version {
+	case 0:
+		return []byte(c.hash)
+	case 1:
+		// room for two maximum-length varints (version and codec) plus the hash
+		buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash))
+		n := binary.PutUvarint(buf, c.version)
+		n += binary.PutUvarint(buf[n:], c.codec)
+		cn := copy(buf[n:], c.hash)
+		if cn != len(c.hash) {
+			panic("copy hash length is inconsistent")
+		}
+		return buf[:n+len(c.hash)]
+	default:
+		panic("not possible to reach this point")
+	}
+}
+
+// Prefix builds and returns a Prefix out of a Cid.
+func (c CidStruct) Prefix() Prefix {
+	dec, _ := mh.Decode(c.hash) // assumes a valid multihash, so the error is dropped. NOTE(review): confirm dec cannot be nil here
+	return Prefix{
+		MhType:   dec.Code,
+		MhLength: dec.Length,
+		Version:  c.version,
+		Codec:    c.codec,
+	}
+}
+
+//==================================
+// parsers & validators & factories
+//==================================
+
+// CidStructParse takes a binary byte slice, parses it, and returns either
+// a valid CidStruct, or the zero CidStruct and an error.
+//
+// For CidV1, the data buffer is in the form:
+//
+//     <version><codec-type><multihash>
+//
+// CidV0 are also supported. In particular, data buffers that are exactly
+// 34 bytes long and start with the bytes [18, 32, ...] are considered
+// bare binary multihashes.
+//
+// The multicodec varint's decoded length is checked with uvError, but its
+// value is not validated against any table of known codecs.
+//
+// Multibase encoding should already have been unwrapped before parsing;
+// if you have a multibase-enveloped string, use CidStructDecode instead.
+//
+// CidStructParse is the inverse of Cid.Bytes().
+func CidStructParse(data []byte) (CidStruct, error) {
+	if len(data) == 34 && data[0] == 18 && data[1] == 32 {
+		h, err := mh.Cast(data)
+		if err != nil {
+			return EmptyCidStruct, err
+		}
+		return CidStruct{
+			codec:   DagProtobuf,
+			version: 0,
+			hash:    h,
+		}, nil
+	}
+
+	vers, n := binary.Uvarint(data)
+	if err := uvError(n); err != nil {
+		return EmptyCidStruct, err
+	}
+
+	if vers != 0 && vers != 1 {
+		return EmptyCidStruct, fmt.Errorf("invalid cid version number: %d", vers)
+	}
+
+	codec, cn := binary.Uvarint(data[n:])
+	if err := uvError(cn); err != nil {
+		return EmptyCidStruct, err
+	}
+
+	rest := data[n+cn:]
+	h, err := mh.Cast(rest)
+	if err != nil {
+		return EmptyCidStruct, err
+	}
+
+	return CidStruct{
+		version: vers,
+		codec:   codec,
+		hash:    h,
+	}, nil
+}
--- a/_rsrch/cidiface/enums.go
+++ b/_rsrch/cidiface/enums.go
@@ -0,0 +1,79 @@
+package cid
+
+// These are multicodec-packed content types. They should match
+// the codes described in the authoritative document:
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+const (
+	Raw = 0x55
+
+	DagProtobuf = 0x70
+	DagCBOR     = 0x71
+	Libp2pKey   = 0x72
+
+	GitRaw = 0x78
+
+	EthBlock           = 0x90
+	EthBlockList       = 0x91
+	EthTxTrie          = 0x92
+	EthTx              = 0x93
+	EthTxReceiptTrie   = 0x94
+	EthTxReceipt       = 0x95
+	EthStateTrie       = 0x96
+	EthAccountSnapshot = 0x97
+	EthStorageTrie     = 0x98
+	BitcoinBlock       = 0xb0
+	BitcoinTx          = 0xb1
+	ZcashBlock         = 0xc0
+	ZcashTx            = 0xc1
+	DecredBlock        = 0xe0
+	DecredTx           = 0xe1
+)
+
+// Codecs maps the name of a codec to its type
+var Codecs = map[string]uint64{
+	"v0":                   DagProtobuf,
+	"raw":                  Raw,
+	"protobuf":             DagProtobuf,
+	"cbor":                 DagCBOR,
+	"libp2p-key":           Libp2pKey,
+	"git-raw":              GitRaw,
+	"eth-block":            EthBlock,
+	"eth-block-list":       EthBlockList,
+	"eth-tx-trie":          EthTxTrie,
+	"eth-tx":               EthTx,
+	"eth-tx-receipt-trie":  EthTxReceiptTrie,
+	"eth-tx-receipt":       EthTxReceipt,
+	"eth-state-trie":       EthStateTrie,
+	"eth-account-snapshot": EthAccountSnapshot,
+	"eth-storage-trie":     EthStorageTrie,
+	"bitcoin-block":        BitcoinBlock,
+	"bitcoin-tx":           BitcoinTx,
+	"zcash-block":          ZcashBlock,
+	"zcash-tx":             ZcashTx,
+	"decred-block":         DecredBlock,
+	"decred-tx":            DecredTx,
+}
+
+// CodecToStr maps the numeric codec to its name
+var CodecToStr = map[uint64]string{
+	Raw:                "raw",
+	DagProtobuf:        "protobuf",
+	DagCBOR:            "cbor",
+	Libp2pKey:          "libp2p-key",
+	GitRaw:             "git-raw",
+	EthBlock:           "eth-block",
+	EthBlockList:       "eth-block-list",
+	EthTxTrie:          "eth-tx-trie",
+	EthTx:              "eth-tx",
+	EthTxReceiptTrie:   "eth-tx-receipt-trie",
+	EthTxReceipt:       "eth-tx-receipt",
+	EthStateTrie:       "eth-state-trie",
+	EthAccountSnapshot: "eth-account-snapshot",
+	EthStorageTrie:     "eth-storage-trie",
+	BitcoinBlock:       "bitcoin-block",
+	BitcoinTx:          "bitcoin-tx",
+	ZcashBlock:         "zcash-block",
+	ZcashTx:            "zcash-tx",
+	DecredBlock:        "decred-block",
+	DecredTx:           "decred-tx",
+}
--- a/_rsrch/cidiface/errors.go
+++ b/_rsrch/cidiface/errors.go
@@ -0,0 +1,24 @@
+package cid
+
+import (
+	"errors"
+)
+
+var (
+	// ErrVarintBuffSmall means that a buffer passed to the cid parser was not
+	// long enough, or did not contain a valid cid
+	ErrVarintBuffSmall = errors.New("reading varint: buffer too small")
+
+	// ErrVarintTooBig means that the varint in the given cid does not fit
+	// in 64 bits
+	ErrVarintTooBig = errors.New("reading varint: varint bigger than 64bits" +
+		" and not supported")
+
+	// ErrCidTooShort means that the cid passed to decode was not long
+	// enough to be a valid Cid
+	ErrCidTooShort = errors.New("cid too short")
+
+	// ErrInvalidEncoding means that selected encoding is not supported
+	// by this Cid version
+	ErrInvalidEncoding = errors.New("invalid base encoding")
+)
--- a/_rsrch/cidiface/misc.go
+++ b/_rsrch/cidiface/misc.go
@@ -0,0 +1,12 @@
+package cid
+
+// uvError turns the byte count reported by a uvarint read into an error:
+// zero yields ErrVarintBuffSmall, a negative count yields ErrVarintTooBig,
+// and any positive count yields nil.
+func uvError(read int) error {
+	if read == 0 {
+		return ErrVarintBuffSmall
+	}
+	if read < 0 {
+		return ErrVarintTooBig
+	}
+	return nil
+}
--- a/builder.go
+++ b/builder.go
@@ -0,0 +1,74 @@
+package cid
+
+import (
+	mh "github.com/multiformats/go-multihash"
+)
+
+// Builder is implemented by types that can construct a Cid from raw data
+// and that can report or change the codec they use.
+type Builder interface {
+	// Sum computes a Cid for data.
+	Sum(data []byte) (Cid, error)
+	// GetCodec returns the codec the builder currently uses.
+	GetCodec() uint64
+	// WithCodec returns a Builder that uses the given codec.
+	WithCodec(uint64) Builder
+}
+
+// V0Builder is a Builder whose Sum hashes with sha2-256 and wraps the
+// resulting multihash directly as a Cid.
+type V0Builder struct{}
+
+// V1Builder is a Builder whose Sum produces CidV1 values using the
+// configured codec, multihash type and multihash length.
+type V1Builder struct {
+	Codec    uint64
+	MhType   uint64
+	MhLength int // MhLength <= 0 means the default length
+}
+
+// GetCodec returns the codec stored in the Prefix.
+func (p Prefix) GetCodec() uint64 {
+	return p.Codec
+}
+
+// WithCodec returns a copy of the Prefix that uses codec c. Asking for the
+// codec already in use returns the Prefix unchanged; switching to any codec
+// other than DagProtobuf also sets the version to 1.
+func (p Prefix) WithCodec(c uint64) Builder {
+	if c != p.Codec {
+		p.Codec = c
+		if c != DagProtobuf {
+			p.Version = 1
+		}
+	}
+	return p
+}
+
+// Sum hashes data with sha2-256 (length -1, i.e. the default) and returns
+// the multihash bytes wrapped directly as a Cid.
+func (p V0Builder) Sum(data []byte) (Cid, error) {
+	hash, err := mh.Sum(data, mh.SHA2_256, -1)
+	if err != nil {
+		return Undef, err
+	}
+	return Cid{string(hash)}, nil
+}
+
+// GetCodec always returns DagProtobuf.
+func (p V0Builder) GetCodec() uint64 {
+	return DagProtobuf
+}
+
+// WithCodec returns the receiver when c is DagProtobuf. For any other codec
+// it returns a V1Builder with that codec and sha2-256 as the hash type.
+func (p V0Builder) WithCodec(c uint64) Builder {
+	if c == DagProtobuf {
+		return p
+	}
+	return V1Builder{Codec: c, MhType: mh.SHA2_256}
+}
+
+// Sum hashes data with the configured multihash type and returns a CidV1
+// carrying the configured codec. A non-positive MhLength is passed to the
+// hasher as -1.
+func (p V1Builder) Sum(data []byte) (Cid, error) {
+	length := -1
+	if p.MhLength > 0 {
+		length = p.MhLength
+	}
+	digest, err := mh.Sum(data, p.MhType, length)
+	if err != nil {
+		return Undef, err
+	}
+	return NewCidV1(p.Codec, digest), nil
+}
+
+// GetCodec returns the codec configured on the builder.
+func (p V1Builder) GetCodec() uint64 {
+	return p.Codec
+}
+
+// WithCodec returns a copy of the builder with its codec set to c.
+func (p V1Builder) WithCodec(c uint64) Builder {
+	p.Codec = c
+	return p
+}
--- a/builder_test.go
+++ b/builder_test.go
@@ -0,0 +1,92 @@
+package cid
+
+import (
+	"testing"
+
+	mh "github.com/multiformats/go-multihash"
+)
+
+// TestV0Builder checks that V0Builder.Sum yields the same Cid and Prefix as
+// NewCidV0 applied to a sha2-256 multihash of the same data.
+func TestV0Builder(t *testing.T) {
+	data := []byte("this is some test content")
+
+	// Construct c1
+	format := V0Builder{}
+	c1, err := format.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Construct c2
+	hash, err := mh.Sum(data, mh.SHA2_256, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	c2 := NewCidV0(hash)
+
+	if !c1.Equals(c2) {
+		t.Fatal("cids mismatch")
+	}
+	if c1.Prefix() != c2.Prefix() {
+		t.Fatal("prefixes mismatch")
+	}
+}
+
+// TestV1Builder checks that V1Builder.Sum yields the same Cid and Prefix as
+// NewCidV1 applied to a sha2-256 multihash of the same data.
+func TestV1Builder(t *testing.T) {
+	data := []byte("this is some test content")
+
+	// Construct c1
+	format := V1Builder{Codec: DagCBOR, MhType: mh.SHA2_256}
+	c1, err := format.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Construct c2
+	hash, err := mh.Sum(data, mh.SHA2_256, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	c2 := NewCidV1(DagCBOR, hash)
+
+	if !c1.Equals(c2) {
+		t.Fatal("cids mismatch")
+	}
+	if c1.Prefix() != c2.Prefix() {
+		t.Fatal("prefixes mismatch")
+	}
+}
+
+// TestCodecChange runs testCodecChange against every Builder implementation:
+// a version 0 Prefix, a version 1 Prefix, V0Builder and V1Builder, each
+// starting from the DagProtobuf codec.
+func TestCodecChange(t *testing.T) {
+	t.Run("Prefix-CidV0", func(t *testing.T) {
+		p := Prefix{Version: 0, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: mh.DefaultLengths[mh.SHA2_256]}
+		testCodecChange(t, p)
+	})
+	t.Run("Prefix-CidV1", func(t *testing.T) {
+		p := Prefix{Version: 1, Codec: DagProtobuf, MhType: mh.SHA2_256, MhLength: mh.DefaultLengths[mh.SHA2_256]}
+		testCodecChange(t, p)
+	})
+	t.Run("V0Builder", func(t *testing.T) {
+		testCodecChange(t, V0Builder{})
+	})
+	t.Run("V1Builder", func(t *testing.T) {
+		testCodecChange(t, V1Builder{Codec: DagProtobuf, MhType: mh.SHA2_256})
+	})
+}
+
+// testCodecChange requires b to start on DagProtobuf, switches it to Raw
+// with WithCodec, and checks that a Cid summed afterwards reports Raw as
+// its type.
+func testCodecChange(t *testing.T, b Builder) {
+	data := []byte("this is some test content")
+
+	if b.GetCodec() != DagProtobuf {
+		t.Fatal("original builder not using Protobuf codec")
+	}
+
+	b = b.WithCodec(Raw)
+	c, err := b.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if c.Type() != Raw {
+		t.Fatal("new cid codec did not change to Raw")
+	}
+}
--- a/cid.go
+++ b/cid.go
@@ -1,107 +1,357 @@
+// Package cid implements the Content-IDentifiers specification
+// (https://github.com/ipld/cid) in Go. CIDs are
+// self-describing content-addressed identifiers useful for
+// distributed information systems. CIDs are used in the IPFS
+// (https://ipfs.io) project ecosystem.
+//
+// CIDs have two major versions. A CIDv0 corresponds to a multihash of type
+// DagProtobuf, is deprecated and exists for compatibility reasons. Usually,
+// CIDv1 should be used.
+//
+// A CIDv1 has four parts:
+//
+//     <cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address>
+//
+// As shown above, the CID implementation relies heavily on Multiformats,
+// particularly Multibase
+// (https://github.com/multiformats/go-multibase), Multicodec
+// (https://github.com/multiformats/multicodec) and Multihash
+// implementations (https://github.com/multiformats/go-multihash).
 package cid

 import (
 	"bytes"
-	"encoding/binary"
+	"encoding"
+	"encoding/json"
+	"errors"
 	"fmt"
+	"io"
+	"strings"

-	mh "github.com/jbenet/go-multihash"
 	mbase "github.com/multiformats/go-multibase"
+	mh "github.com/multiformats/go-multihash"
+	varint "github.com/multiformats/go-varint"
 )

+// UnsupportedVersionString just holds an error message
 const UnsupportedVersionString = "<unsupported cid version>"

-const (
-	Protobuf = iota
-	Raw
-	JSON
-	CBOR
+var (
+	// ErrCidTooShort means that the cid passed to decode was not long
+	// enough to be a valid Cid
+	ErrCidTooShort = errors.New("cid too short")
+
+	// ErrInvalidEncoding means that selected encoding is not supported
+	// by this Cid version
+	ErrInvalidEncoding = errors.New("invalid base encoding")
 )

-func NewCidV0(h mh.Multihash) *Cid {
-	return &Cid{
-		version: 0,
-		codec:   Protobuf,
-		hash:    h,
+// These are multicodec-packed content types. They should match
+// the codes described in the authoritative document:
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+const (
+	Raw = 0x55
+
+	DagProtobuf = 0x70
+	DagCBOR     = 0x71
+	Libp2pKey   = 0x72
+
+	GitRaw = 0x78
+
+	EthBlock              = 0x90
+	EthBlockList          = 0x91
+	EthTxTrie             = 0x92
+	EthTx                 = 0x93
+	EthTxReceiptTrie      = 0x94
+	EthTxReceipt          = 0x95
+	EthStateTrie          = 0x96
+	EthAccountSnapshot    = 0x97
+	EthStorageTrie        = 0x98
+	BitcoinBlock          = 0xb0
+	BitcoinTx             = 0xb1
+	ZcashBlock            = 0xc0
+	ZcashTx               = 0xc1
+	DecredBlock           = 0xe0
+	DecredTx              = 0xe1
+	DashBlock             = 0xf0
+	DashTx                = 0xf1
+	FilCommitmentUnsealed = 0xf101
+	FilCommitmentSealed   = 0xf102
+)
+
+// Codecs maps the name of a codec to its type
+var Codecs = map[string]uint64{
+	"v0":                      DagProtobuf,
+	"raw":                     Raw,
+	"protobuf":                DagProtobuf,
+	"cbor":                    DagCBOR,
+	"libp2p-key":              Libp2pKey,
+	"git-raw":                 GitRaw,
+	"eth-block":               EthBlock,
+	"eth-block-list":          EthBlockList,
+	"eth-tx-trie":             EthTxTrie,
+	"eth-tx":                  EthTx,
+	"eth-tx-receipt-trie":     EthTxReceiptTrie,
+	"eth-tx-receipt":          EthTxReceipt,
+	"eth-state-trie":          EthStateTrie,
+	"eth-account-snapshot":    EthAccountSnapshot,
+	"eth-storage-trie":        EthStorageTrie,
+	"bitcoin-block":           BitcoinBlock,
+	"bitcoin-tx":              BitcoinTx,
+	"zcash-block":             ZcashBlock,
+	"zcash-tx":                ZcashTx,
+	"decred-block":            DecredBlock,
+	"decred-tx":               DecredTx,
+	"dash-block":              DashBlock,
+	"dash-tx":                 DashTx,
+	"fil-commitment-unsealed": FilCommitmentUnsealed,
+	"fil-commitment-sealed":   FilCommitmentSealed,
+}
+
+// CodecToStr maps the numeric codec to its name. It holds one entry per
+// codec constant; DagProtobuf maps to "protobuf", so the "v0" alias in
+// Codecs has no entry of its own here.
+var CodecToStr = map[uint64]string{
+	Raw:                   "raw",
+	DagProtobuf:           "protobuf",
+	DagCBOR:               "cbor",
+	Libp2pKey:             "libp2p-key",
+	GitRaw:                "git-raw",
+	EthBlock:              "eth-block",
+	EthBlockList:          "eth-block-list",
+	EthTxTrie:             "eth-tx-trie",
+	EthTx:                 "eth-tx",
+	EthTxReceiptTrie:      "eth-tx-receipt-trie",
+	EthTxReceipt:          "eth-tx-receipt",
+	EthStateTrie:          "eth-state-trie",
+	EthAccountSnapshot:    "eth-account-snapshot",
+	EthStorageTrie:        "eth-storage-trie",
+	BitcoinBlock:          "bitcoin-block",
+	BitcoinTx:             "bitcoin-tx",
+	ZcashBlock:            "zcash-block",
+	ZcashTx:               "zcash-tx",
+	DecredBlock:           "decred-block",
+	DecredTx:              "decred-tx",
+	DashBlock:             "dash-block",
+	DashTx:                "dash-tx",
+	FilCommitmentUnsealed: "fil-commitment-unsealed",
+	FilCommitmentSealed:   "fil-commitment-sealed",
+}
+
+// tryNewCidV0 tries to convert a multihash into a CIDv0 CID and returns an
+// error on failure.
+func tryNewCidV0(mhash mh.Multihash) (Cid, error) {
+	// Need to make sure hash is valid for CidV0 otherwise we will
+	// incorrectly detect it as CidV1 in the Version() method
+	dec, err := mh.Decode(mhash)
+	if err != nil {
+		return Undef, err
+	}
+	if dec.Code != mh.SHA2_256 || dec.Length != 32 {
+		return Undef, fmt.Errorf("invalid hash for cidv0 %d-%d", dec.Code, dec.Length)
+	}
+	return Cid{string(mhash)}, nil
+}
+
+// NewCidV0 returns a Cid-wrapped multihash.
+// They exist to allow IPFS to work with Cids while keeping
+// compatibility with the plain-multihash format used in IPFS.
+// NewCidV1 should be used preferentially.
+//
+// Panics if the multihash cannot be decoded or is not a 32-byte sha2-256
+// hash.
+func NewCidV0(mhash mh.Multihash) Cid {
+	c, err := tryNewCidV0(mhash)
+	if err != nil {
+		panic(err)
+	}
+	return c
+}
+
+// NewCidV1 returns a new Cid using the given multicodec-packed
+// content type.
+//
+// The multihash bytes are appended as-is, without being decoded. The only
+// panic in this function guards against copy writing fewer bytes than
+// len(mhash).
+func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
+	hashlen := len(mhash)
+	// one byte for the version varint (1), the codec varint, then the hash
+	buf := make([]byte, 1+varint.UvarintSize(codecType)+hashlen)
+	n := varint.PutUvarint(buf, 1)
+	n += varint.PutUvarint(buf[n:], codecType)
+	cn := copy(buf[n:], mhash)
+	if cn != hashlen {
+		panic("copy hash length is inconsistent")
+	}
+
+	return Cid{string(buf[:n+hashlen])}
+}
+
+var _ encoding.BinaryMarshaler = Cid{}
+var _ encoding.BinaryUnmarshaler = (*Cid)(nil)
+var _ encoding.TextMarshaler = Cid{}
+var _ encoding.TextUnmarshaler = (*Cid)(nil)
+
+// Cid represents a self-describing content addressed
+// identifier. It is formed by a Version, a Codec (which indicates
+// a multicodec-packed content type) and a Multihash.
+type Cid struct{ str string }
+
+// Undef can be used to represent a nil or undefined Cid, using Cid{}
+// directly is also acceptable.
+var Undef = Cid{}
+
+// Defined returns true if a Cid is defined
+// Calling any other methods on an undefined Cid will result in
+// undefined behavior.
+func (c Cid) Defined() bool {
+	return c.str != ""
+}
+
+// Parse is a short-hand function to perform Decode, Cast etc... on
+// a generic interface{} type.
+func Parse(v interface{}) (Cid, error) {
+	switch v2 := v.(type) {
+	case string:
+		if strings.Contains(v2, "/ipfs/") {
+			return Decode(strings.Split(v2, "/ipfs/")[1])
+		}
+		return Decode(v2)
+	case []byte:
+		return Cast(v2)
+	case mh.Multihash:
+		return tryNewCidV0(v2)
+	case Cid:
+		return v2, nil
+	default:
+		return Undef, fmt.Errorf("can't parse %+v as Cid", v2)
 	}
 }

-func NewCidV1(c uint64, h mh.Multihash) *Cid {
-	return &Cid{
-		version: 1,
-		codec:   c,
-		hash:    h,
+// Decode parses a Cid-encoded string and returns a Cid object.
+// For CidV1, a Cid-encoded string is primarily a multibase string:
+//
+//     <multibase-type-code><base-encoded-string>
+//
+// The base-encoded string represents a:
+//
+// <version><codec-type><multihash>
+//
+// Decode will also detect and parse CidV0 strings. Strings
+// starting with "Qm" are considered CidV0 and treated directly
+// as B58-encoded multihashes.
+func Decode(v string) (Cid, error) {
+	if len(v) < 2 {
+		return Undef, ErrCidTooShort
 	}
-}

-type Cid struct {
-	version uint64
-	codec   uint64
-	hash    mh.Multihash
-}
-
-func Decode(v string) (*Cid, error) {
 	if len(v) == 46 && v[:2] == "Qm" {
 		hash, err := mh.FromB58String(v)
 		if err != nil {
-			return nil, err
+			return Undef, err
 		}

-		return NewCidV0(hash), nil
+		return tryNewCidV0(hash)
 	}

 	_, data, err := mbase.Decode(v)
 	if err != nil {
-		return nil, err
+		return Undef, err
 	}

 	return Cast(data)
 }

-func Cast(data []byte) (*Cid, error) {
-	if len(data) == 34 && data[0] == 18 && data[1] == 32 {
-		h, err := mh.Cast(data)
-		if err != nil {
-			return nil, err
-		}
-
-		return &Cid{
-			codec:   Protobuf,
-			version: 0,
-			hash:    h,
-		}, nil
+// ExtractEncoding extracts the multibase encoding from a Cid string. If
+// Decode on the same string did not return an error neither will this function.
+func ExtractEncoding(v string) (mbase.Encoding, error) {
+	if len(v) < 2 {
+		return -1, ErrCidTooShort
 	}

-	vers, n := binary.Uvarint(data)
-	if vers != 0 && vers != 1 {
-		return nil, fmt.Errorf("invalid cid version number: %d", vers)
+	if len(v) == 46 && v[:2] == "Qm" {
+		return mbase.Base58BTC, nil
 	}

-	codec, cn := binary.Uvarint(data[n:])
+	encoding := mbase.Encoding(v[0])

-	rest := data[n+cn:]
-	h, err := mh.Cast(rest)
+	// check encoding is valid
+	_, err := mbase.NewEncoder(encoding)
 	if err != nil {
-		return nil, err
+		return -1, err
 	}

-	return &Cid{
-		version: vers,
-		codec:   codec,
-		hash:    h,
-	}, nil
+	return encoding, nil
 }

-func (c *Cid) Type() uint64 {
-	return c.codec
+// Cast takes a Cid data slice, parses it and returns a Cid.
+// For CidV1, the data buffer is in the form:
+//
+//     <version><codec-type><multihash>
+//
+// CidV0 are also supported. In particular, data buffers of length
+// 34 bytes which start with the bytes [18,32...] are considered
+// binary multihashes.
+//
+// Please use Decode when parsing a regular Cid string, as Cast does not
+// expect multibase-encoded data. Cast accepts the output of Cid.Bytes().
+func Cast(data []byte) (Cid, error) {
+	nr, c, err := CidFromBytes(data)
+	if err != nil {
+		return Undef, err
+	}
+
+	if nr != len(data) {
+		return Undef, fmt.Errorf("trailing bytes in data buffer passed to cid Cast")
+	}
+
+	return c, nil
 }

-func (c *Cid) String() string {
-	switch c.version {
+// UnmarshalBinary is equivalent to Cast(). It implements the
+// encoding.BinaryUnmarshaler interface.
+func (c *Cid) UnmarshalBinary(data []byte) error {
+	casted, err := Cast(data)
+	if err != nil {
+		return err
+	}
+	c.str = casted.str
+	return nil
+}
+
+// UnmarshalText is equivalent to Decode(). It implements the
+// encoding.TextUnmarshaler interface.
+func (c *Cid) UnmarshalText(text []byte) error {
+	decodedCid, err := Decode(string(text))
+	if err != nil {
+		return err
+	}
+	c.str = decodedCid.str
+	return nil
+}
+
+// Version returns the Cid version.
+func (c Cid) Version() uint64 {
+	if len(c.str) == 34 && c.str[0] == 18 && c.str[1] == 32 {
+		return 0
+	}
+	return 1
+}
+
+// Type returns the multicodec-packed content type of a Cid.
+func (c Cid) Type() uint64 {
+	if c.Version() == 0 {
+		return DagProtobuf
+	}
+	_, n, _ := uvarint(c.str)
+	codec, _, _ := uvarint(c.str[n:])
+	return codec
+}
+
+// String returns the default string representation of a
+// Cid. Currently, Base32 is used for CIDV1 as the encoding for the
+// multibase string, Base58 is used for CIDV0.
+func (c Cid) String() string {
+	switch c.Version() {
 	case 0:
-		return c.hash.B58String()
+		return c.Hash().B58String()
 	case 1:
-		mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1())
+		mbstr, err := mbase.Encode(mbase.Base32, c.Bytes())
 		if err != nil {
 			panic("should not error with hardcoded mbase: " + err.Error())
 		}
@@ -112,59 +362,299 @@ func (c *Cid) String() string {
 	}
 }

-func (c *Cid) Hash() mh.Multihash {
-	return c.hash
-}
-
-func (c *Cid) Bytes() []byte {
-	switch c.version {
+// StringOfBase returns the string representation of a Cid
+// encoded in the selected base
+func (c Cid) StringOfBase(base mbase.Encoding) (string, error) {
+	switch c.Version() {
 	case 0:
-		return c.bytesV0()
+		if base != mbase.Base58BTC {
+			return "", ErrInvalidEncoding
+		}
+		return c.Hash().B58String(), nil
 	case 1:
-		return c.bytesV1()
+		return mbase.Encode(base, c.Bytes())
 	default:
 		panic("not possible to reach this point")
 	}
 }

-func (c *Cid) bytesV0() []byte {
-	return []byte(c.hash)
+// Encode returns the string representation of a Cid in a given base
+// when applicable.  Version 0 Cid's are always in Base58 as they do
+// not take a multibase prefix.
+func (c Cid) Encode(base mbase.Encoder) string {
+	switch c.Version() {
+	case 0:
+		return c.Hash().B58String()
+	case 1:
+		return base.Encode(c.Bytes())
+	default:
+		panic("not possible to reach this point")
+	}
 }

-func (c *Cid) bytesV1() []byte {
-	buf := make([]byte, 8+len(c.hash))
-	n := binary.PutUvarint(buf, c.version)
-	n += binary.PutUvarint(buf[n:], c.codec)
-	copy(buf[n:], c.hash)
+// Hash returns the multihash contained by a Cid.
+func (c Cid) Hash() mh.Multihash {
+	bytes := c.Bytes()

-	return buf[:n+len(c.hash)]
+	if c.Version() == 0 {
+		return mh.Multihash(bytes)
+	}
+
+	// skip version length
+	_, n1, _ := varint.FromUvarint(bytes)
+	// skip codec length
+	_, n2, _ := varint.FromUvarint(bytes[n1:])
+
+	return mh.Multihash(bytes[n1+n2:])
 }

-func (c *Cid) Equals(o *Cid) bool {
-	return c.codec == o.codec &&
-		c.version == o.version &&
-		bytes.Equal(c.hash, o.hash)
+// Bytes returns the byte representation of a Cid.
+// The output of bytes can be parsed back into a Cid
+// with Cast().
+func (c Cid) Bytes() []byte {
+	return []byte(c.str)
 }

+// ByteLen returns the length of the CID in bytes.
+// It's equivalent to `len(c.Bytes())`, but works without an allocation,
+// and should therefore be preferred.
+//
+// (See also the WriteBytes method for other important operations that work without allocation.)
+func (c Cid) ByteLen() int {
+	return len(c.str)
+}
+
+// WriteBytes writes the CID bytes to the given writer.
+// This method works without incurring any allocation.
+//
+// (See also the ByteLen method for other important operations that work without allocation.)
+func (c Cid) WriteBytes(w io.Writer) (int, error) {
+	n, err := io.WriteString(w, c.str)
+	if err != nil {
+		return n, err
+	}
+	if n != len(c.str) {
+		return n, fmt.Errorf("failed to write entire cid string")
+	}
+	return n, nil
+}
+
+// MarshalBinary is equivalent to Bytes(). It implements the
+// encoding.BinaryMarshaler interface.
+func (c Cid) MarshalBinary() ([]byte, error) {
+	return c.Bytes(), nil
+}
+
+// MarshalText is equivalent to String(). It implements the
+// encoding.TextMarshaler interface.
+func (c Cid) MarshalText() ([]byte, error) {
+	return []byte(c.String()), nil
+}
+
+// Equals checks that two Cids are the same.
+// In order for two Cids to be considered equal, the
+// Version, the Codec and the Multihash must match.
+func (c Cid) Equals(o Cid) bool {
+	return c == o
+}
+
+// UnmarshalJSON parses the JSON representation of a Cid.
 func (c *Cid) UnmarshalJSON(b []byte) error {
 	if len(b) < 2 {
 		return fmt.Errorf("invalid cid json blob")
 	}
-	out, err := Decode(string(b[1 : len(b)-1]))
+	obj := struct {
+		CidTarget string `json:"/"`
+	}{}
+	objptr := &obj
+	err := json.Unmarshal(b, &objptr)
+	if err != nil {
+		return err
+	}
+	if objptr == nil {
+		*c = Cid{}
+		return nil
+	}
+
+	if obj.CidTarget == "" {
+		return fmt.Errorf("cid was incorrectly formatted")
+	}
+
+	out, err := Decode(obj.CidTarget)
 	if err != nil {
 		return err
 	}

-	c.version = out.version
-	c.hash = out.hash
-	c.codec = out.codec
+	*c = out
+
 	return nil
 }

-func (c *Cid) MarshalJSON() ([]byte, error) {
-	return []byte(fmt.Sprintf("\"%s\"", c.String())), nil
+// MarshalJSON produces a JSON representation of a Cid, which looks as follows:
+//
+//    { "/": "<cid-string>" }
+//
+// Note that this formatting comes from the IPLD specification
+// (https://github.com/ipld/specs/tree/master/ipld)
+func (c Cid) MarshalJSON() ([]byte, error) {
+	if !c.Defined() {
+		return []byte("null"), nil
+	}
+	return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil
 }

-func (c *Cid) KeyString() string {
-	return string(c.Bytes())
+// KeyString returns the binary representation of the Cid as a string
+func (c Cid) KeyString() string {
+	return c.str
+}
+
+// Loggable returns a Loggable (as defined by
+// https://godoc.org/github.com/ipfs/go-log).
+func (c Cid) Loggable() map[string]interface{} {
+	return map[string]interface{}{
+		"cid": c,
+	}
+}
+
+// Prefix builds and returns a Prefix out of a Cid.
+//
+// MhType and MhLength come from decoding the Cid's multihash; Version and
+// Codec come from the Cid's Version and Type methods.
+func (c Cid) Prefix() Prefix {
+	dec, _ := mh.Decode(c.Hash()) // assuming we got a valid multihash, this will not error
+	return Prefix{
+		MhType:   dec.Code,
+		MhLength: dec.Length,
+		Version:  c.Version(),
+		Codec:    c.Type(),
+	}
+}
+
+// Prefix represents all the metadata of a Cid,
+// that is, the Version, the Codec, the Multihash type
+// and the Multihash length. It does not contain
+// any actual content information.
+// NOTE: The use of -1 in MhLength to mean default length is deprecated,
+//   use the V0Builder or V1Builder structures instead
+type Prefix struct {
+	Version  uint64
+	Codec    uint64
+	MhType   uint64
+	MhLength int
+}
+
+// Sum uses the information in a prefix to perform a multihash.Sum()
+// and return a newly constructed Cid with the resulting multihash.
+//
+// It returns an error for a version 0 prefix that is not sha2-256 with a
+// length of 32 or -1, for a hashing failure, and for any version other
+// than 0 or 1.
+func (p Prefix) Sum(data []byte) (Cid, error) {
+	length := p.MhLength
+	// For the identity hash the stored length is ignored and -1 is passed.
+	if p.MhType == mh.ID {
+		length = -1
+	}
+
+	// Reject v0 prefixes up front so NewCidV0 below is never handed a hash
+	// of another type or length.
+	if p.Version == 0 && (p.MhType != mh.SHA2_256 ||
+		(p.MhLength != 32 && p.MhLength != -1)) {
+
+		return Undef, fmt.Errorf("invalid v0 prefix")
+	}
+
+	hash, err := mh.Sum(data, p.MhType, length)
+	if err != nil {
+		return Undef, err
+	}
+
+	switch p.Version {
+	case 0:
+		return NewCidV0(hash), nil
+	case 1:
+		return NewCidV1(p.Codec, hash), nil
+	default:
+		return Undef, fmt.Errorf("invalid cid version")
+	}
+}
+
+// Bytes returns a byte representation of a Prefix. It looks like:
+//
+//     <version><codec><mh-type><mh-length>
+//
+// Each field is written as an unsigned varint.
+//
+// NOTE(review): MhLength is converted to uint64 before encoding, so a
+// negative value (such as the deprecated -1) is written as a very large
+// number — confirm callers never serialize such a Prefix.
+func (p Prefix) Bytes() []byte {
+	// Compute the exact encoded size first so the buffer is allocated once.
+	size := varint.UvarintSize(p.Version)
+	size += varint.UvarintSize(p.Codec)
+	size += varint.UvarintSize(p.MhType)
+	size += varint.UvarintSize(uint64(p.MhLength))
+
+	buf := make([]byte, size)
+	n := varint.PutUvarint(buf, p.Version)
+	n += varint.PutUvarint(buf[n:], p.Codec)
+	n += varint.PutUvarint(buf[n:], p.MhType)
+	n += varint.PutUvarint(buf[n:], uint64(p.MhLength))
+	if n != size {
+		panic("size mismatch")
+	}
+	return buf
+}
+
+// PrefixFromBytes parses a Prefix-byte representation onto a
+// Prefix.
+//
+// It reads four unsigned varints in order: version, codec, multihash type
+// and multihash length. The first read error is returned together with a
+// zero Prefix. Bytes remaining after the fourth varint are not inspected.
+func PrefixFromBytes(buf []byte) (Prefix, error) {
+	r := bytes.NewReader(buf)
+	vers, err := varint.ReadUvarint(r)
+	if err != nil {
+		return Prefix{}, err
+	}
+
+	codec, err := varint.ReadUvarint(r)
+	if err != nil {
+		return Prefix{}, err
+	}
+
+	mhtype, err := varint.ReadUvarint(r)
+	if err != nil {
+		return Prefix{}, err
+	}
+
+	mhlen, err := varint.ReadUvarint(r)
+	if err != nil {
+		return Prefix{}, err
+	}
+
+	return Prefix{
+		Version:  vers,
+		Codec:    codec,
+		MhType:   mhtype,
+		MhLength: int(mhlen),
+	}, nil
+}
+
+// CidFromBytes parses a Cid from the start of data and returns the number
+// of bytes it occupies together with the Cid. Bytes after the Cid are not
+// examined here; Cast, for example, rejects them.
+//
+// On error it returns 0, Undef and the error.
+func CidFromBytes(data []byte) (int, Cid, error) {
+	// A buffer starting with the sha2-256 code and a length of 32 is read
+	// as a 34-byte CidV0.
+	if len(data) > 2 && data[0] == mh.SHA2_256 && data[1] == 32 {
+		if len(data) < 34 {
+			return 0, Undef, fmt.Errorf("not enough bytes for cid v0")
+		}
+
+		h, err := mh.Cast(data[:34])
+		if err != nil {
+			return 0, Undef, err
+		}
+
+		return 34, Cid{string(h)}, nil
+	}
+
+	vers, n, err := varint.FromUvarint(data)
+	if err != nil {
+		return 0, Undef, err
+	}
+
+	if vers != 1 {
+		return 0, Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
+	}
+
+	// The codec value itself is not needed here, only its encoded width.
+	_, cn, err := varint.FromUvarint(data[n:])
+	if err != nil {
+		return 0, Undef, err
+	}
+
+	// mhnr is used below as the encoded size of the multihash —
+	// presumably the byte count MHFromBytes consumed; confirm against
+	// go-multihash.
+	mhnr, _, err := mh.MHFromBytes(data[n+cn:])
+	if err != nil {
+		return 0, Undef, err
+	}
+
+	l := n + cn + mhnr
+
+	return l, Cid{string(data[0:l])}, nil
 }
--- a/cid_fuzz.go
+++ b/cid_fuzz.go
@@ -0,0 +1,37 @@
+// +build gofuzz
+
+package cid
+
+// Fuzz is the fuzzing entry point. It returns 0 when data cannot be cast
+// to a Cid. Otherwise it exercises Bytes, String, Prefix and a JSON
+// marshal/unmarshal round trip, panicking on any inconsistency, and
+// returns 1.
+func Fuzz(data []byte) int {
+	cid, err := Cast(data)
+
+	if err != nil {
+		return 0
+	}
+
+	_ = cid.Bytes()
+	_ = cid.String()
+	p := cid.Prefix()
+	_ = p.Bytes()
+
+	if !cid.Equals(cid) {
+		panic("inequality")
+	}
+
+	// json loop
+	json, err := cid.MarshalJSON()
+	if err != nil {
+		panic(err.Error())
+	}
+	cid2 := Cid{}
+	err = cid2.UnmarshalJSON(json)
+	if err != nil {
+		panic(err.Error())
+	}
+
+	if !cid.Equals(cid2) {
+		panic("json loop not equal")
+	}
+
+	return 1
+}
--- a/cid_test.go
+++ b/cid_test.go
@@ -2,36 +2,115 @@ package cid

 import (
 	"bytes"
+	"encoding/json"
+	"fmt"
+	"math/rand"
+	"strings"
 	"testing"

-	mh "github.com/jbenet/go-multihash"
+	mbase "github.com/multiformats/go-multibase"
+	mh "github.com/multiformats/go-multihash"
 )

-func assertEqual(t *testing.T, a, b *Cid) {
-	if a.codec != b.codec {
+// Copying the "silly test" idea from
+// https://github.com/multiformats/go-multihash/blob/7aa9f26a231c6f34f4e9fad52bf580fd36627285/multihash_test.go#L13
+// Makes it so changing the table accidentally has to happen twice.
+var tCodecs = map[uint64]string{
+	Raw:                   "raw",
+	DagProtobuf:           "protobuf",
+	DagCBOR:               "cbor",
+	Libp2pKey:             "libp2p-key",
+	GitRaw:                "git-raw",
+	EthBlock:              "eth-block",
+	EthBlockList:          "eth-block-list",
+	EthTxTrie:             "eth-tx-trie",
+	EthTx:                 "eth-tx",
+	EthTxReceiptTrie:      "eth-tx-receipt-trie",
+	EthTxReceipt:          "eth-tx-receipt",
+	EthStateTrie:          "eth-state-trie",
+	EthAccountSnapshot:    "eth-account-snapshot",
+	EthStorageTrie:        "eth-storage-trie",
+	BitcoinBlock:          "bitcoin-block",
+	BitcoinTx:             "bitcoin-tx",
+	ZcashBlock:            "zcash-block",
+	ZcashTx:               "zcash-tx",
+	DecredBlock:           "decred-block",
+	DecredTx:              "decred-tx",
+	DashBlock:             "dash-block",
+	DashTx:                "dash-tx",
+	FilCommitmentUnsealed: "fil-commitment-unsealed",
+	FilCommitmentSealed:   "fil-commitment-sealed",
+}
+
+func assertEqual(t *testing.T, a, b Cid) {
+	if a.Type() != b.Type() {
 		t.Fatal("mismatch on type")
 	}

-	if a.version != b.version {
+	if a.Version() != b.Version() {
 		t.Fatal("mismatch on version")
 	}

-	if !bytes.Equal(a.hash, b.hash) {
+	if !bytes.Equal(a.Hash(), b.Hash()) {
 		t.Fatal("multihash mismatch")
 	}
 }

+// TestTable checks Codecs against the independent tCodecs table: every
+// tCodecs name must resolve to its code, and the sizes must agree.
+func TestTable(t *testing.T) {
+	// Codecs is allowed one extra entry: the "v0" alias, which tCodecs does
+	// not list.
+	if len(tCodecs) != len(Codecs)-1 {
+		t.Errorf("Item count mismatch in the Table of Codec. Should be %d, got %d", len(tCodecs)+1, len(Codecs))
+	}
+
+	for k, v := range tCodecs {
+		if Codecs[v] != k {
+			t.Errorf("Table mismatch: 0x%x %s", k, v)
+		}
+	}
+}
+
+// The table returns cid.DagProtobuf for "v0"
+// so we test it apart
+func TestTableForV0(t *testing.T) {
+	if Codecs["v0"] != DagProtobuf {
+		t.Error("Table mismatch: Codecs[\"v0\"] should resolve to DagProtobuf (0x70)")
+	}
+}
+
+func TestPrefixSum(t *testing.T) {
+	// Test creating CIDs both manually and with Prefix.
+	// Tests: https://github.com/ipfs/go-cid/issues/83
+	for _, hashfun := range []uint64{
+		mh.ID, mh.SHA3, mh.SHA2_256,
+	} {
+		h1, err := mh.Sum([]byte("TEST"), hashfun, -1)
+		if err != nil {
+			t.Fatal(err)
+		}
+		c1 := NewCidV1(Raw, h1)
+
+		h2, err := mh.Sum([]byte("foobar"), hashfun, -1)
+		if err != nil {
+			t.Fatal(err)
+		}
+		c2 := NewCidV1(Raw, h2)
+
+		c3, err := c1.Prefix().Sum([]byte("foobar"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if !c2.Equals(c3) {
+			t.Fatal("expected CIDs to be equal")
+		}
+	}
+}
+
 func TestBasicMarshaling(t *testing.T) {
 	h, err := mh.Sum([]byte("TEST"), mh.SHA3, 4)
 	if err != nil {
 		t.Fatal(err)
 	}

-	cid := &Cid{
-		codec:   7,
-		version: 1,
-		hash:    h,
-	}
+	cid := NewCidV1(7, h)

 	data := cid.Bytes()

@@ -51,6 +130,110 @@ func TestBasicMarshaling(t *testing.T) {
 	assertEqual(t, cid, out2)
 }

+func TestBasesMarshaling(t *testing.T) {
+	h, err := mh.Sum([]byte("TEST"), mh.SHA3, 4)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cid := NewCidV1(7, h)
+
+	data := cid.Bytes()
+
+	out, err := Cast(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	assertEqual(t, cid, out)
+
+	testBases := []mbase.Encoding{
+		mbase.Base16,
+		mbase.Base32,
+		mbase.Base32hex,
+		mbase.Base32pad,
+		mbase.Base32hexPad,
+		mbase.Base58BTC,
+		mbase.Base58Flickr,
+		mbase.Base64pad,
+		mbase.Base64urlPad,
+		mbase.Base64url,
+		mbase.Base64,
+	}
+
+	for _, b := range testBases {
+		s, err := cid.StringOfBase(b)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if s[0] != byte(b) {
+			t.Fatal("Invalid multibase header")
+		}
+
+		out2, err := Decode(s)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		assertEqual(t, cid, out2)
+
+		encoder, err := mbase.NewEncoder(b)
+		if err != nil {
+			t.Fatal(err)
+		}
+		s2 := cid.Encode(encoder)
+		if s != s2 {
+			t.Fatalf("'%s' != '%s'", s, s2)
+		}
+	}
+}
+
+func TestBinaryMarshaling(t *testing.T) {
+	data := []byte("this is some test content")
+	hash, _ := mh.Sum(data, mh.SHA2_256, -1)
+	c := NewCidV1(DagCBOR, hash)
+	var c2 Cid
+
+	data, err := c.MarshalBinary()
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = c2.UnmarshalBinary(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !c.Equals(c2) {
+		t.Errorf("cids should be the same: %s %s", c, c2)
+	}
+}
+
+func TestTextMarshaling(t *testing.T) {
+	data := []byte("this is some test content")
+	hash, _ := mh.Sum(data, mh.SHA2_256, -1)
+	c := NewCidV1(DagCBOR, hash)
+	var c2 Cid
+
+	data, err := c.MarshalText()
+	if err != nil {
+		t.Fatal(err)
+	}
+	err = c2.UnmarshalText(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !c.Equals(c2) {
+		t.Errorf("cids should be the same: %s %s", c, c2)
+	}
+}
+
+func TestEmptyString(t *testing.T) {
+	_, err := Decode("")
+	if err == nil {
+		t.Fatal("shouldnt be able to parse an empty cid")
+	}
+}
+
 func TestV0Handling(t *testing.T) {
 	old := "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n"

@@ -59,17 +242,33 @@ func TestV0Handling(t *testing.T) {
 		t.Fatal(err)
 	}

-	if cid.version != 0 {
+	if cid.Version() != 0 {
 		t.Fatal("should have gotten version 0 cid")
 	}

-	if cid.hash.B58String() != old {
-		t.Fatal("marshaling roundtrip failed")
+	if cid.Hash().B58String() != old {
+		t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old)
 	}

 	if cid.String() != old {
 		t.Fatal("marshaling roundtrip failed")
 	}
+
+	new, err := cid.StringOfBase(mbase.Base58BTC)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if new != old {
+		t.Fatal("StringOfBase roundtrip failed")
+	}
+
+	encoder, err := mbase.NewEncoder(mbase.Base58BTC)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if cid.Encode(encoder) != old {
+		t.Fatal("Encode roundtrip failed")
+	}
 }

 func TestV0ErrorCases(t *testing.T) {
@@ -79,3 +278,323 @@ func TestV0ErrorCases(t *testing.T) {
 		t.Fatal("should have failed to decode that ref")
 	}
 }
+
+func TestNewPrefixV1(t *testing.T) {
+	data := []byte("this is some test content")
+
+	// Construct c1
+	prefix := NewPrefixV1(DagCBOR, mh.SHA2_256)
+	c1, err := prefix.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if c1.Prefix() != prefix {
+		t.Fatal("prefix not preserved")
+	}
+
+	// Construct c2
+	hash, err := mh.Sum(data, mh.SHA2_256, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	c2 := NewCidV1(DagCBOR, hash)
+
+	if !c1.Equals(c2) {
+		t.Fatal("cids mismatch")
+	}
+	if c1.Prefix() != c2.Prefix() {
+		t.Fatal("prefixes mismatch")
+	}
+}
+
+func TestNewPrefixV0(t *testing.T) {
+	data := []byte("this is some test content")
+
+	// Construct c1
+	prefix := NewPrefixV0(mh.SHA2_256)
+	c1, err := prefix.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if c1.Prefix() != prefix {
+		t.Fatal("prefix not preserved")
+	}
+
+	// Construct c2
+	hash, err := mh.Sum(data, mh.SHA2_256, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	c2 := NewCidV0(hash)
+
+	if !c1.Equals(c2) {
+		t.Fatal("cids mismatch")
+	}
+	if c1.Prefix() != c2.Prefix() {
+		t.Fatal("prefixes mismatch")
+	}
+
+}
+
+func TestInvalidV0Prefix(t *testing.T) {
+	tests := []Prefix{
+		{
+			MhType:   mh.SHA2_256,
+			MhLength: 31,
+		},
+		{
+			MhType:   mh.SHA2_256,
+			MhLength: 33,
+		},
+		{
+			MhType:   mh.SHA2_256,
+			MhLength: -2,
+		},
+		{
+			MhType:   mh.SHA2_512,
+			MhLength: 32,
+		},
+		{
+			MhType:   mh.SHA2_512,
+			MhLength: -1,
+		},
+	}
+
+	for i, p := range tests {
+		t.Log(i)
+		_, err := p.Sum([]byte("testdata"))
+		if err == nil {
+			t.Fatalf("should error (index %d)", i)
+		}
+	}
+
+}
+
+func TestPrefixRoundtrip(t *testing.T) {
+	data := []byte("this is some test content")
+	hash, _ := mh.Sum(data, mh.SHA2_256, -1)
+	c := NewCidV1(DagCBOR, hash)
+
+	pref := c.Prefix()
+
+	c2, err := pref.Sum(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !c.Equals(c2) {
+		t.Fatal("output didnt match original")
+	}
+
+	pb := pref.Bytes()
+
+	pref2, err := PrefixFromBytes(pb)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if pref.Version != pref2.Version || pref.Codec != pref2.Codec ||
+		pref.MhType != pref2.MhType || pref.MhLength != pref2.MhLength {
+		t.Fatal("input prefix didnt match output")
+	}
+}
+
+func Test16BytesVarint(t *testing.T) {
+	data := []byte("this is some test content")
+	hash, _ := mh.Sum(data, mh.SHA2_256, -1)
+	c := NewCidV1(1<<63, hash)
+	_ = c.Bytes()
+}
+
+func TestFuzzCid(t *testing.T) {
+	buf := make([]byte, 128)
+	for i := 0; i < 200; i++ {
+		s := rand.Intn(128)
+		rand.Read(buf[:s])
+		_, _ = Cast(buf[:s])
+	}
+}
+
+// TestParse checks that Parse rejects an unsupported argument type and that
+// every supported input form of the same CIDv0 parses back to that CIDv0.
+func TestParse(t *testing.T) {
+	_, err := Parse(123)
+	if err == nil {
+		t.Fatalf("expected error from Parse()")
+	}
+	if !strings.Contains(err.Error(), "can't parse 123 as Cid") {
+		t.Fatalf("expected int error, got %s", err.Error())
+	}
+
+	theHash := "QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n"
+	h, err := mh.FromB58String(theHash)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Each entry pairs an input for Parse with the base58 string it must
+	// resolve to.
+	assertions := [][]interface{}{
+		{NewCidV0(h), theHash},
+		{NewCidV0(h).Bytes(), theHash},
+		{h, theHash},
+		{theHash, theHash},
+		{"/ipfs/" + theHash, theHash},
+		{"https://ipfs.io/ipfs/" + theHash, theHash},
+		{"http://localhost:8080/ipfs/" + theHash, theHash},
+	}
+
+	// assert parses arg and verifies version, multihash and string form.
+	assert := func(arg interface{}, expected string) error {
+		cid, err := Parse(arg)
+		if err != nil {
+			return err
+		}
+		if cid.Version() != 0 {
+			// Use %d: string(<integer>) yields the rune with that code
+			// point, not the decimal representation of the number.
+			return fmt.Errorf("expected version 0, got %d", cid.Version())
+		}
+		actual := cid.Hash().B58String()
+		if actual != expected {
+			return fmt.Errorf("expected hash %s, got %s", expected, actual)
+		}
+		actual = cid.String()
+		if actual != expected {
+			return fmt.Errorf("expected string %s, got %s", expected, actual)
+		}
+		return nil
+	}
+
+	for _, args := range assertions {
+		if err := assert(args[0], args[1].(string)); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func TestHexDecode(t *testing.T) {
+	hexcid := "f015512209d8453505bdc6f269678e16b3e56c2a2948a41f2c792617cc9611ed363c95b63"
+	c, err := Decode(hexcid)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if c.String() != "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm" {
+		t.Fatal("hash value failed to round trip decoding from hex")
+	}
+}
+
+func ExampleDecode() {
+	encoded := "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm"
+	c, err := Decode(encoded)
+	if err != nil {
+		fmt.Printf("Error: %s", err)
+		return
+	}
+
+	fmt.Println(c)
+	// Output: bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm
+}
+
+func TestFromJson(t *testing.T) {
+	cval := "bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm"
+	jsoncid := []byte(`{"/":"` + cval + `"}`)
+	var c Cid
+	err := json.Unmarshal(jsoncid, &c)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if c.String() != cval {
+		t.Fatal("json parsing failed")
+	}
+}
+
+// TestJsonRoundTrip checks that a Cid survives json.Marshal followed by
+// json.Unmarshal, both when marshaled through a pointer and by value.
+func TestJsonRoundTrip(t *testing.T) {
+	exp, err := Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify it works for a *Cid.
+	enc, err := json.Marshal(&exp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	var actual Cid
+	// Check the decode error explicitly so a failure is not reported as a
+	// misleading "cids not equal".
+	if err := json.Unmarshal(enc, &actual); err != nil {
+		t.Fatal(err)
+	}
+	if !exp.Equals(actual) {
+		t.Fatal("cids not equal for *Cid")
+	}
+
+	// Verify it works for a Cid.
+	enc, err = json.Marshal(exp)
+	if err != nil {
+		t.Fatal(err)
+	}
+	var actual2 Cid
+	if err := json.Unmarshal(enc, &actual2); err != nil {
+		t.Fatal(err)
+	}
+	if !exp.Equals(actual2) {
+		t.Fatal("cids not equal for Cid")
+	}
+}
+
+func BenchmarkStringV1(b *testing.B) {
+	data := []byte("this is some test content")
+	hash, _ := mh.Sum(data, mh.SHA2_256, -1)
+	cid := NewCidV1(Raw, hash)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	count := 0
+	for i := 0; i < b.N; i++ {
+		count += len(cid.String())
+	}
+	if count != 49*b.N {
+		b.FailNow()
+	}
+}
+
+func TestReadCidsFromBuffer(t *testing.T) {
+	cidstr := []string{
+		"bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm",
+		"k2cwueckqkibutvhkr4p2ln2pjcaxaakpd9db0e7j7ax1lxhhxy3ekpv",
+		"Qmf5Qzp6nGBku7CEn2UQx4mgN8TW69YUok36DrGa6NN893",
+		"zb2rhZi1JR4eNc2jBGaRYJKYM8JEB4ovenym8L1CmFsRAytkz",
+	}
+
+	var cids []Cid
+	var buf []byte
+	for _, cs := range cidstr {
+		c, err := Decode(cs)
+		if err != nil {
+			t.Fatal(err)
+		}
+		cids = append(cids, c)
+		buf = append(buf, c.Bytes()...)
+	}
+
+	var cur int
+	for _, expc := range cids {
+		n, c, err := CidFromBytes(buf[cur:])
+		if err != nil {
+			t.Fatal(err)
+		}
+		if c != expc {
+			t.Fatal("cids mismatched")
+		}
+		cur += n
+	}
+	if cur != len(buf) {
+		t.Fatal("had trailing bytes")
+	}
+}
+
+func TestBadParse(t *testing.T) {
+	hash, err := mh.Sum([]byte("foobar"), mh.SHA3_256, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = Parse(hash)
+	if err == nil {
+		t.Fatal("expected to fail to parse an invalid CIDv1 CID")
+	}
+}
--- a/codecov.yml
+++ b/codecov.yml
@@ -0,0 +1,3 @@
+coverage:
+  range: "50...100"
+comment: off
--- a/deprecated.go
+++ b/deprecated.go
@@ -0,0 +1,28 @@
+package cid
+
+import (
+	mh "github.com/multiformats/go-multihash"
+)
+
+// NewPrefixV0 returns a CIDv0 prefix with the specified multihash type.
+// The prefix has Version 0 and Codec DagProtobuf; MhLength is looked up in
+// mh.DefaultLengths for mhType.
+//
+// Deprecated: Use V0Builder
+func NewPrefixV0(mhType uint64) Prefix {
+	return Prefix{
+		MhType:   mhType,
+		MhLength: mh.DefaultLengths[mhType],
+		Version:  0,
+		Codec:    DagProtobuf,
+	}
+}
+
+// NewPrefixV1 returns a CIDv1 prefix with the specified codec and multihash
+// type. The prefix has Version 1; MhLength is looked up in
+// mh.DefaultLengths for mhType.
+//
+// Deprecated: Use V1Builder
+func NewPrefixV1(codecType uint64, mhType uint64) Prefix {
+	return Prefix{
+		MhType:   mhType,
+		MhLength: mh.DefaultLengths[mhType],
+		Version:  1,
+		Codec:    codecType,
+	}
+}
--- a/fuzz-data/corpus/cid0
+++ b/fuzz-data/corpus/cid0
@@ -0,0 +1 @@
+ ëgáD1üüÊe<C38A>-D˜/¹q3ø~å(Ä7`8–<38>‡n
--- a/fuzz-data/corpus/cid1
+++ b/fuzz-data/corpus/cid1
@@ -0,0 +1 @@
+q -[<5B>ï<EFBFBD>h<EFBFBD>[<5B><10><>
--- a/go.mod
+++ b/go.mod
@@ -0,0 +1,9 @@
+module github.com/ipfs/go-cid
+
+require (
+	github.com/multiformats/go-multibase v0.0.3
+	github.com/multiformats/go-multihash v0.0.13
+	github.com/multiformats/go-varint v0.0.5
+)
+
+go 1.13
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,28 @@
+github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g=
+github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ=
+github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771 h1:MHkK1uRtFbVqvAgvWxafZe54+5uBxLluGylDiKgdhwo=
+github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
+github.com/mr-tron/base58 v1.1.0 h1:Y51FGVJ91WBqCEabAi5OPUz38eAx8DakuAm5svLcsfQ=
+github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
+github.com/mr-tron/base58 v1.1.3 h1:v+sk57XuaCKGXpWtVBX8YJzO7hMGx4Aajh4TQbdEFdc=
+github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
+github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI=
+github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA=
+github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ89tUg4F4=
+github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM=
+github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk=
+github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
+github.com/multiformats/go-multihash v0.0.13 h1:06x+mk/zj1FoMsgNejLpy6QTvJqlSt/BhLEy87zidlc=
+github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc=
+github.com/multiformats/go-varint v0.0.5 h1:XVZwSo04Cs3j/jS0uAEPpT3JY6DzMcVLLoWOSnCxOjg=
+github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
+github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
+github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 h1:1wopBVtVdWnn03fZelqdXTqk7U7zPQCb+T4rbU9ZEoU=
+golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
--- a/package.json
+++ b/package.json
@@ -1,30 +0,0 @@
-{
-  "author": "whyrusleeping",
-  "bugs": {
-    "url": "https://github.com/ipfs/go-cid"
-  },
-  "gx": {
-    "dvcsimport": "github.com/ipfs/go-cid"
-  },
-  "gxDependencies": [
-    {
-      "author": "whyrusleeping",
-      "hash": "QmYf7ng2hG5XBtJA3tN34DQ2GUN5HNksEw1rLDkmr6vGku",
-      "name": "go-multihash",
-      "version": "0.0.0"
-    },
-    {
-      "author": "whyrusleeping",
-      "hash": "QmYiTi9mKBMjfiup7na7PhJK7QEZPdMTJenLdgFYVQ2NUv",
-      "name": "go-multibase",
-      "version": "0.2.0"
-    }
-  ],
-  "gxVersion": "0.8.0",
-  "language": "go",
-  "license": "",
-  "name": "go-cid",
-  "releaseCmd": "git commit -a -m \"gx publish $VERSION\"",
-  "version": "0.3.0"
-}
-
--- a/set.go
+++ b/set.go
@@ -1,40 +1,49 @@
 package cid

+// Set is an implementation of a set of Cids, that is, a structure
+// which holds a single copy of every Cid that is added to it.
 type Set struct {
-	set map[string]struct{}
+	set map[Cid]struct{}
 }

+// NewSet initializes and returns a new Set.
 func NewSet() *Set {
-	return &Set{set: make(map[string]struct{})}
+	return &Set{set: make(map[Cid]struct{})}
 }

-func (s *Set) Add(c *Cid) {
-	s.set[string(c.Bytes())] = struct{}{}
+// Add puts a Cid in the Set.
+func (s *Set) Add(c Cid) {
+	s.set[c] = struct{}{}
 }

-func (s *Set) Has(c *Cid) bool {
-	_, ok := s.set[string(c.Bytes())]
+// Has reports whether the Set contains a given Cid.
+func (s *Set) Has(c Cid) bool {
+	_, ok := s.set[c]
 	return ok
 }

-func (s *Set) Remove(c *Cid) {
-	delete(s.set, string(c.Bytes()))
+// Remove deletes a Cid from the Set.
+func (s *Set) Remove(c Cid) {
+	delete(s.set, c)
 }

+// Len returns how many elements the Set has.
 func (s *Set) Len() int {
 	return len(s.set)
 }

-func (s *Set) Keys() []*Cid {
-	var out []*Cid
-	for k, _ := range s.set {
-		c, _ := Cast([]byte(k))
-		out = append(out, c)
+// Keys returns the Cids in the set.
+func (s *Set) Keys() []Cid {
+	out := make([]Cid, 0, len(s.set))
+	for k := range s.set {
+		out = append(out, k)
 	}
 	return out
 }

-func (s *Set) Visit(c *Cid) bool {
+// Visit adds a Cid to the set only if it is
+// not in it already.
+func (s *Set) Visit(c Cid) bool {
 	if !s.Has(c) {
 		s.Add(c)
 		return true
@@ -42,3 +51,15 @@ func (s *Set) Visit(c *Cid) bool {

 	return false
 }
+
+// ForEach runs a custom function on each Cid in the set,
+// stopping at and returning the first error it produces.
+func (s *Set) ForEach(f func(c Cid) error) error {
+	for c := range s.set {
+		err := f(c)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/set_test.go
+++ b/set_test.go
@@ -0,0 +1,88 @@
+package cid
+
+import (
+	"crypto/rand"
+	"errors"
+	"testing"
+
+	mh "github.com/multiformats/go-multihash"
+)
+
+func makeRandomCid(t *testing.T) Cid {
+	p := make([]byte, 256)
+	_, err := rand.Read(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	h, err := mh.Sum(p, mh.SHA3, 4)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cid := NewCidV1(7, h)
+
+	return cid
+}
+
+func TestSet(t *testing.T) {
+	cid := makeRandomCid(t)
+	cid2 := makeRandomCid(t)
+	s := NewSet()
+
+	s.Add(cid)
+
+	if !s.Has(cid) {
+		t.Error("should have the CID")
+	}
+
+	if s.Len() != 1 {
+		t.Error("should report 1 element")
+	}
+
+	keys := s.Keys()
+
+	if len(keys) != 1 || !keys[0].Equals(cid) {
+		t.Error("key should correspond to Cid")
+	}
+
+	if s.Visit(cid) {
+		t.Error("visit should return false")
+	}
+
+	foreach := []Cid{}
+	foreachF := func(c Cid) error {
+		foreach = append(foreach, c)
+		return nil
+	}
+
+	if err := s.ForEach(foreachF); err != nil {
+		t.Error(err)
+	}
+
+	if len(foreach) != 1 {
+		t.Error("ForEach should have visited 1 element")
+	}
+
+	foreachErr := func(c Cid) error {
+		return errors.New("test")
+	}
+
+	if err := s.ForEach(foreachErr); err == nil {
+		t.Error("Should have returned an error")
+	}
+
+	if !s.Visit(cid2) {
+		t.Error("should have visited a new Cid")
+	}
+
+	if s.Len() != 2 {
+		t.Error("len should be 2 now")
+	}
+
+	s.Remove(cid2)
+
+	if s.Len() != 1 {
+		t.Error("len should be 1 now")
+	}
+}
--- a/varint.go
+++ b/varint.go
@@ -0,0 +1,40 @@
+package cid
+
+import (
+	"github.com/multiformats/go-varint"
+)
+
+// Version of the varint function that works with a string rather than
+// a []byte to avoid unnecessary allocation
+
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license as given at https://golang.org/LICENSE
+
+// uvarint decodes a uint64 from the start of buf and returns that value and
+// the number of bytes read (> 0). If an error occurred, the value and the
+// byte count are both 0 and the error is one of:
+//
+// 	varint.ErrUnderflow:  buf ended before a final byte (high bit clear)
+// 	varint.ErrOverflow:   value larger than 64 bits
+// 	varint.ErrNotMinimal: multi-byte encoding whose final byte is zero
+//
+func uvarint(buf string) (uint64, int, error) {
+	var x uint64
+	var s uint
+	// we have a binary string so we can't use a range loop
+	// (range over a string yields runes, not bytes)
+	for i := 0; i < len(buf); i++ {
+		b := buf[i]
+		// A byte with the high bit clear terminates the varint.
+		if b < 0x80 {
+			// The tenth byte (i == 9) may only contribute the single
+			// remaining bit of a uint64.
+			if i > 9 || i == 9 && b > 1 {
+				return 0, 0, varint.ErrOverflow
+			} else if b == 0 && i > 0 {
+				return 0, 0, varint.ErrNotMinimal
+			}
+			return x | uint64(b)<<s, i + 1, nil
+		}
+		// Continuation byte: accumulate its low 7 bits.
+		x |= uint64(b&0x7f) << s
+		s += 7
+	}
+	return 0, 0, varint.ErrUnderflow
+}
--- a/varint_test.go
+++ b/varint_test.go
@@ -0,0 +1,30 @@
+package cid
+
+import (
+	"testing"
+
+	"github.com/multiformats/go-varint"
+)
+
+// TestUvarintRoundTrip encodes each test value with varint.PutUvarint and
+// checks that uvarint decodes the same value and consumes the same number of
+// bytes as varint.FromUvarint.
+func TestUvarintRoundTrip(t *testing.T) {
+	testCases := []uint64{0, 1, 2, 127, 128, 129, 255, 256, 257, 1<<63 - 1}
+	for _, tc := range testCases {
+		t.Log("testing", tc)
+		// The buffer is larger than the encoding; both decoders are given
+		// the whole buffer, trailing zero bytes included.
+		buf := make([]byte, 16)
+		varint.PutUvarint(buf, tc)
+		v, l1, err := uvarint(string(buf))
+		if err != nil {
+			t.Fatalf("%v: %s", buf, err)
+		}
+		_, l2, err := varint.FromUvarint(buf)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if tc != v {
+			t.Errorf("roundtrip failed expected %d but got %d", tc, v)
+		}
+		if l1 != l2 {
+			t.Errorf("length incorrect expected %d but got %d", l2, l1)
+		}
+	}
+}
				`@@ -1 +0,0 @@`
				`0.3.0: QmfAjb1QYA9SS9TLVJBRZXEVriGaGrRZ3vJSajhLa52aYg`
				`@@ -0,0 +1 @@`
				ëgáD1üüÊe<C38A>-D˜/¹q3ø~å(Ä7`8–<38>‡n