Compare commits

..

1 Commits

Author SHA1 Message Date
gammazero
0ca2bbaeb3 Add SumStream to read data from io.Reader
This provides a way to create a CID without first reading all of the data into memory and passing it to Sum. Instead, call SumStream and pass in an io.Reader.
2021-04-07 12:45:16 -07:00
12 changed files with 131 additions and 344 deletions

View File

@@ -1,51 +0,0 @@
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
# Automatically merge pull requests opened by web3-bot, as soon as (and only if) all tests pass.
# This reduces the friction associated with updating with our workflows.
on: [ pull_request ]
name: Automerge
jobs:
automerge-check:
if: github.event.pull_request.user.login == 'web3-bot'
runs-on: ubuntu-latest
outputs:
status: ${{ steps.should-automerge.outputs.status }}
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Check if we should automerge
id: should-automerge
run: |
for commit in $(git rev-list --first-parent origin/${{ github.event.pull_request.base.ref }}..${{ github.event.pull_request.head.sha }}); do
committer=$(git show --format=$'%ce' -s $commit)
echo "Committer: $committer"
if [[ "$committer" != "web3-bot@users.noreply.github.com" ]]; then
echo "Commit $commit wasn't committed by web3-bot, but by $committer."
echo "::set-output name=status::false"
exit
fi
done
echo "::set-output name=status::true"
automerge:
needs: automerge-check
runs-on: ubuntu-latest
if: ${{ needs.automerge-check.outputs.status == 'true' }}
steps:
- name: Wait on tests
uses: lewagon/wait-on-check-action@bafe56a6863672c681c3cf671f5e10b20abf2eaa # v0.2
with:
ref: ${{ github.event.pull_request.head.sha }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
wait-interval: 10
running-workflow-name: 'automerge' # the name of this job
- name: Merge PR
uses: pascalgn/automerge-action@741c311a47881be9625932b0a0de1b0937aab1ae # v0.13.1
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
MERGE_LABELS: ""
MERGE_METHOD: "squash"
MERGE_DELETE_BRANCH: true

View File

@@ -1,50 +0,0 @@
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
on: [push, pull_request]
name: Go Checks
jobs:
unit:
runs-on: ubuntu-latest
name: All
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions/setup-go@v2
with:
go-version: "1.16.x"
- name: Install staticcheck
run: go install honnef.co/go/tools/cmd/staticcheck@434f5f3816b358fe468fa83dcba62d794e7fe04b # 2021.1 (v0.2.0)
- name: Check that go.mod is tidy
uses: protocol/multiple-go-modules@v1.0
with:
run: |
go mod tidy
if [[ -n $(git ls-files --other --exclude-standard --directory -- go.sum) ]]; then
echo "go.sum was added by go mod tidy"
exit 1
fi
git diff --exit-code -- go.sum go.mod
- name: gofmt
if: ${{ success() || failure() }} # run this step even if the previous one failed
run: |
out=$(gofmt -s -l .)
if [[ -n "$out" ]]; then
echo $out | awk '{print "::error file=" $0 ",line=0,col=0::File is not gofmt-ed."}'
exit 1
fi
- name: go vet
if: ${{ success() || failure() }} # run this step even if the previous one failed
uses: protocol/multiple-go-modules@v1.0
with:
run: go vet ./...
- name: staticcheck
if: ${{ success() || failure() }} # run this step even if the previous one failed
uses: protocol/multiple-go-modules@v1.0
with:
run: |
set -o pipefail
staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g'

View File

@@ -1,47 +0,0 @@
# File managed by web3-bot. DO NOT EDIT.
# See https://github.com/protocol/.github/ for details.
on: [push, pull_request]
name: Go Test
jobs:
unit:
strategy:
fail-fast: false
matrix:
os: [ "ubuntu", "windows", "macos" ]
go: [ "1.15.x", "1.16.x" ]
runs-on: ${{ matrix.os }}-latest
name: ${{ matrix.os}} (go ${{ matrix.go }})
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go }}
- name: Go information
run: |
go version
go env
- name: Run tests
uses: protocol/multiple-go-modules@v1.0
with:
run: go test -v -coverprofile coverage.txt ./...
- name: Run tests (32 bit)
if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX.
uses: protocol/multiple-go-modules@v1.0
env:
GOARCH: 386
with:
run: go test -v ./...
- name: Run tests with race detector
if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow
uses: protocol/multiple-go-modules@v1.0
with:
run: go test -v -race ./...
- name: Upload coverage to Codecov
uses: codecov/codecov-action@a1ed4b322b4b38cb846afb5a0ebfa17086917d27 # v1.5.0
with:
file: coverage.txt
env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }}

30
.travis.yml Normal file
View File

@@ -0,0 +1,30 @@
os:
- linux
language: go
go:
- 1.11.x
env:
global:
- GOTFLAGS="-race"
matrix:
- BUILD_DEPTYPE=gomod
# disable travis install
install:
- true
script:
- bash <(curl -s https://raw.githubusercontent.com/ipfs/ci-helpers/master/travis-ci/run-standard-tests.sh)
cache:
directories:
- $GOPATH/pkg/mod
- $HOME/.cache/go-build
notifications:
email: false

View File

@@ -88,7 +88,7 @@ if c1.Equals(c2) {
```go
// To check if some data matches a given cid,
// get your CID's prefix, and use that to sum the data in question:
other, err := c.Prefix().Sum(mydata)
other, err := c.Prefix().SumStream(myFile)
if err != nil {...}
if !c.Equals(other) {

View File

@@ -1,6 +1,8 @@
package cid
import (
"io"
mh "github.com/multiformats/go-multihash"
)
@@ -41,6 +43,14 @@ func (p V0Builder) Sum(data []byte) (Cid, error) {
return Cid{string(hash)}, nil
}
// SumStream hashes the data read from r by calling mh.SumStream with
// mh.SHA2_256 and a length argument of -1, and returns a Cid whose contents
// are the resulting multihash bytes. If hashing fails, Undef is returned
// together with the error.
func (p V0Builder) SumStream(r io.Reader) (Cid, error) {
	digest, err := mh.SumStream(r, mh.SHA2_256, -1)
	if err == nil {
		return Cid{string(digest)}, nil
	}
	return Undef, err
}
func (p V0Builder) GetCodec() uint64 {
return DagProtobuf
}
@@ -64,6 +74,18 @@ func (p V1Builder) Sum(data []byte) (Cid, error) {
return NewCidV1(p.Codec, hash), nil
}
// SumStream hashes the data read from r with the builder's MhType and passes
// the digest, together with the builder's Codec, to NewCidV1. A MhLength that
// is zero or negative is handed to mh.SumStream as -1. If hashing fails,
// Undef is returned together with the error.
func (p V1Builder) SumStream(r io.Reader) (Cid, error) {
	digestLen := p.MhLength
	if digestLen <= 0 {
		digestLen = -1
	}

	digest, err := mh.SumStream(r, p.MhType, digestLen)
	if err == nil {
		return NewCidV1(p.Codec, digest), nil
	}
	return Undef, err
}
func (p V1Builder) GetCodec() uint64 {
return p.Codec
}

View File

@@ -1,6 +1,7 @@
package cid
import (
"bytes"
"testing"
mh "github.com/multiformats/go-multihash"
@@ -16,6 +17,15 @@ func TestV0Builder(t *testing.T) {
t.Fatal(err)
}
reader := bytes.NewReader(data)
c1a, err := format.SumStream(reader)
if err != nil {
t.Fatal(err)
}
if !c1a.Equals(c1) {
t.Fatal("Sum and SumStream create different cids")
}
// Construct c2
hash, err := mh.Sum(data, mh.SHA2_256, -1)
if err != nil {
@@ -29,6 +39,16 @@ func TestV0Builder(t *testing.T) {
if c1.Prefix() != c2.Prefix() {
t.Fatal("prefixes mismatch")
}
reader.Seek(0, 0)
hash, err = mh.SumStream(reader, mh.SHA2_256, -1)
if err != nil {
t.Fatal(err)
}
c2 = NewCidV0(hash)
if !c1.Equals(c2) {
t.Fatal("Sum and SumStream create different cids")
}
}
func TestV1Builder(t *testing.T) {
@@ -41,6 +61,15 @@ func TestV1Builder(t *testing.T) {
t.Fatal(err)
}
reader := bytes.NewReader(data)
c1a, err := format.SumStream(reader)
if err != nil {
t.Fatal(err)
}
if !c1a.Equals(c1) {
t.Fatal("Sum and SumStream create different cids")
}
// Construct c2
hash, err := mh.Sum(data, mh.SHA2_256, -1)
if err != nil {

184
cid.go
View File

@@ -185,12 +185,10 @@ func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
return Cid{string(buf[:n+hashlen])}
}
var (
_ encoding.BinaryMarshaler = Cid{}
_ encoding.BinaryUnmarshaler = (*Cid)(nil)
_ encoding.TextMarshaler = Cid{}
_ encoding.TextUnmarshaler = (*Cid)(nil)
)
var _ encoding.BinaryMarshaler = Cid{}
var _ encoding.BinaryUnmarshaler = (*Cid)(nil)
var _ encoding.TextMarshaler = Cid{}
var _ encoding.TextUnmarshaler = (*Cid)(nil)
// Cid represents a self-describing content addressed
// identifier. It is formed by a Version, a Codec (which indicates
@@ -566,7 +564,7 @@ type Prefix struct {
// and return a newly constructed Cid with the resulting multihash.
func (p Prefix) Sum(data []byte) (Cid, error) {
length := p.MhLength
if p.MhType == mh.IDENTITY {
if p.MhType == mh.ID {
length = -1
}
@@ -591,6 +589,36 @@ func (p Prefix) Sum(data []byte) (Cid, error) {
}
}
// SumStream builds a Cid from the prefix p and the bytes read from r.
// The data is hashed with multihash.SumStream() using the prefix's hash type
// and length, and the digest is then wrapped according to p.Version.
//
// An error is returned for a version 0 prefix that does not use SHA2-256 with
// a length of 32 or -1, when hashing fails, or when the version is neither 0
// nor 1. Note that the version is only inspected after r has been hashed.
func (p Prefix) SumStream(r io.Reader) (Cid, error) {
	// Reject v0 prefixes that do not use SHA2-256 with a length of 32 or -1.
	if p.Version == 0 {
		lengthOK := p.MhLength == 32 || p.MhLength == -1
		if p.MhType != mh.SHA2_256 || !lengthOK {
			return Undef, fmt.Errorf("invalid v0 prefix")
		}
	}

	// When the hash type is mh.ID the configured length is replaced with -1.
	digestLen := p.MhLength
	if p.MhType == mh.ID {
		digestLen = -1
	}

	digest, err := mh.SumStream(r, p.MhType, digestLen)
	if err != nil {
		return Undef, err
	}

	if p.Version == 0 {
		return NewCidV0(digest), nil
	}
	if p.Version == 1 {
		return NewCidV1(p.Codec, digest), nil
	}
	return Undef, fmt.Errorf("invalid cid version")
}
// Bytes returns a byte representation of a Prefix. It looks like:
//
// <version><codec><mh-type><mh-length>
@@ -680,145 +708,3 @@ func CidFromBytes(data []byte) (int, Cid, error) {
return l, Cid{string(data[0:l])}, nil
}
// toBufByteReader returns a bufByteReader over r that first replays the bytes
// already held in dst. When r itself implements io.ByteReader it is used
// directly; otherwise bytes are pulled from r with one-byte reads.
func toBufByteReader(r io.Reader, dst []byte) *bufByteReader {
	out := &bufByteReader{dst: dst}
	if direct, isByteReader := r.(io.ByteReader); isByteReader {
		out.direct = direct
	} else {
		out.fallback = r
	}
	return out
}

// bufByteReader is an io.ByteReader that replays the bytes initially present
// in dst and then reads from the underlying reader, appending every byte it
// obtains to dst.
type bufByteReader struct {
	direct   io.ByteReader // set when the source implements io.ByteReader
	fallback io.Reader     // set otherwise; read one byte at a time

	consumed int    // incremented on every ReadByte call
	dst      []byte // initial bytes, followed by each byte read from the source
}

// ReadByte returns the next byte. Bytes already stored in dst are handed out
// first; after that each byte comes from the underlying reader and is
// appended to dst. When the underlying read fails, 0 and the error are
// returned and dst is left untouched.
func (r *bufByteReader) ReadByte() (byte, error) {
	// The counter advances on every call, whichever path is taken below.
	pos := r.consumed
	r.consumed++

	// Some of the initial bytes are still unread: serve from dst.
	if pos < len(r.dst) {
		return r.dst[pos], nil
	}

	var (
		b   byte
		err error
	)
	if r.direct != nil {
		// The underlying reader has its own ReadByte.
		b, err = r.direct.ReadByte()
	} else {
		// Otherwise perform a one-byte Read.
		var one [1]byte
		_, err = io.ReadFull(r.fallback, one[:])
		b = one[0]
	}
	if err != nil {
		return 0, err
	}

	r.dst = append(r.dst, b)
	return b, nil
}
// CidFromReader reads a precise number of bytes for a CID from a given reader.
// It returns the number of bytes read, the CID, and any error encountered.
// The number of bytes read is accurate even if a non-nil error is returned.
// On any error the returned CID is Undef.
//
// The first two bytes decide the format: if they equal mh.SHA2_256 and 32,
// the input is treated as a CIDv0 and 32 more bytes are read. Otherwise the
// input is decoded as varints (version, codec, multihash code, digest length)
// followed by the digest, and the version must be 1.
//
// It's recommended to supply a reader that buffers and implements io.ByteReader,
// as CidFromReader has to do many single-byte reads to decode varints.
// If the argument only implements io.Reader, single-byte Read calls are used instead.
func CidFromReader(r io.Reader) (int, Cid, error) {
// 64 bytes is enough for any CIDv0,
// and it's enough for most CIDv1s in practice.
// If the digest is too long, we'll allocate more.
buf := make([]byte, 0, 64)
// We read two bytes, to tell if this is a CIDv0 or a CIDv1.
// buf has length 0 but capacity 64, so buf[:2] is a valid slice of its
// backing array for ReadFull to fill.
if n, err := io.ReadFull(r, buf[:2]); err != nil {
return n, Undef, err
}
// Extend buf's length to cover the two bytes that were just read.
buf = buf[:2]
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if buf[0] == mh.SHA2_256 && buf[1] == 32 {
// 2 bytes already read + 32 digest bytes = 34 bytes in total.
if n, err := io.ReadFull(r, buf[2:34]); err != nil {
// len(buf) is still 2 here, so this is the total number of bytes consumed.
return len(buf) + n, Undef, err
}
buf = buf[:34]
h, err := mh.Cast(buf)
if err != nil {
return len(buf), Undef, err
}
return len(buf), Cid{string(h)}, nil
}
// The varint package wants a io.ByteReader, so we must wrap our io.Reader.
// Note that we already read two bytes, so bufByteReader uses those first.
// After those two bytes, bufByteReader appends the read bytes to br.dst.
// len(br.dst) is therefore the count of bytes taken from r so far, which is
// what the error returns below report.
br := toBufByteReader(r, buf[:2])
vers, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
if vers != 1 {
return len(br.dst), Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
}
// CID block encoding multicodec.
// The value itself is discarded; only the bytes collected in br.dst are kept.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// We could replace most of the code below with go-multihash's ReadMultihash.
// Note that it would save code, but prevent reusing buffers.
// Plus, we already have a ByteReader now.
// Remember where the multihash starts so that it can be validated at the end.
mhStart := len(br.dst)
// Multihash hash function code.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// Multihash digest length.
mhl, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// Update buf's length.
// We're not reading single bytes beyond this point.
buf = br.dst
br = nil
// Multihash digest; might be too long, so allocate.
// Refuse to make large allocations to prevent OOMs due to bugs.
// TODO: reuse buf if it has enough space
const maxDigestAlloc = 32 << 20 // 32MiB
if mhl > maxDigestAlloc {
return len(buf), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
}
// mhl has been bounded above, so it is converted to int only after that check.
digest := make([]byte, int(mhl))
if n, err := io.ReadFull(r, digest); err != nil {
// Count the header bytes plus whatever part of the digest was read.
return len(buf) + n, Undef, err
}
buf = append(buf, digest...)
// This simply ensures the multihash is valid.
_, _, err = mh.MHFromBytes(buf[mhStart:])
if err != nil {
return len(buf), Undef, err
}
// The CID is the full byte sequence read: header varints plus digest.
return len(buf), Cid{string(buf)}, nil
}

View File

@@ -4,6 +4,7 @@ package cid
func Fuzz(data []byte) int {
cid, err := Cast(data)
if err != nil {
return 0
}

View File

@@ -4,12 +4,10 @@ import (
"bytes"
"encoding/json"
"fmt"
"io"
"math/rand"
"reflect"
"strings"
"testing"
"testing/iotest"
mbase "github.com/multiformats/go-multibase"
mh "github.com/multiformats/go-multihash"
@@ -84,7 +82,7 @@ func TestPrefixSum(t *testing.T) {
// Test creating CIDs both manually and with Prefix.
// Tests: https://github.com/ipfs/go-cid/issues/83
for _, hashfun := range []uint64{
mh.IDENTITY, mh.SHA3, mh.SHA2_256,
mh.ID, mh.SHA3, mh.SHA2_256,
} {
h1, err := mh.Sum([]byte("TEST"), hashfun, -1)
if err != nil {
@@ -390,6 +388,7 @@ func TestNewPrefixV0(t *testing.T) {
if c1.Prefix() != c2.Prefix() {
t.Fatal("prefixes mismatch")
}
}
func TestInvalidV0Prefix(t *testing.T) {
@@ -513,13 +512,13 @@ func TestParse(t *testing.T) {
}
assertions := [][]interface{}{
{NewCidV0(h), theHash},
{NewCidV0(h).Bytes(), theHash},
{h, theHash},
{theHash, theHash},
{"/ipfs/" + theHash, theHash},
{"https://ipfs.io/ipfs/" + theHash, theHash},
{"http://localhost:8080/ipfs/" + theHash, theHash},
[]interface{}{NewCidV0(h), theHash},
[]interface{}{NewCidV0(h).Bytes(), theHash},
[]interface{}{h, theHash},
[]interface{}{theHash, theHash},
[]interface{}{"/ipfs/" + theHash, theHash},
[]interface{}{"https://ipfs.io/ipfs/" + theHash, theHash},
[]interface{}{"http://localhost:8080/ipfs/" + theHash, theHash},
}
assert := func(arg interface{}, expected string) error {
@@ -694,31 +693,6 @@ func TestReadCidsFromBuffer(t *testing.T) {
if cur != len(buf) {
t.Fatal("had trailing bytes")
}
// The same, but now with CidFromReader.
// In multiple forms, to catch more io interface bugs.
for _, r := range []io.Reader{
// implements io.ByteReader
bytes.NewReader(buf),
// tiny reads, no io.ByteReader
iotest.OneByteReader(bytes.NewReader(buf)),
} {
cur = 0
for _, expc := range cids {
n, c, err := CidFromReader(r)
if err != nil {
t.Fatal(err)
}
if c != expc {
t.Fatal("cids mismatched")
}
cur += n
}
if cur != len(buf) {
t.Fatal("had trailing bytes")
}
}
}
func TestBadCidFromBytes(t *testing.T) {

3
go.mod
View File

@@ -2,9 +2,8 @@ module github.com/ipfs/go-cid
require (
github.com/multiformats/go-multibase v0.0.3
github.com/multiformats/go-multihash v0.0.15
github.com/multiformats/go-multihash v0.0.16-0.20210406191223-75ae3688857d
github.com/multiformats/go-varint v0.0.6
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf // indirect
)
go 1.15

12
go.sum
View File

@@ -13,23 +13,17 @@ github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ8
github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM=
github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk=
github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
github.com/multiformats/go-multihash v0.0.15 h1:hWOPdrNqDjwHDx82vsYGSDZNyktOJJ2dzZJzFkOV1jM=
github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg=
github.com/multiformats/go-multihash v0.0.16-0.20210406191223-75ae3688857d h1:xUl/JcLVtafS0n5X8guAExxDS/mJUGoL31p/FlNtC8o=
github.com/multiformats/go-multihash v0.0.16-0.20210406191223-75ae3688857d/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg=
github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY=
github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83 h1:/ZScEX8SfEmUGRHs0gxpqteO5nfNW6axyZbBdw9A12g=
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf h1:B2n+Zi5QeYRDAEodEu72OS36gmTWjgpXr2+cWcBW90o=
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY=
golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=