Compare commits

...

9 Commits

Author SHA1 Message Date
Daniel Martí
373e25552c avoid double alloc in NewCidV1
We allocate once via "make([]byte, len)",
and again when that buffer is converted to a string.

Thankfully, since Go 1.10 we have strings.Builder,
designed specifically for this use case.

In a downstream benchmark in go-car,
which needs to reconstruct many CID values,
we see small but nice gains:

    name           old time/op    new time/op    delta
    ReadBlocks-16    1.09ms ± 4%    1.06ms ± 5%   -3.33%  (p=0.007 n=11+11)

    name           old speed      new speed      delta
    ReadBlocks-16   478MB/s ± 4%   494MB/s ± 5%   +3.46%  (p=0.007 n=11+11)

    name           old alloc/op   new alloc/op   delta
    ReadBlocks-16    1.30MB ± 0%    1.25MB ± 0%   -3.86%  (p=0.000 n=12+12)

    name           old allocs/op  new allocs/op  delta
    ReadBlocks-16     9.50k ± 0%     8.45k ± 0%  -11.05%  (p=0.000 n=12+12)
2021-09-07 16:49:03 +02:00
Steven Allen
cf76220258 Merge pull request #131 from ipfs/web3-bot/sync
sync: update CI config files
2021-08-30 08:37:22 -07:00
web3-bot
9e2855d9ff update .github/workflows/go-check.yml 2021-08-17 13:32:06 +00:00
web3-bot
b8eba8ea35 update .github/workflows/go-test.yml 2021-08-17 13:32:06 +00:00
web3-bot
2628583977 update .github/workflows/automerge.yml 2021-08-17 13:32:06 +00:00
web3-bot
6e96c56557 run gofmt -s 2021-08-17 13:32:05 +00:00
web3-bot
44cccd62db bump go.mod to Go 1.16 and run go fix 2021-08-17 13:32:05 +00:00
Daniel Martí
de6c03deae amend the CidFromReader slice extension math
The append+make slice extension idiom works, but note that append uses
the slice's length as its base. We need to append the number of bytes
required for length to reach cidLength, not the capacity.

The added test case panicked before this change, and works now:

	--- FAIL: TestReadCidsFromBuffer (0.00s)
	panic: runtime error: slice bounds out of range [:73] with capacity 64 [recovered]
		panic: runtime error: slice bounds out of range [:73] with capacity 64

	goroutine 37 [running]:
	testing.tRunner.func1.2({0x570d60, 0xc000016438})
		testing/testing.go:1203 +0x24e
	testing.tRunner.func1()
		testing/testing.go:1206 +0x218
	panic({0x570d60, 0xc000016438})
		runtime/panic.go:1038 +0x215
	github.com/ipfs/go-cid.CidFromReader({0x5b0e20, 0xc000010900})
		github.com/ipfs/go-cid/cid.go:803 +0x75f
	github.com/ipfs/go-cid.TestReadCidsFromBuffer(0xc00014ba00)
		github.com/ipfs/go-cid/cid_test.go:710 +0x625
	testing.tRunner(0xc00014ba00, 0x58af38)
		testing/testing.go:1253 +0x102
	created by testing.(*T).Run
		testing/testing.go:1300 +0x35a
	exit status 2
	FAIL	github.com/ipfs/go-cid	0.004s
2021-07-16 10:10:50 +01:00
Daniel Martí
c4c8760a80 implement CidFromReader
And reuse two CidFromBytes tests for it, which includes both CIDv0 and
CIDv1 cases as inputs, as well as some inputs that should error.

Fixes #126.
2021-07-15 01:01:22 +01:00
7 changed files with 291 additions and 61 deletions

View File

@@ -33,7 +33,9 @@ jobs:
automerge: automerge:
needs: automerge-check needs: automerge-check
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: ${{ needs.automerge-check.outputs.status == 'true' }} # The check for the user is redundant here, as this job depends on the automerge-check job,
# but it prevents this job from spinning up, just to be skipped shortly after.
if: github.event.pull_request.user.login == 'web3-bot' && needs.automerge-check.outputs.status == 'true'
steps: steps:
- name: Wait on tests - name: Wait on tests
uses: lewagon/wait-on-check-action@bafe56a6863672c681c3cf671f5e10b20abf2eaa # v0.2 uses: lewagon/wait-on-check-action@bafe56a6863672c681c3cf671f5e10b20abf2eaa # v0.2

View File

@@ -8,17 +8,28 @@ jobs:
unit: unit:
runs-on: ubuntu-latest runs-on: ubuntu-latest
name: All name: All
env:
RUNGOGENERATE: false
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
with: with:
submodules: recursive submodules: recursive
- uses: actions/setup-go@v2 - uses: actions/setup-go@v2
with: with:
go-version: "1.16.x" go-version: "1.17.x"
- name: Run repo-specific setup
uses: ./.github/actions/go-check-setup
if: hashFiles('./.github/actions/go-check-setup') != ''
- name: Read config
if: hashFiles('./.github/workflows/go-check-config.json') != ''
run: |
if jq -re .gogenerate ./.github/workflows/go-check-config.json; then
echo "RUNGOGENERATE=true" >> $GITHUB_ENV
fi
- name: Install staticcheck - name: Install staticcheck
run: go install honnef.co/go/tools/cmd/staticcheck@434f5f3816b358fe468fa83dcba62d794e7fe04b # 2021.1 (v0.2.0) run: go install honnef.co/go/tools/cmd/staticcheck@df71e5d0e0ed317ebf43e6e59cf919430fa4b8f2 # 2021.1.1 (v0.2.1)
- name: Check that go.mod is tidy - name: Check that go.mod is tidy
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
with: with:
run: | run: |
go mod tidy go mod tidy
@@ -37,14 +48,27 @@ jobs:
fi fi
- name: go vet - name: go vet
if: ${{ success() || failure() }} # run this step even if the previous one failed if: ${{ success() || failure() }} # run this step even if the previous one failed
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
with: with:
run: go vet ./... run: go vet ./...
- name: staticcheck - name: staticcheck
if: ${{ success() || failure() }} # run this step even if the previous one failed if: ${{ success() || failure() }} # run this step even if the previous one failed
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
with: with:
run: | run: |
set -o pipefail set -o pipefail
staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g' staticcheck ./... | sed -e 's@\(.*\)\.go@./\1.go@g'
- name: go generate
uses: protocol/multiple-go-modules@v1.2
if: (success() || failure()) && env.RUNGOGENERATE == 'true'
with:
run: |
git clean -fd # make sure there aren't untracked files / directories
go generate ./...
# check if go generate modified or added any files
if ! $(git add . && git diff-index HEAD --exit-code --quiet); then
echo "go generated caused changes to the repository:"
git status --short
exit 1
fi

View File

@@ -10,7 +10,9 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ "ubuntu", "windows", "macos" ] os: [ "ubuntu", "windows", "macos" ]
go: [ "1.15.x", "1.16.x" ] go: [ "1.16.x", "1.17.x" ]
env:
COVERAGES: ""
runs-on: ${{ matrix.os }}-latest runs-on: ${{ matrix.os }}-latest
name: ${{ matrix.os}} (go ${{ matrix.go }}) name: ${{ matrix.os}} (go ${{ matrix.go }})
steps: steps:
@@ -24,24 +26,30 @@ jobs:
run: | run: |
go version go version
go env go env
- name: Run repo-specific setup
uses: ./.github/actions/go-test-setup
if: hashFiles('./.github/actions/go-test-setup') != ''
- name: Run tests - name: Run tests
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
with: with:
run: go test -v -coverprofile coverage.txt ./... run: go test -v -coverprofile module-coverage.txt ./...
- name: Run tests (32 bit) - name: Run tests (32 bit)
if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX. if: ${{ matrix.os != 'macos' }} # can't run 32 bit tests on OSX.
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
env: env:
GOARCH: 386 GOARCH: 386
with: with:
run: go test -v ./... run: go test -v ./...
- name: Run tests with race detector - name: Run tests with race detector
if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow if: ${{ matrix.os == 'ubuntu' }} # speed things up. Windows and OSX VMs are slow
uses: protocol/multiple-go-modules@v1.0 uses: protocol/multiple-go-modules@v1.2
with: with:
run: go test -v -race ./... run: go test -v -race ./...
- name: Collect coverage files
shell: bash
run: echo "COVERAGES=$(find . -type f -name 'module-coverage.txt' | tr -s '\n' ',' | sed 's/,$//')" >> $GITHUB_ENV
- name: Upload coverage to Codecov - name: Upload coverage to Codecov
uses: codecov/codecov-action@a1ed4b322b4b38cb846afb5a0ebfa17086917d27 # v1.5.0 uses: codecov/codecov-action@51d810878be5422784e86451c0e7c14e5860ec47 # v2.0.2
with: with:
file: coverage.txt files: '${{ env.COVERAGES }}'
env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }} env_vars: OS=${{ matrix.os }}, GO=${{ matrix.go }}

157
cid.go
View File

@@ -22,6 +22,7 @@ package cid
import ( import (
"bytes" "bytes"
"encoding" "encoding"
"encoding/binary"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@@ -173,16 +174,24 @@ func NewCidV0(mhash mh.Multihash) Cid {
// Panics if the multihash is invalid. // Panics if the multihash is invalid.
func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { func NewCidV1(codecType uint64, mhash mh.Multihash) Cid {
hashlen := len(mhash) hashlen := len(mhash)
// two 8 bytes (max) numbers plus hash
buf := make([]byte, 1+varint.UvarintSize(codecType)+hashlen) // Two 8 bytes (max) numbers plus hash.
n := varint.PutUvarint(buf, 1) // We use strings.Builder to only allocate once.
n += varint.PutUvarint(buf[n:], codecType) var b strings.Builder
cn := copy(buf[n:], mhash) b.Grow(1 + varint.UvarintSize(codecType) + hashlen)
b.WriteByte(1)
var buf [binary.MaxVarintLen64]byte
n := varint.PutUvarint(buf[:], codecType)
b.Write(buf[:n])
cn, _ := b.Write(mhash)
if cn != hashlen { if cn != hashlen {
panic("copy hash length is inconsistent") panic("copy hash length is inconsistent")
} }
return Cid{string(buf[:n+hashlen])} return Cid{b.String()}
} }
var ( var (
@@ -680,3 +689,139 @@ func CidFromBytes(data []byte) (int, Cid, error) {
return l, Cid{string(data[0:l])}, nil return l, Cid{string(data[0:l])}, nil
} }
func toBufByteReader(r io.Reader, dst []byte) *bufByteReader {
// If the reader already implements ByteReader, use it directly.
// Otherwise, use a fallback that does 1-byte Reads.
if br, ok := r.(io.ByteReader); ok {
return &bufByteReader{direct: br, dst: dst}
}
return &bufByteReader{fallback: r, dst: dst}
}
type bufByteReader struct {
direct io.ByteReader
fallback io.Reader
dst []byte
}
func (r *bufByteReader) ReadByte() (byte, error) {
// The underlying reader has ReadByte; use it.
if br := r.direct; br != nil {
b, err := br.ReadByte()
if err != nil {
return 0, err
}
r.dst = append(r.dst, b)
return b, nil
}
// Fall back to a one-byte Read.
// TODO: consider reading straight into dst,
// once we have benchmarks and if they prove that to be faster.
var p [1]byte
if _, err := io.ReadFull(r.fallback, p[:]); err != nil {
return 0, err
}
r.dst = append(r.dst, p[0])
return p[0], nil
}
// CidFromReader reads a precise number of bytes for a CID from a given reader.
// It returns the number of bytes read, the CID, and any error encountered.
// The number of bytes read is accurate even if a non-nil error is returned.
//
// It's recommended to supply a reader that buffers and implements io.ByteReader,
// as CidFromReader has to do many single-byte reads to decode varints.
// If the argument only implements io.Reader, single-byte Read calls are used instead.
func CidFromReader(r io.Reader) (int, Cid, error) {
// 64 bytes is enough for any CIDv0,
// and it's enough for most CIDv1s in practice.
// If the digest is too long, we'll allocate more.
br := toBufByteReader(r, make([]byte, 0, 64))
// We read the first varint, to tell if this is a CIDv0 or a CIDv1.
// The varint package wants a io.ByteReader, so we must wrap our io.Reader.
vers, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if vers == mh.SHA2_256 {
if n, err := io.ReadFull(r, br.dst[1:34]); err != nil {
return len(br.dst) + n, Undef, err
}
br.dst = br.dst[:34]
h, err := mh.Cast(br.dst)
if err != nil {
return len(br.dst), Undef, err
}
return len(br.dst), Cid{string(h)}, nil
}
if vers != 1 {
return len(br.dst), Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
}
// CID block encoding multicodec.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// We could replace most of the code below with go-multihash's ReadMultihash.
// Note that it would save code, but prevent reusing buffers.
// Plus, we already have a ByteReader now.
mhStart := len(br.dst)
// Multihash hash function code.
_, err = varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// Multihash digest length.
mhl, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// Refuse to make large allocations to prevent OOMs due to bugs.
const maxDigestAlloc = 32 << 20 // 32MiB
if mhl > maxDigestAlloc {
return len(br.dst), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
}
// Fine to convert mhl to int, given maxDigestAlloc.
prefixLength := len(br.dst)
cidLength := prefixLength + int(mhl)
if cidLength > cap(br.dst) {
// If the multihash digest doesn't fit in our initial 64 bytes,
// efficiently extend the slice via append+make.
br.dst = append(br.dst, make([]byte, cidLength-len(br.dst))...)
} else {
// The multihash digest fits inside our buffer,
// so just extend its capacity.
br.dst = br.dst[:cidLength]
}
if n, err := io.ReadFull(r, br.dst[prefixLength:cidLength]); err != nil {
// We can't use len(br.dst) here,
// as we've only read n bytes past prefixLength.
return prefixLength + n, Undef, err
}
// This simply ensures the multihash is valid.
// TODO: consider removing this bit, as it's probably redundant;
// for now, it helps ensure consistency with CidFromBytes.
_, _, err = mh.MHFromBytes(br.dst[mhStart:])
if err != nil {
return len(br.dst), Undef, err
}
return len(br.dst), Cid{string(br.dst)}, nil
}

View File

@@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz // +build gofuzz
package cid package cid

View File

@@ -4,10 +4,12 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"math/rand" "math/rand"
"reflect" "reflect"
"strings" "strings"
"testing" "testing"
"testing/iotest"
mbase "github.com/multiformats/go-multibase" mbase "github.com/multiformats/go-multibase"
mh "github.com/multiformats/go-multihash" mh "github.com/multiformats/go-multihash"
@@ -665,6 +667,7 @@ func TestReadCidsFromBuffer(t *testing.T) {
"k2cwueckqkibutvhkr4p2ln2pjcaxaakpd9db0e7j7ax1lxhhxy3ekpv", "k2cwueckqkibutvhkr4p2ln2pjcaxaakpd9db0e7j7ax1lxhhxy3ekpv",
"Qmf5Qzp6nGBku7CEn2UQx4mgN8TW69YUok36DrGa6NN893", "Qmf5Qzp6nGBku7CEn2UQx4mgN8TW69YUok36DrGa6NN893",
"zb2rhZi1JR4eNc2jBGaRYJKYM8JEB4ovenym8L1CmFsRAytkz", "zb2rhZi1JR4eNc2jBGaRYJKYM8JEB4ovenym8L1CmFsRAytkz",
"bafkqarjpmzuwyzltorxxezjpkvcfgqkfjfbfcvslivje2vchkzdu6rckjjcfgtkolaze6mssjqzeyn2ekrcfatkjku2vowseky3fswkfkm2deqkrju3e2",
} }
var cids []Cid var cids []Cid
@@ -692,51 +695,98 @@ func TestReadCidsFromBuffer(t *testing.T) {
if cur != len(buf) { if cur != len(buf) {
t.Fatal("had trailing bytes") t.Fatal("had trailing bytes")
} }
// The same, but now with CidFromReader.
// In multiple forms, to catch more io interface bugs.
for _, r := range []io.Reader{
// implements io.ByteReader
bytes.NewReader(buf),
// tiny reads, no io.ByteReader
iotest.OneByteReader(bytes.NewReader(buf)),
} {
cur = 0
for _, expc := range cids {
n, c, err := CidFromReader(r)
if err != nil {
t.Fatal(err)
}
if c != expc {
t.Fatal("cids mismatched")
}
cur += n
}
if cur != len(buf) {
t.Fatal("had trailing bytes")
}
}
} }
func TestBadCidFromBytes(t *testing.T) { func TestBadCidInput(t *testing.T) {
l, c, err := CidFromBytes([]byte{mh.SHA2_256, 32, 0x00}) for _, name := range []string{
if err == nil { "FromBytes",
t.Fatal("expected not-enough-bytes for V0 CidFromBytes") "FromReader",
} } {
if l != 0 { t.Run(name, func(t *testing.T) {
t.Fatal("expected length=0 from bad CidFromBytes") usingReader := name == "FromReader"
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm") fromBytes := CidFromBytes
if err != nil { if usingReader {
t.Fatal(err) fromBytes = func(data []byte) (int, Cid, error) {
} return CidFromReader(bytes.NewReader(data))
byts := make([]byte, c.ByteLen()) }
copy(byts, c.Bytes()) }
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = CidFromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
copy(byts, c.Bytes()) l, c, err := fromBytes([]byte{mh.SHA2_256, 32, 0x00})
byts[2] = 0x80 // bad multihash varint if err == nil {
byts[3] = 0x00 t.Fatal("expected not-enough-bytes for V0 CID")
l, c, err = CidFromBytes(byts) }
if err == nil { if !usingReader && l != 0 {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes") t.Fatal("expected length==0 from bad CID")
} } else if usingReader && l == 0 {
if l != 0 { t.Fatal("expected length!=0 from bad CID")
t.Fatal("expected length=0 from bad CidFromBytes") }
} if c != Undef {
if c != Undef { t.Fatal("expected Undef CID from bad CID")
t.Fatal("expected Undef CID from bad CidFromBytes") }
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
if err != nil {
t.Fatal(err)
}
byts := make([]byte, c.ByteLen())
copy(byts, c.Bytes())
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CID")
}
copy(byts, c.Bytes())
byts[2] = 0x80 // bad multihash varint
byts[3] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
})
} }
} }

2
go.mod
View File

@@ -7,4 +7,4 @@ require (
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf // indirect golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf // indirect
) )
go 1.15 go 1.16