Compare commits

..

1 Commits

Author SHA1 Message Date
Daniel Martí
41f2377d96 implement CidFromReader
And reuse a CidFromBytes test for it, which includes both CIDv0 and
CIDv1 cases as inputs.

Fixes #126.
2021-07-02 18:35:02 +01:00
2 changed files with 91 additions and 107 deletions

94
cid.go
View File

@@ -694,10 +694,19 @@ type bufByteReader struct {
direct io.ByteReader
fallback io.Reader
dst []byte
consumed int
dst []byte
}
func (r *bufByteReader) ReadByte() (byte, error) {
// We still have some of the initial bytes to use.
if r.consumed < len(r.dst) {
b := r.dst[r.consumed]
r.consumed++
return b, nil
}
r.consumed++
// The underlying reader has ReadByte; use it.
if br := r.direct; br != nil {
b, err := br.ReadByte()
@@ -709,8 +718,6 @@ func (r *bufByteReader) ReadByte() (byte, error) {
}
// Fall back to a one-byte Read.
// TODO: consider reading straight into dst,
// once we have benchmarks and if they prove that to be faster.
var p [1]byte
if _, err := io.ReadFull(r.fallback, p[:]); err != nil {
return 0, err
@@ -730,30 +737,38 @@ func CidFromReader(r io.Reader) (int, Cid, error) {
// 64 bytes is enough for any CIDv0,
// and it's enough for most CIDv1s in practice.
// If the digest is too long, we'll allocate more.
br := toBufByteReader(r, make([]byte, 0, 64))
buf := make([]byte, 0, 64)
// We read two bytes, to tell if this is a CIDv0 or a CIDv1.
if n, err := io.ReadFull(r, buf[:2]); err != nil {
return n, Undef, err
}
buf = buf[:2]
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if buf[0] == mh.SHA2_256 && buf[1] == 32 {
if n, err := io.ReadFull(r, buf[2:34]); err != nil {
return len(buf) + n, Undef, err
}
buf = buf[:34]
h, err := mh.Cast(buf)
if err != nil {
return len(buf), Undef, err
}
return len(buf), Cid{string(h)}, nil
}
// We read the first varint, to tell if this is a CIDv0 or a CIDv1.
// The varint package wants an io.ByteReader, so we must wrap our io.Reader.
// Note that we already read two bytes, so bufByteReader uses those first.
// After those two bytes, bufByteReader appends the read bytes to br.dst.
br := toBufByteReader(r, buf[:2])
vers, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if vers == mh.SHA2_256 {
if n, err := io.ReadFull(r, br.dst[1:34]); err != nil {
return len(br.dst) + n, Undef, err
}
br.dst = br.dst[:34]
h, err := mh.Cast(br.dst)
if err != nil {
return len(br.dst), Undef, err
}
return len(br.dst), Cid{string(h)}, nil
}
if vers != 1 {
return len(br.dst), Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
}
@@ -781,38 +796,29 @@ func CidFromReader(r io.Reader) (int, Cid, error) {
return len(br.dst), Undef, err
}
// Update buf's length.
// We're not reading single bytes beyond this point.
buf = br.dst
br = nil
// Multihash digest; might be too long, so allocate.
// Refuse to make large allocations to prevent OOMs due to bugs.
// TODO: reuse buf if it has enough space
const maxDigestAlloc = 32 << 20 // 32MiB
if mhl > maxDigestAlloc {
return len(br.dst), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
return len(buf), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
}
// Fine to convert mhl to int, given maxDigestAlloc.
prefixLength := len(br.dst)
cidLength := prefixLength + int(mhl)
if cidLength > cap(br.dst) {
// If the multihash digest doesn't fit in our initial 64 bytes,
// efficiently extend the slice via append+make.
br.dst = append(br.dst, make([]byte, cidLength-cap(br.dst))...)
} else {
// The multihash digest fits inside our buffer,
// so just extend its length.
br.dst = br.dst[:cidLength]
}
if n, err := io.ReadFull(r, br.dst[prefixLength:cidLength]); err != nil {
// We can't use len(br.dst) here,
// as we've only read n bytes past prefixLength.
return prefixLength + n, Undef, err
digest := make([]byte, int(mhl))
if n, err := io.ReadFull(r, digest); err != nil {
return len(buf) + n, Undef, err
}
buf = append(buf, digest...)
// This simply ensures the multihash is valid.
// TODO: consider removing this bit, as it's probably redundant;
// for now, it helps ensure consistency with CidFromBytes.
_, _, err = mh.MHFromBytes(br.dst[mhStart:])
_, _, err = mh.MHFromBytes(buf[mhStart:])
if err != nil {
return len(br.dst), Undef, err
return len(buf), Undef, err
}
return len(br.dst), Cid{string(br.dst)}, nil
return len(buf), Cid{string(buf)}, nil
}

View File

@@ -721,71 +721,49 @@ func TestReadCidsFromBuffer(t *testing.T) {
}
}
func TestBadCidInput(t *testing.T) {
for _, name := range []string{
"FromBytes",
"FromReader",
} {
t.Run(name, func(t *testing.T) {
usingReader := name == "FromReader"
func TestBadCidFromBytes(t *testing.T) {
l, c, err := CidFromBytes([]byte{mh.SHA2_256, 32, 0x00})
if err == nil {
t.Fatal("expected not-enough-bytes for V0 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
fromBytes := CidFromBytes
if usingReader {
fromBytes = func(data []byte) (int, Cid, error) {
return CidFromReader(bytes.NewReader(data))
}
}
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
if err != nil {
t.Fatal(err)
}
byts := make([]byte, c.ByteLen())
copy(byts, c.Bytes())
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = CidFromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
l, c, err := fromBytes([]byte{mh.SHA2_256, 32, 0x00})
if err == nil {
t.Fatal("expected not-enough-bytes for V0 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CID")
}
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
if err != nil {
t.Fatal(err)
}
byts := make([]byte, c.ByteLen())
copy(byts, c.Bytes())
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CID")
}
copy(byts, c.Bytes())
byts[2] = 0x80 // bad multihash varint
byts[3] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
})
copy(byts, c.Bytes())
byts[2] = 0x80 // bad multihash varint
byts[3] = 0x00
l, c, err = CidFromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
}