Compare commits

..

1 Commits

Author SHA1 Message Date
Daniel Martí
8f4ec9e084 implement CidFromReader
And reuse two CidFromBytes tests for it, which include both CIDv0 and
CIDv1 cases as inputs, as well as some inputs that should error.

Fixes #126.
2021-07-14 23:28:25 +01:00
2 changed files with 107 additions and 91 deletions

94
cid.go
View File

@@ -694,19 +694,10 @@ type bufByteReader struct {
direct io.ByteReader
fallback io.Reader
consumed int
dst []byte
dst []byte
}
func (r *bufByteReader) ReadByte() (byte, error) {
// We still have some of the initial bytes to use.
if r.consumed < len(r.dst) {
b := r.dst[r.consumed]
r.consumed++
return b, nil
}
r.consumed++
// The underlying reader has ReadByte; use it.
if br := r.direct; br != nil {
b, err := br.ReadByte()
@@ -718,6 +709,8 @@ func (r *bufByteReader) ReadByte() (byte, error) {
}
// Fall back to a one-byte Read.
// TODO: consider reading straight into dst,
// once we have benchmarks and if they prove that to be faster.
var p [1]byte
if _, err := io.ReadFull(r.fallback, p[:]); err != nil {
return 0, err
@@ -737,38 +730,30 @@ func CidFromReader(r io.Reader) (int, Cid, error) {
// 64 bytes is enough for any CIDv0,
// and it's enough for most CIDv1s in practice.
// If the digest is too long, we'll allocate more.
buf := make([]byte, 0, 64)
// We read two bytes, to tell if this is a CIDv0 or a CIDv1.
if n, err := io.ReadFull(r, buf[:2]); err != nil {
return n, Undef, err
}
buf = buf[:2]
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if buf[0] == mh.SHA2_256 && buf[1] == 32 {
if n, err := io.ReadFull(r, buf[2:34]); err != nil {
return len(buf) + n, Undef, err
}
buf = buf[:34]
h, err := mh.Cast(buf)
if err != nil {
return len(buf), Undef, err
}
return len(buf), Cid{string(h)}, nil
}
br := toBufByteReader(r, make([]byte, 0, 64))
// We read the first varint, to tell if this is a CIDv0 or a CIDv1.
// The varint package wants an io.ByteReader, so we must wrap our io.Reader.
// Note that we already read two bytes, so bufByteReader uses those first.
// After those two bytes, bufByteReader appends the read bytes to br.dst.
br := toBufByteReader(r, buf[:2])
vers, err := varint.ReadUvarint(br)
if err != nil {
return len(br.dst), Undef, err
}
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
if vers == mh.SHA2_256 {
if n, err := io.ReadFull(r, br.dst[1:34]); err != nil {
return len(br.dst) + n, Undef, err
}
br.dst = br.dst[:34]
h, err := mh.Cast(br.dst)
if err != nil {
return len(br.dst), Undef, err
}
return len(br.dst), Cid{string(h)}, nil
}
if vers != 1 {
return len(br.dst), Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
}
@@ -796,29 +781,38 @@ func CidFromReader(r io.Reader) (int, Cid, error) {
return len(br.dst), Undef, err
}
// Update buf's length.
// We're not reading single bytes beyond this point.
buf = br.dst
br = nil
// Multihash digest; might be too long, so allocate.
// Refuse to make large allocations to prevent OOMs due to bugs.
// TODO: reuse buf if it has enough space
const maxDigestAlloc = 32 << 20 // 32MiB
if mhl > maxDigestAlloc {
return len(buf), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
return len(br.dst), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
}
digest := make([]byte, int(mhl))
if n, err := io.ReadFull(r, digest); err != nil {
return len(buf) + n, Undef, err
// Fine to convert mhl to int, given maxDigestAlloc.
prefixLength := len(br.dst)
cidLength := prefixLength + int(mhl)
if cidLength > cap(br.dst) {
// If the multihash digest doesn't fit in our initial 64 bytes,
// efficiently extend the slice via append+make.
br.dst = append(br.dst, make([]byte, cidLength-cap(br.dst))...)
} else {
// The multihash digest fits inside our buffer,
// so just extend its length within the existing capacity.
br.dst = br.dst[:cidLength]
}
if n, err := io.ReadFull(r, br.dst[prefixLength:cidLength]); err != nil {
// We can't use len(br.dst) here,
// as we've only read n bytes past prefixLength.
return prefixLength + n, Undef, err
}
buf = append(buf, digest...)
// This simply ensures the multihash is valid.
_, _, err = mh.MHFromBytes(buf[mhStart:])
// TODO: consider removing this bit, as it's probably redundant;
// for now, it helps ensure consistency with CidFromBytes.
_, _, err = mh.MHFromBytes(br.dst[mhStart:])
if err != nil {
return len(buf), Undef, err
return len(br.dst), Undef, err
}
return len(buf), Cid{string(buf)}, nil
return len(br.dst), Cid{string(br.dst)}, nil
}

View File

@@ -721,49 +721,71 @@ func TestReadCidsFromBuffer(t *testing.T) {
}
}
func TestBadCidFromBytes(t *testing.T) {
l, c, err := CidFromBytes([]byte{mh.SHA2_256, 32, 0x00})
if err == nil {
t.Fatal("expected not-enough-bytes for V0 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
func TestBadCidInput(t *testing.T) {
for _, name := range []string{
"FromBytes",
"FromReader",
} {
t.Run(name, func(t *testing.T) {
usingReader := name == "FromReader"
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
if err != nil {
t.Fatal(err)
}
byts := make([]byte, c.ByteLen())
copy(byts, c.Bytes())
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = CidFromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
fromBytes := CidFromBytes
if usingReader {
fromBytes = func(data []byte) (int, Cid, error) {
return CidFromReader(bytes.NewReader(data))
}
}
copy(byts, c.Bytes())
byts[2] = 0x80 // bad multihash varint
byts[3] = 0x00
l, c, err = CidFromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
}
if l != 0 {
t.Fatal("expected length=0 from bad CidFromBytes")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
l, c, err := fromBytes([]byte{mh.SHA2_256, 32, 0x00})
if err == nil {
t.Fatal("expected not-enough-bytes for V0 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CID")
}
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
if err != nil {
t.Fatal(err)
}
byts := make([]byte, c.ByteLen())
copy(byts, c.Bytes())
byts[1] = 0x80 // bad codec varint
byts[2] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CID")
}
copy(byts, c.Bytes())
byts[2] = 0x80 // bad multihash varint
byts[3] = 0x00
l, c, err = fromBytes(byts)
if err == nil {
t.Fatal("expected not-enough-bytes for V1 CID")
}
if !usingReader && l != 0 {
t.Fatal("expected length==0 from bad CID")
} else if usingReader && l == 0 {
t.Fatal("expected length!=0 from bad CID")
}
if c != Undef {
t.Fatal("expected Undef CID from bad CidFromBytes")
}
})
}
}