Files
ucan/pkg/container/car.go

264 lines
6.2 KiB
Go
Raw Permalink Normal View History

package container
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"io"
"iter"
"github.com/ipfs/go-cid"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec/dagcbor"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/fluent/qp"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
"github.com/ipld/go-ipld-prime/node/basicnode"
)
/*
Note: below is essentially a re-implementation of the CAR file v1 read and write.
This exists here for two reasons:
- go-car's API forces to go through an IPLD getter or through a blockstore API
- generally, go-car is a very complex and large dependency
*/
// EmptyCid is a "zero" Cid: zero-length "identity" multihash with "raw" codec
// It can be used to have at least one root in a CARv1 file (making it legal), yet
// denote that it can be ignored.
var EmptyCid = cid.MustParse([]byte{01, 55, 00, 00})
type carBlock struct {
c cid.Cid
data []byte
}
2024-09-19 21:26:42 +02:00
// writeCar writes a CARv1 file containing the blocks from the iterator.
// If no roots are provided, a single EmptyCid is used as root to make the file
// spec compliant.
func writeCar(w io.Writer, roots []cid.Cid, blocks iter.Seq2[carBlock, error]) error {
if len(roots) == 0 {
roots = []cid.Cid{EmptyCid}
}
h := carHeader{
Roots: roots,
Version: 1,
}
hb, err := h.Write()
if err != nil {
return err
}
err = ldWrite(w, hb)
if err != nil {
return err
}
for block, err := range blocks {
if err != nil {
return err
}
err = ldWrite(w, block.c.Bytes(), block.data)
if err != nil {
return err
}
}
return nil
}
// readCar reads a CARv1 file from the reader, and return a block iterator.
// Roots are ignored.
func readCar(r io.Reader) (roots []cid.Cid, blocks iter.Seq2[carBlock, error], err error) {
br := bufio.NewReader(r)
hb, err := ldRead(br)
if err != nil {
return nil, nil, err
}
h, err := readHeader(hb)
if err != nil {
return nil, nil, err
}
if h.Version != 1 {
return nil, nil, fmt.Errorf("invalid car version: %d", h.Version)
}
return h.Roots, func(yield func(block carBlock, err error) bool) {
for {
block, err := readBlock(br)
if err == io.EOF {
return
}
if err != nil {
if !yield(carBlock{}, err) {
return
}
}
if !yield(block, nil) {
return
}
}
}, nil
}
// readBlock reads a section from the reader and decode a (cid+data) block.
func readBlock(r *bufio.Reader) (carBlock, error) {
raw, err := ldRead(r)
if err != nil {
return carBlock{}, err
}
n, c, err := cid.CidFromReader(bytes.NewReader(raw))
if err != nil {
return carBlock{}, err
}
data := raw[n:]
// integrity check
hashed, err := c.Prefix().Sum(data)
if err != nil {
return carBlock{}, err
}
if !hashed.Equals(c) {
return carBlock{}, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", c, hashed)
}
return carBlock{c: c, data: data}, nil
}
// maxAllowedSectionSize dictates the maximum number of bytes that a CARv1 header
// or section is allowed to occupy without causing a decode to error.
// This cannot be supplied as an option, only adjusted as a global. You should
// use v2#NewReader instead since it allows for options to be passed in.
var maxAllowedSectionSize uint = 32 << 20 // 32MiB
// ldRead performs a length-delimited read of a section from the reader.
// A section is composed of an uint length followed by the data.
func ldRead(r *bufio.Reader) ([]byte, error) {
if _, err := r.Peek(1); err != nil { // no more blocks, likely clean io.EOF
return nil, err
}
l, err := binary.ReadUvarint(r)
if err != nil {
if err == io.EOF {
return nil, io.ErrUnexpectedEOF // don't silently pretend this is a clean EOF
}
return nil, err
}
if l == 0 {
return nil, fmt.Errorf("invalid zero size section")
}
if l > uint64(maxAllowedSectionSize) { // Don't OOM
return nil, fmt.Errorf("malformed car; header is bigger than MaxAllowedSectionSize")
}
buf := make([]byte, l)
if _, err := io.ReadFull(r, buf); err != nil {
if err == io.EOF {
// we should be able to read the promised bytes, this is not normal
return nil, io.ErrUnexpectedEOF
}
return nil, err
}
return buf, nil
}
// ldWrite performs a length-delimited write of a section on the writer.
// A section is composed of an uint length followed by the data.
func ldWrite(w io.Writer, d ...[]byte) error {
var sum uint64
for _, s := range d {
sum += uint64(len(s))
}
buf := make([]byte, 8)
n := binary.PutUvarint(buf, sum)
_, err := w.Write(buf[:n])
if err != nil {
return err
}
for _, s := range d {
_, err = w.Write(s)
if err != nil {
return err
}
}
return nil
}
type carHeader struct {
Roots []cid.Cid
Version uint64
}
const rootsKey = "roots"
const versionKey = "version"
func readHeader(data []byte) (*carHeader, error) {
var header carHeader
nd, err := ipld.Decode(data, dagcbor.Decode)
if err != nil {
return nil, err
}
if nd.Length() != 2 {
return nil, fmt.Errorf("malformed car header")
}
rootsNd, err := nd.LookupByString(rootsKey)
if err != nil {
return nil, fmt.Errorf("malformed car header")
}
it := rootsNd.ListIterator()
if it == nil {
return nil, fmt.Errorf("malformed car header")
}
header.Roots = make([]cid.Cid, 0, rootsNd.Length())
for !it.Done() {
_, nd, err := it.Next()
if err != nil {
return nil, err
}
lk, err := nd.AsLink()
if err != nil {
return nil, err
}
switch lk := lk.(type) {
case cidlink.Link:
header.Roots = append(header.Roots, lk.Cid)
default:
return nil, fmt.Errorf("malformed car header")
}
}
versionNd, err := nd.LookupByString(versionKey)
if err != nil {
return nil, fmt.Errorf("malformed car header")
}
version, err := versionNd.AsInt()
if err != nil {
return nil, fmt.Errorf("malformed car header")
}
header.Version = uint64(version)
return &header, nil
}
func (ch *carHeader) Write() ([]byte, error) {
nd, err := qp.BuildMap(basicnode.Prototype.Any, 2, func(ma datamodel.MapAssembler) {
qp.MapEntry(ma, rootsKey, qp.List(int64(len(ch.Roots)), func(la datamodel.ListAssembler) {
for _, root := range ch.Roots {
qp.ListEntry(la, qp.Link(cidlink.Link{Cid: root}))
}
}))
qp.MapEntry(ma, versionKey, qp.Int(1))
})
if err != nil {
return nil, err
}
return ipld.Encode(nd, dagcbor.Encode)
}