add a token container with serialization as CARv1 file

This commit is contained in:
Michael Muré
2024-09-18 12:53:31 +02:00
parent 8615f6c72b
commit df9beadf9c
7 changed files with 378 additions and 4 deletions

186
pkg/container/car.go Normal file
View File

@@ -0,0 +1,186 @@
package container
import (
"bufio"
"bytes"
"encoding/binary"
"fmt"
"io"
"iter"
"github.com/ipfs/go-cid"
cbor "github.com/ipfs/go-ipld-cbor"
)
/*
Note: below is essentially a re-implementation of the CAR file v1 read and write.
This exists here for two reasons:
- go-car's API forces callers to go through an IPLD getter or a blockstore API
- generally, go-car is a very complex and large dependency
*/
// EmptyCid is a "zero" Cid: zero-length "identity" multihash with "raw" codec.
// It can be used to have at least one root in a CARv1 file (making it legal), yet
// denote that it can be ignored.
//
// Byte layout: CID version 1 (0x01), "raw" codec (0x55), "identity" multihash
// code (0x00), digest length 0 (0x00). The bytes are written in hexadecimal on
// purpose: a bare `55` is decimal 55 (0x37), which is not the "raw" codec.
var EmptyCid = cid.MustParse([]byte{0x01, 0x55, 0x00, 0x00})
// carBlock is one block of a CAR file: a CID paired with the bytes stored
// under it.
type carBlock struct {
	// c is the CID of the block.
	c cid.Cid
	// data is the block payload, i.e. the section bytes that follow the CID.
	data []byte
}
// writeCar writes a CARv1 file to w: a header declaring the given roots,
// followed by one section per block yielded by the iterator, in iteration order.
//
// A CARv1 header needs at least one root, so when roots is empty EmptyCid is
// used as the single root.
//
// It returns the first encoding or write error encountered, wrapped with
// context; iteration over blocks stops at that point.
func writeCar(w io.Writer, roots []cid.Cid, blocks iter.Seq[carBlock]) error {
	if len(roots) == 0 {
		roots = []cid.Cid{EmptyCid}
	}

	h := carHeader{
		Roots:   roots,
		Version: 1,
	}
	hb, err := cbor.DumpObject(h)
	if err != nil {
		return fmt.Errorf("encoding car header: %w", err)
	}
	if err := ldWrite(w, hb); err != nil {
		return fmt.Errorf("writing car header: %w", err)
	}

	for block := range blocks {
		// A section is the CID bytes immediately followed by the block data.
		if err := ldWrite(w, block.c.Bytes(), block.data); err != nil {
			return fmt.Errorf("writing car block %s: %w", block.c, err)
		}
	}
	return nil
}
// readCar reads a CARv1 file from the reader. It returns the roots declared in
// the header and an iterator over the blocks.
//
// The header is read and validated immediately; an unreadable header or a
// version other than 1 is reported through the returned error. Blocks are
// decoded lazily while the iterator is consumed. The iterator reads from the
// same underlying reader, so it can only be consumed once.
//
// The iterator ends silently on a clean io.EOF. Any other error is yielded
// once (with a zero carBlock) and then the iteration stops: after a failed
// read the position in the stream is no longer trustworthy.
func readCar(r io.Reader) (roots []cid.Cid, blocks iter.Seq2[carBlock, error], err error) {
	br := bufio.NewReader(r)

	hb, err := ldRead(br)
	if err != nil {
		return nil, nil, err
	}

	var h carHeader
	if err := cbor.DecodeInto(hb, &h); err != nil {
		return nil, nil, fmt.Errorf("invalid header: %w", err)
	}
	if h.Version != 1 {
		return nil, nil, fmt.Errorf("invalid car version: %d", h.Version)
	}

	return h.Roots, func(yield func(block carBlock, err error) bool) {
		for {
			block, err := readBlock(br)
			if err == io.EOF {
				return
			}
			if err != nil {
				// Report the failure and stop: never fall through and yield an
				// empty block with a nil error.
				yield(carBlock{}, err)
				return
			}
			if !yield(block, nil) {
				return
			}
		}
	}, nil
}
// readBlock reads the next section from the reader and splits it into a
// (cid, data) block. The CID is parsed from the start of the section and the
// remaining bytes are the block data.
//
// The data is re-hashed using the CID's own prefix and compared to the CID;
// a mismatch is reported as an error. Errors from ldRead are returned as-is.
func readBlock(r *bufio.Reader) (carBlock, error) {
	section, err := ldRead(r)
	if err != nil {
		return carBlock{}, err
	}

	cidLen, id, err := cid.CidFromReader(bytes.NewReader(section))
	if err != nil {
		return carBlock{}, err
	}
	payload := section[cidLen:]

	// integrity check: the payload must hash back to the CID it is stored under
	recomputed, err := id.Prefix().Sum(payload)
	switch {
	case err != nil:
		return carBlock{}, err
	case !recomputed.Equals(id):
		return carBlock{}, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", id, recomputed)
	}

	return carBlock{c: id, data: payload}, nil
}
// maxAllowedSectionSize dictates the maximum number of bytes that a CARv1 header
// or section is allowed to occupy without causing a decode to error.
// This cannot be supplied as an option, only adjusted as a package-level variable.
var maxAllowedSectionSize uint = 32 << 20 // 32MiB

// ldRead performs a length-delimited read of a section from the reader.
// A section is composed of an unsigned varint length followed by that many
// bytes of data; only the data is returned.
//
// io.EOF is returned only when the reader is exhausted before the first byte
// of a section, i.e. at a clean section boundary. A stream that ends inside
// the length prefix or inside the data yields io.ErrUnexpectedEOF, so callers
// can tell a truncated file from a complete one. A declared length above
// maxAllowedSectionSize is rejected before any allocation.
func ldRead(r *bufio.Reader) ([]byte, error) {
	if _, err := r.Peek(1); err != nil { // no more blocks, likely clean io.EOF
		return nil, err
	}

	l, err := binary.ReadUvarint(r)
	if err != nil {
		if err == io.EOF {
			return nil, io.ErrUnexpectedEOF // don't silently pretend this is a clean EOF
		}
		return nil, err
	}

	if l > uint64(maxAllowedSectionSize) { // Don't OOM
		return nil, fmt.Errorf("malformed car; section of %d bytes is bigger than the allowed maximum of %d bytes", l, maxAllowedSectionSize)
	}

	buf := make([]byte, l)
	if _, err := io.ReadFull(r, buf); err != nil {
		if err == io.EOF {
			// io.ReadFull reports a plain io.EOF when it could not read a single
			// byte. A length prefix was already consumed, so this is a truncated
			// section and not a clean end of file.
			return nil, io.ErrUnexpectedEOF
		}
		return nil, err
	}
	return buf, nil
}
// ldWrite performs a length-delimited write of a section on the writer.
// A section is composed of an unsigned varint length followed by the data.
//
// The data may be passed as several slices; they are written back to back and
// the length prefix covers their combined size. The first write error is
// returned and nothing further is written.
func ldWrite(w io.Writer, d ...[]byte) error {
	var sum uint64
	for _, s := range d {
		sum += uint64(len(s))
	}

	// A uint64 varint can take up to binary.MaxVarintLen64 (10) bytes, and
	// PutUvarint panics if the buffer is too small, so size it for the worst case.
	var prefix [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(prefix[:], sum)
	if _, err := w.Write(prefix[:n]); err != nil {
		return err
	}

	for _, s := range d {
		if _, err := w.Write(s); err != nil {
			return err
		}
	}
	return nil
}
// carHeader is the header of a CARv1 file. It is serialized with CBOR and
// stored as the first length-delimited section of the file.
type carHeader struct {
	// Roots lists the root CIDs declared by the file.
	Roots []cid.Cid
	// Version is the CAR format version; this package reads and writes version 1 only.
	Version uint64
}
// init registers carHeader with the cbor package at load time.
// NOTE(review): presumably required for cbor.DumpObject and cbor.DecodeInto to
// (de)serialize the type — confirm against go-ipld-cbor.
func init() {
	cbor.RegisterCborType(carHeader{})
}

40
pkg/container/car_test.go Normal file
View File

@@ -0,0 +1,40 @@
package container
import (
"bytes"
"os"
"testing"
"github.com/stretchr/testify/require"
)
// TestCarRoundTrip decodes a sample CARv1 file, re-encodes the decoded roots
// and blocks, and checks that the output is byte-for-byte the original file.
func TestCarRoundTrip(t *testing.T) {
	// this car file is a complex and legal CARv1 file
	want, err := os.ReadFile("testdata/sample-v1.car")
	require.NoError(t, err)

	roots, seq, err := readCar(bytes.NewReader(want))
	require.NoError(t, err)

	var decoded []carBlock
	for b, err := range seq {
		require.NoError(t, err)
		decoded = append(decoded, b)
	}
	require.Len(t, decoded, 1049)

	// Replay the decoded blocks in the order they were read.
	replay := func(yield func(carBlock) bool) {
		for i := range decoded {
			if !yield(decoded[i]) {
				return
			}
		}
	}

	var out bytes.Buffer
	require.NoError(t, writeCar(&out, roots, replay))

	// Bytes equal after the round-trip
	require.Equal(t, want, out.Bytes())
}

View File

@@ -0,0 +1,63 @@
package container
import (
"encoding/base64"
"io"
"github.com/ipfs/go-cid"
)
// TODO: should the invocation be set as root in the car file?

// Container is an in-memory collection of byte blobs indexed by their CID.
type Container map[cid.Cid][]byte
// New returns an empty Container, ready to be filled.
func New() Container {
	return make(Container)
}
// FromCar builds a Container from a CARv1 stream. The roots declared in the
// CAR header are discarded. Every block is stored under its CID; the first
// error reported while reading aborts the load and is returned.
func FromCar(r io.Reader) (Container, error) {
	_, blocks, err := readCar(r)
	if err != nil {
		return nil, err
	}

	ctn := New()
	for blk, blkErr := range blocks {
		if blkErr != nil {
			return nil, blkErr
		}
		ctn.AddBytes(blk.c, blk.data)
	}
	return ctn, nil
}
// FromCarBase64 builds a Container from a CARv1 stream that is encoded with
// standard base64 (base64.StdEncoding). It decodes on the fly and delegates to
// FromCar.
func FromCarBase64(r io.Reader) (Container, error) {
	return FromCar(base64.NewDecoder(base64.StdEncoding, r))
}
// ToCar serializes the container to w as a CARv1 file. No roots are passed to
// writeCar. Blocks are emitted in map iteration order, which Go leaves
// unspecified, so the byte output is not stable from one call to the next.
func (ctn Container) ToCar(w io.Writer) error {
	blocks := func(yield func(carBlock) bool) {
		for id, payload := range ctn {
			if !yield(carBlock{c: id, data: payload}) {
				return
			}
		}
	}
	return writeCar(w, nil, blocks)
}
// ToCarBase64 serializes the container to w as a CARv1 file encoded with
// standard base64 (base64.StdEncoding).
//
// It returns the error from writing the CAR data, or otherwise the error from
// closing the base64 encoder. Close is what flushes the last, partially filled
// base64 group to w, so a failure there means the output is incomplete and
// must not be reported as success.
func (ctn Container) ToCarBase64(w io.Writer) error {
	enc := base64.NewEncoder(base64.StdEncoding, w)
	if err := ctn.ToCar(enc); err != nil {
		// Still close the encoder, but the write error is the one worth reporting.
		_ = enc.Close()
		return err
	}
	return enc.Close()
}
// AddBytes stores data under the given CID, replacing any previous entry for
// that CID. The slice is stored as-is, without being copied.
func (ctn Container) AddBytes(cid cid.Cid, data []byte) {
	ctn[cid] = data
}
// GetBytes looks up the data stored under the given CID. The boolean reports
// whether an entry exists; when it does not, the returned slice is nil.
func (ctn Container) GetBytes(cid cid.Cid) ([]byte, bool) {
	if data, found := ctn[cid]; found {
		return data, true
	}
	return nil, false
}

View File

@@ -0,0 +1,52 @@
package container
import (
"bytes"
"crypto/rand"
"io"
"testing"
"github.com/ipfs/go-cid"
mh "github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
)
// TestContainerRoundTrip fills a container with random blocks, serializes it
// with each supported writer, reads it back with the matching reader, and
// checks that the result equals the original container.
func TestContainerRoundTrip(t *testing.T) {
	type roundTrip struct {
		name   string
		writer func(ctn Container, w io.Writer) error
		reader func(io.Reader) (Container, error)
	}
	cases := []roundTrip{
		{"carBytes", Container.ToCar, FromCar},
		{"carBase64", Container.ToCarBase64, FromCarBase64},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			original := New()

			hasher := cid.V1Builder{Codec: cid.Raw, MhType: mh.SHA2_256}
			for range 10 {
				payload := randBytes(32)
				id, err := hasher.Sum(payload)
				require.NoError(t, err)
				original.AddBytes(id, payload)
			}

			var encoded bytes.Buffer
			require.NoError(t, tc.writer(original, &encoded))

			decoded, err := tc.reader(bytes.NewReader(encoded.Bytes()))
			require.NoError(t, err)

			require.Equal(t, original, decoded)
		})
	}
}
// randBytes returns a fresh slice of n bytes filled from crypto/rand.
// The error from rand.Read is discarded on purpose.
func randBytes(n int) []byte {
	out := make([]byte, n)
	_, _ = rand.Read(out)
	return out
}

BIN
pkg/container/testdata/sample-v1.car vendored Normal file

Binary file not shown.