container: implement header based serialization, also add SPEC test vectors

This commit is contained in:
Michael Muré
2025-01-08 17:31:04 +01:00
parent 704ed25768
commit 8f3f1c775e
5 changed files with 333 additions and 197 deletions

View File

@@ -52,14 +52,14 @@ The following base encoding combination are REQUIRED to be supported:
The CBOR bytes MUST be prepended by a single byte header to indicate the selection combination of base encoding and compression. This header value MUST be set according to the following table:
| Header as hex | Header as ASCII | Base encoding | Compression |
|---------------|-----------------|--------------------------|----------------|
| 0x40 | @ | raw bytes | no compression |
| 0x42 | B | base64 std padding | no compression |
| 0x43 | C | base64 url | no compression |
| 0x4D | M | raw bytes | gzip |
| 0x4F | O | base64 std padding | gzip |
| 0x50 | P | base64 url | gzip |
| Header as hex | Header as ASCII | Base encoding | Compression |
|---------------|-----------------|-------------------------|----------------|
| 0x40 | @ | raw bytes | no compression |
| 0x42 | B | base64 std padding | no compression |
| 0x43 | C | base64 url (no padding) | no compression |
| 0x4D | M | raw bytes | gzip |
| 0x4F | O | base64 std padding | gzip |
| 0x50 | P | base64 url (no padding) | gzip |
# 3 FAQ

118
pkg/container/packaging.go Normal file
View File

@@ -0,0 +1,118 @@
package container
import (
"compress/gzip"
"encoding/base64"
"errors"
"fmt"
"io"
)
const containerVersionTag = "ctn-v1"
type header byte
const (
headerRawBytes = header(0x40)
headerBase64StdPadding = header(0x42)
headerBase64URL = header(0x43)
headerRawBytesGzip = header(0x4D)
headerBase64StdPaddingGzip = header(0x4F)
headerBase64URLGzip = header(0x50)
)
func (h header) encoder(w io.Writer) *payloadWriter {
res := &payloadWriter{rawWriter: w, writer: w, header: h}
switch h {
case headerBase64StdPadding, headerBase64StdPaddingGzip:
b64Writer := base64.NewEncoder(base64.StdEncoding, res.writer)
res.writer = b64Writer
res.closers = append([]io.Closer{b64Writer}, res.closers...)
case headerBase64URL, headerBase64URLGzip:
b64Writer := base64.NewEncoder(base64.RawURLEncoding, res.writer)
res.writer = b64Writer
res.closers = append([]io.Closer{b64Writer}, res.closers...)
}
switch h {
case headerRawBytesGzip, headerBase64StdPaddingGzip, headerBase64URLGzip:
gzipWriter := gzip.NewWriter(res.writer)
res.writer = gzipWriter
res.closers = append([]io.Closer{gzipWriter}, res.closers...)
}
return res
}
func decodePayload(r io.Reader) (io.Reader, error) {
headerBuf := make([]byte, 1)
_, err := r.Read(headerBuf)
if err != nil {
return nil, err
}
h := header(headerBuf[0])
switch h {
case headerRawBytes,
headerBase64StdPadding,
headerBase64URL,
headerRawBytesGzip,
headerBase64StdPaddingGzip,
headerBase64URLGzip:
default:
return nil, fmt.Errorf("unknown container header")
}
switch h {
case headerBase64StdPadding, headerBase64StdPaddingGzip:
r = base64.NewDecoder(base64.StdEncoding, r)
case headerBase64URL, headerBase64URLGzip:
r = base64.NewDecoder(base64.RawURLEncoding, r)
}
switch h {
case headerRawBytesGzip, headerBase64StdPaddingGzip, headerBase64URLGzip:
gzipReader, err := gzip.NewReader(r)
if err != nil {
return nil, err
}
r = gzipReader
}
return r, nil
}
var _ io.WriteCloser = &payloadWriter{}
// payloadWriter is tasked with two things:
// - prepend the header byte
// - call Close() on all the underlying io.Writer
type payloadWriter struct {
rawWriter io.Writer
writer io.Writer
header header
headerWrote bool
closers []io.Closer
}
func (w *payloadWriter) Write(p []byte) (n int, err error) {
if !w.headerWrote {
_, err := w.rawWriter.Write([]byte{byte(w.header)})
if err != nil {
return 0, err
}
w.headerWrote = true
}
return w.writer.Write(p)
}
func (w *payloadWriter) Close() error {
var errs error
for _, closer := range w.closers {
if err := closer.Close(); err != nil {
errs = errors.Join(errs, err)
}
}
return errs
}

View File

@@ -2,7 +2,6 @@ package container
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
@@ -25,6 +24,72 @@ var ErrMultipleInvocations = fmt.Errorf("multiple invocations")
// Reader is a token container reader. It exposes the tokens conveniently decoded.
type Reader map[cid.Cid]token.Token
// FromBytes decodes a container from a []byte
func FromBytes(data []byte) (Reader, error) {
return FromReader(bytes.NewReader(data))
}
// FromString decodes a container from a string
func FromString(s string) (Reader, error) {
return FromReader(strings.NewReader(s))
}
// FromReader decodes a container from an io.Reader.
func FromReader(r io.Reader) (Reader, error) {
payload, err := decodePayload(r)
if err != nil {
return nil, err
}
n, err := ipld.DecodeStreaming(payload, cbor.Decode)
if err != nil {
return nil, err
}
if n.Kind() != datamodel.Kind_Map {
return nil, fmt.Errorf("invalid container format: expected map")
}
if n.Length() != 1 {
return nil, fmt.Errorf("invalid container format: expected single version key")
}
// get the first (and only) key-value pair
it := n.MapIterator()
key, tokensNode, err := it.Next()
if err != nil {
return nil, err
}
version, err := key.AsString()
if err != nil {
return nil, fmt.Errorf("invalid container format: version must be string")
}
if version != containerVersionTag {
return nil, fmt.Errorf("unsupported container version: %s", version)
}
if tokensNode.Kind() != datamodel.Kind_List {
return nil, fmt.Errorf("invalid container format: tokens must be a list")
}
ctn := make(Reader, tokensNode.Length())
it2 := tokensNode.ListIterator()
for !it2.Done() {
_, val, err := it2.Next()
if err != nil {
return nil, err
}
data, err := val.AsBytes()
if err != nil {
return nil, err
}
err = ctn.addToken(data)
if err != nil {
return nil, err
}
}
return ctn, nil
}
// GetToken returns an arbitrary decoded token, from its CID.
// If not found, ErrNotFound is returned.
func (ctn Reader) GetToken(cid cid.Cid) (token.Token, error) {
@@ -65,7 +130,7 @@ func (ctn Reader) GetAllDelegations() iter.Seq2[cid.Cid, *delegation.Token] {
// GetInvocation returns a single invocation.Token.
// If none are found, ErrNotFound is returned.
// If more than one invocation exist, ErrMultipleInvocations is returned.
// If more than one invocation exists, ErrMultipleInvocations is returned.
func (ctn Reader) GetInvocation() (*invocation.Token, error) {
var res *invocation.Token
for _, t := range ctn {
@@ -95,110 +160,6 @@ func (ctn Reader) GetAllInvocations() iter.Seq2[cid.Cid, *invocation.Token] {
}
}
// FromCbor decodes a DAG-CBOR encoded container.
func FromCbor(data []byte) (Reader, error) {
return FromCborReader(bytes.NewReader(data))
}
// FromCborReader is the same as FromCbor, but with an io.Reader.
func FromCborReader(r io.Reader) (Reader, error) {
n, err := ipld.DecodeStreaming(r, cbor.Decode)
if err != nil {
return nil, err
}
if n.Kind() != datamodel.Kind_Map {
return nil, fmt.Errorf("invalid container format: expected map")
}
if n.Length() != 1 {
return nil, fmt.Errorf("invalid container format: expected single version key")
}
// get the first (and only) key-value pair
it := n.MapIterator()
key, tokensNode, err := it.Next()
if err != nil {
return nil, err
}
version, err := key.AsString()
if err != nil {
return nil, fmt.Errorf("invalid container format: version must be string")
}
if version != currentContainerVersion {
return nil, fmt.Errorf("unsupported container version: %s", version)
}
if tokensNode.Kind() != datamodel.Kind_List {
return nil, fmt.Errorf("invalid container format: tokens must be a list")
}
ctn := make(Reader, tokensNode.Length())
it2 := tokensNode.ListIterator()
for !it2.Done() {
_, val, err := it2.Next()
if err != nil {
return nil, err
}
data, err := val.AsBytes()
if err != nil {
return nil, err
}
err = ctn.addToken(data)
if err != nil {
return nil, err
}
}
return ctn, nil
}
// FromCborBase64 decodes a base64 DAG-CBOR encoded container.
func FromCborBase64(data string) (Reader, error) {
return FromCborBase64Reader(strings.NewReader(data))
}
// FromCborBase64Reader is the same as FromCborBase64, but with an io.Reader.
func FromCborBase64Reader(r io.Reader) (Reader, error) {
return FromCborReader(base64.NewDecoder(base64.StdEncoding, r))
}
// FromCar decodes a CAR file encoded container.
func FromCar(data []byte) (Reader, error) {
return FromCarReader(bytes.NewReader(data))
}
// FromCarReader is the same as FromCar, but with an io.Reader.
func FromCarReader(r io.Reader) (Reader, error) {
_, it, err := readCar(r)
if err != nil {
return nil, err
}
ctn := make(Reader)
for block, err := range it {
if err != nil {
return nil, err
}
err = ctn.addToken(block.data)
if err != nil {
return nil, err
}
}
return ctn, nil
}
// FromCarBase64 decodes a base64 CAR file encoded container.
func FromCarBase64(data string) (Reader, error) {
return FromCarReader(strings.NewReader(data))
}
// FromCarBase64Reader is the same as FromCarBase64, but with an io.Reader.
func FromCarBase64Reader(r io.Reader) (Reader, error) {
return FromCarReader(base64.NewDecoder(base64.StdEncoding, r))
}
func (ctn Reader) addToken(data []byte) error {
tkn, c, err := token.FromSealed(data)
if err != nil {

View File

@@ -22,14 +22,22 @@ import (
func TestContainerRoundTrip(t *testing.T) {
for _, tc := range []struct {
name string
writer func(ctn Writer, w io.Writer) error
reader func(io.Reader) (Reader, error)
name string
expectedHeader header
writer any
}{
{"car", Writer.ToCarWriter, FromCarReader},
{"carBase64", Writer.ToCarBase64Writer, FromCarBase64Reader},
{"cbor", Writer.ToCborWriter, FromCborReader},
{"cborBase64", Writer.ToCborBase64Writer, FromCborBase64Reader},
{"Bytes", headerRawBytes, Writer.ToBytes},
{"BytesWriter", headerRawBytes, Writer.ToBytesWriter},
{"BytesGzipped", headerRawBytesGzip, Writer.ToBytesGzipped},
{"BytesGzippedWriter", headerRawBytesGzip, Writer.ToBytesGzippedWriter},
{"Base64StdPadding", headerBase64StdPadding, Writer.ToBase64StdPadding},
{"Base64StdPaddingWriter", headerBase64StdPadding, Writer.ToBase64StdPaddingWriter},
{"Base64StdPaddingGzipped", headerBase64StdPaddingGzip, Writer.ToBase64StdPaddingGzipped},
{"Base64StdPaddingGzippedWriter", headerBase64StdPaddingGzip, Writer.ToBase64StdPaddingGzippedWriter},
{"Base64URL", headerBase64URL, Writer.ToBase64URL},
{"Base64URLWriter", headerBase64URL, Writer.ToBase64URLWriter},
{"Base64URLGzip", headerBase64URLGzip, Writer.ToBase64URLGzip},
{"Base64URLGzipWriter", headerBase64URLGzip, Writer.ToBase64URLGzipWriter},
} {
t.Run(tc.name, func(t *testing.T) {
tokens := make(map[cid.Cid]*delegation.Token)
@@ -44,16 +52,48 @@ func TestContainerRoundTrip(t *testing.T) {
dataSize += len(data)
}
buf := bytes.NewBuffer(nil)
var reader Reader
var serialLen int
err := tc.writer(writer, buf)
require.NoError(t, err)
switch fn := tc.writer.(type) {
case func(ctn Writer, w io.Writer) error:
buf := bytes.NewBuffer(nil)
err := fn(writer, buf)
require.NoError(t, err)
serialLen = buf.Len()
t.Logf("data size %d", dataSize)
t.Logf("container overhead: %d%%, %d bytes", int(float32(buf.Len()-dataSize)/float32(dataSize)*100.0), buf.Len()-dataSize)
h, err := buf.ReadByte()
require.NoError(t, err)
require.Equal(t, byte(tc.expectedHeader), h)
err = buf.UnreadByte()
require.NoError(t, err)
reader, err := tc.reader(bytes.NewReader(buf.Bytes()))
require.NoError(t, err)
reader, err = FromReader(bytes.NewReader(buf.Bytes()))
require.NoError(t, err)
case func(ctn Writer) ([]byte, error):
b, err := fn(writer)
require.NoError(t, err)
serialLen = len(b)
require.Equal(t, byte(tc.expectedHeader), b[0])
reader, err = FromBytes(b)
require.NoError(t, err)
case func(ctn Writer) (string, error):
s, err := fn(writer)
require.NoError(t, err)
serialLen = len(s)
require.Equal(t, byte(tc.expectedHeader), s[0])
reader, err = FromString(s)
require.NoError(t, err)
}
t.Logf("data size %d, container size %d, overhead: %d%%, %d bytes",
dataSize, serialLen, int(float32(serialLen-dataSize)/float32(dataSize)*100.0), serialLen-dataSize)
for c, dlg := range tokens {
tknRead, err := reader.GetToken(c)
@@ -98,10 +138,12 @@ func BenchmarkContainerSerialisation(b *testing.B) {
writer func(ctn Writer, w io.Writer) error
reader func(io.Reader) (Reader, error)
}{
{"car", Writer.ToCarWriter, FromCarReader},
{"carBase64", Writer.ToCarBase64Writer, FromCarBase64Reader},
{"cbor", Writer.ToCborWriter, FromCborReader},
{"cborBase64", Writer.ToCborBase64Writer, FromCborBase64Reader},
{"Bytes", Writer.ToBytesWriter, FromReader},
{"BytesGzipped", Writer.ToBytesGzippedWriter, FromReader},
{"Base64StdPadding", Writer.ToBase64StdPaddingWriter, FromReader},
{"Base64StdPaddingGzipped", Writer.ToBase64StdPaddingGzippedWriter, FromReader},
{"Base64URL", Writer.ToBase64URLWriter, FromReader},
{"Base64URLGzip", Writer.ToBase64URLGzipWriter, FromReader},
} {
writer := NewWriter()
@@ -184,7 +226,7 @@ func FuzzContainerRead(f *testing.F) {
_, c, data := randToken()
writer.AddSealed(c, data)
}
data, err := writer.ToCbor()
data, err := writer.ToBytes()
require.NoError(f, err)
f.Add(data)
@@ -194,7 +236,7 @@ func FuzzContainerRead(f *testing.F) {
start := time.Now()
// search for panics
_, _ = FromCbor(data)
_, _ = FromBytes(data)
if time.Since(start) > 100*time.Millisecond {
panic("too long")

View File

@@ -2,7 +2,6 @@ package container
import (
"bytes"
"encoding/base64"
"io"
"github.com/ipfs/go-cid"
@@ -25,22 +24,92 @@ func (ctn Writer) AddSealed(cid cid.Cid, data []byte) {
ctn[cid] = data
}
const currentContainerVersion = "ctn-v1"
// ToBytes encode the container into raw bytes.
func (ctn Writer) ToBytes() ([]byte, error) {
return ctn.toBytes(headerRawBytes)
}
// ToCbor encode the container into a CBOR binary format.
func (ctn Writer) ToCbor() ([]byte, error) {
// ToBytesWriter is the same as ToBytes, but with an io.Writer.
func (ctn Writer) ToBytesWriter(w io.Writer) error {
return ctn.toWriter(headerRawBytes, w)
}
// ToBytesGzipped encode the container into gzipped bytes.
func (ctn Writer) ToBytesGzipped() ([]byte, error) {
return ctn.toBytes(headerRawBytesGzip)
}
// ToBytesGzippedWriter is the same as ToBytesGzipped, but with an io.Writer.
func (ctn Writer) ToBytesGzippedWriter(w io.Writer) error {
return ctn.toWriter(headerRawBytesGzip, w)
}
// ToBase64StdPadding encode the container into a base64 string, with standard encoding and padding.
func (ctn Writer) ToBase64StdPadding() (string, error) {
return ctn.toString(headerBase64StdPadding)
}
// ToBase64StdPaddingWriter is the same as ToBase64StdPadding, but with an io.Writer.
func (ctn Writer) ToBase64StdPaddingWriter(w io.Writer) error {
return ctn.toWriter(headerBase64StdPadding, w)
}
// ToBase64StdPaddingGzipped encode the container into a pre-gzipped base64 string, with standard encoding and padding.
func (ctn Writer) ToBase64StdPaddingGzipped() (string, error) {
return ctn.toString(headerBase64StdPaddingGzip)
}
// ToBase64StdPaddingGzippedWriter is the same as ToBase64StdPaddingGzipped, but with an io.Writer.
func (ctn Writer) ToBase64StdPaddingGzippedWriter(w io.Writer) error {
return ctn.toWriter(headerBase64StdPaddingGzip, w)
}
// ToBase64URL encode the container into base64 string, with URL-safe encoding and no padding.
func (ctn Writer) ToBase64URL() (string, error) {
return ctn.toString(headerBase64URL)
}
// ToBase64URLWriter is the same as ToBase64URL, but with an io.Writer.
func (ctn Writer) ToBase64URLWriter(w io.Writer) error {
return ctn.toWriter(headerBase64URL, w)
}
// ToBase64URL encode the container into pre-gzipped base64 string, with URL-safe encoding and no padding.
func (ctn Writer) ToBase64URLGzip() (string, error) {
return ctn.toString(headerBase64URLGzip)
}
// ToBase64URLWriter is the same as ToBase64URL, but with an io.Writer.
func (ctn Writer) ToBase64URLGzipWriter(w io.Writer) error {
return ctn.toWriter(headerBase64URLGzip, w)
}
func (ctn Writer) toBytes(header header) ([]byte, error) {
var buf bytes.Buffer
err := ctn.ToCborWriter(&buf)
err := ctn.toWriter(header, &buf)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// ToCborWriter is the same as ToCbor, but with an io.Writer.
func (ctn Writer) ToCborWriter(w io.Writer) error {
func (ctn Writer) toString(header header) (string, error) {
var buf bytes.Buffer
err := ctn.toWriter(header, &buf)
if err != nil {
return "", err
}
return buf.String(), nil
}
func (ctn Writer) toWriter(header header, w io.Writer) (err error) {
encoder := header.encoder(w)
defer func() {
err = encoder.Close()
}()
node, err := qp.BuildMap(basicnode.Prototype.Any, 1, func(ma datamodel.MapAssembler) {
qp.MapEntry(ma, currentContainerVersion, qp.List(int64(len(ctn)), func(la datamodel.ListAssembler) {
qp.MapEntry(ma, containerVersionTag, qp.List(int64(len(ctn)), func(la datamodel.ListAssembler) {
for _, data := range ctn {
qp.ListEntry(la, qp.Bytes(data))
}
@@ -49,60 +118,6 @@ func (ctn Writer) ToCborWriter(w io.Writer) error {
if err != nil {
return err
}
return ipld.EncodeStreaming(w, node, cbor.Encode)
}
// ToCborBase64 encode the container into a base64 encoded CBOR binary format.
func (ctn Writer) ToCborBase64() (string, error) {
var buf bytes.Buffer
err := ctn.ToCborBase64Writer(&buf)
if err != nil {
return "", err
}
return buf.String(), nil
}
// ToCborBase64Writer is the same as ToCborBase64, but with an io.Writer.
func (ctn Writer) ToCborBase64Writer(w io.Writer) error {
w2 := base64.NewEncoder(base64.StdEncoding, w)
defer w2.Close()
return ctn.ToCborWriter(w2)
}
// ToCar encode the container into a CAR file.
func (ctn Writer) ToCar() ([]byte, error) {
var buf bytes.Buffer
err := ctn.ToCarWriter(&buf)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// ToCarWriter is the same as ToCar, but with an io.Writer.
func (ctn Writer) ToCarWriter(w io.Writer) error {
return writeCar(w, nil, func(yield func(carBlock, error) bool) {
for c, data := range ctn {
if !yield(carBlock{c: c, data: data}, nil) {
return
}
}
})
}
// ToCarBase64 encode the container into a base64 encoded CAR file.
func (ctn Writer) ToCarBase64() (string, error) {
var buf bytes.Buffer
err := ctn.ToCarBase64Writer(&buf)
if err != nil {
return "", err
}
return buf.String(), nil
}
// ToCarBase64Writer is the same as ToCarBase64, but with an io.Writer.
func (ctn Writer) ToCarBase64Writer(w io.Writer) error {
w2 := base64.NewEncoder(base64.StdEncoding, w)
defer w2.Close()
return ctn.ToCarWriter(w2)
return ipld.EncodeStreaming(encoder, node, cbor.Encode)
}