diff --git a/pkg/container/SPEC.md b/pkg/container/SPEC.md index be15c35..6c8fe60 100644 --- a/pkg/container/SPEC.md +++ b/pkg/container/SPEC.md @@ -52,14 +52,14 @@ The following base encoding combination are REQUIRED to be supported: The CBOR bytes MUST be prepended by a single byte header to indicate the selection combination of base encoding and compression. This header value MUST be set according to the following table: -| Header as hex | Header as ASCII | Base encoding | Compression | -|---------------|-----------------|--------------------------|----------------| -| 0x40 | @ | raw bytes | no compression | -| 0x42 | B | base64 std padding | no compression | -| 0x43 | C | base64 url | no compression | -| 0x4D | M | raw bytes | gzip | -| 0x4F | O | base64 std padding | gzip | -| 0x50 | P | base64 url | gzip | +| Header as hex | Header as ASCII | Base encoding | Compression | +|---------------|-----------------|-------------------------|----------------| +| 0x40 | @ | raw bytes | no compression | +| 0x42 | B | base64 std padding | no compression | +| 0x43 | C | base64 url (no padding) | no compression | +| 0x4D | M | raw bytes | gzip | +| 0x4F | O | base64 std padding | gzip | +| 0x50 | P | base64 url (no padding) | gzip | # 3 FAQ diff --git a/pkg/container/packaging.go b/pkg/container/packaging.go new file mode 100644 index 0000000..f1a8854 --- /dev/null +++ b/pkg/container/packaging.go @@ -0,0 +1,118 @@ +package container + +import ( + "compress/gzip" + "encoding/base64" + "errors" + "fmt" + "io" +) + +const containerVersionTag = "ctn-v1" + +type header byte + +const ( + headerRawBytes = header(0x40) + headerBase64StdPadding = header(0x42) + headerBase64URL = header(0x43) + headerRawBytesGzip = header(0x4D) + headerBase64StdPaddingGzip = header(0x4F) + headerBase64URLGzip = header(0x50) +) + +func (h header) encoder(w io.Writer) *payloadWriter { + res := &payloadWriter{rawWriter: w, writer: w, header: h} + + switch h { + case headerBase64StdPadding, headerBase64StdPaddingGzip: + b64Writer := base64.NewEncoder(base64.StdEncoding, res.writer) + res.writer = b64Writer + res.closers = append([]io.Closer{b64Writer}, res.closers...) + case headerBase64URL, headerBase64URLGzip: + b64Writer := base64.NewEncoder(base64.RawURLEncoding, res.writer) + res.writer = b64Writer + res.closers = append([]io.Closer{b64Writer}, res.closers...) + } + + switch h { + case headerRawBytesGzip, headerBase64StdPaddingGzip, headerBase64URLGzip: + gzipWriter := gzip.NewWriter(res.writer) + res.writer = gzipWriter + res.closers = append([]io.Closer{gzipWriter}, res.closers...) + } + + return res +} + +func decodePayload(r io.Reader) (io.Reader, error) { + headerBuf := make([]byte, 1) + _, err := r.Read(headerBuf) + if err != nil { + return nil, err + } + h := header(headerBuf[0]) + + switch h { + case headerRawBytes, + headerBase64StdPadding, + headerBase64URL, + headerRawBytesGzip, + headerBase64StdPaddingGzip, + headerBase64URLGzip: + default: + return nil, fmt.Errorf("unknown container header") + } + + switch h { + case headerBase64StdPadding, headerBase64StdPaddingGzip: + r = base64.NewDecoder(base64.StdEncoding, r) + case headerBase64URL, headerBase64URLGzip: + r = base64.NewDecoder(base64.RawURLEncoding, r) + } + + switch h { + case headerRawBytesGzip, headerBase64StdPaddingGzip, headerBase64URLGzip: + gzipReader, err := gzip.NewReader(r) + if err != nil { + return nil, err + } + r = gzipReader + } + + return r, nil +} + +var _ io.WriteCloser = &payloadWriter{} + +// payloadWriter is tasked with two things: +// - prepend the header byte +// - call Close() on all the underlying io.Writer +type payloadWriter struct { + rawWriter io.Writer + writer io.Writer + header header + headerWrote bool + closers []io.Closer +} + +func (w *payloadWriter) Write(p []byte) (n int, err error) { + if !w.headerWrote { + _, err := w.rawWriter.Write([]byte{byte(w.header)}) + if err != nil { + return 0, err + } + w.headerWrote = true + } + return w.writer.Write(p) +} + +func (w *payloadWriter) Close() error { + var errs error + for _, closer := range w.closers { + if err := closer.Close(); err != nil { + errs = errors.Join(errs, err) + } + } + return errs +} diff --git a/pkg/container/reader.go b/pkg/container/reader.go index 431acf7..37d71d4 100644 --- a/pkg/container/reader.go +++ b/pkg/container/reader.go @@ -2,7 +2,6 @@ package container import ( "bytes" - "encoding/base64" "errors" "fmt" "io" @@ -25,6 +24,72 @@ var ErrMultipleInvocations = fmt.Errorf("multiple invocations") // Reader is a token container reader. It exposes the tokens conveniently decoded. type Reader map[cid.Cid]token.Token +// FromBytes decodes a container from a []byte +func FromBytes(data []byte) (Reader, error) { + return FromReader(bytes.NewReader(data)) +} + +// FromString decodes a container from a string +func FromString(s string) (Reader, error) { + return FromReader(strings.NewReader(s)) +} + +// FromReader decodes a container from an io.Reader. +func FromReader(r io.Reader) (Reader, error) { + payload, err := decodePayload(r) + if err != nil { + return nil, err + } + + n, err := ipld.DecodeStreaming(payload, cbor.Decode) + if err != nil { + return nil, err + } + if n.Kind() != datamodel.Kind_Map { + return nil, fmt.Errorf("invalid container format: expected map") + } + if n.Length() != 1 { + return nil, fmt.Errorf("invalid container format: expected single version key") + } + + // get the first (and only) key-value pair + it := n.MapIterator() + key, tokensNode, err := it.Next() + if err != nil { + return nil, err + } + + version, err := key.AsString() + if err != nil { + return nil, fmt.Errorf("invalid container format: version must be string") + } + if version != containerVersionTag { + return nil, fmt.Errorf("unsupported container version: %s", version) + } + + if tokensNode.Kind() != datamodel.Kind_List { + return nil, fmt.Errorf("invalid container format: tokens must be a list") + } + + ctn := make(Reader, tokensNode.Length()) + it2 := tokensNode.ListIterator() + for !it2.Done() { + _, val, err := it2.Next() + if err != nil { + return nil, err + } + data, err := val.AsBytes() + if err != nil { + return nil, err + } + err = ctn.addToken(data) + if err != nil { + return nil, err + } + } + return ctn, nil +} + // GetToken returns an arbitrary decoded token, from its CID. // If not found, ErrNotFound is returned. func (ctn Reader) GetToken(cid cid.Cid) (token.Token, error) { @@ -65,7 +130,7 @@ func (ctn Reader) GetAllDelegations() iter.Seq2[cid.Cid, *delegation.Token] { // GetInvocation returns a single invocation.Token. // If none are found, ErrNotFound is returned. -// If more than one invocation exist, ErrMultipleInvocations is returned. +// If more than one invocation exists, ErrMultipleInvocations is returned. func (ctn Reader) GetInvocation() (*invocation.Token, error) { var res *invocation.Token for _, t := range ctn { @@ -95,110 +160,6 @@ func (ctn Reader) GetAllInvocations() iter.Seq2[cid.Cid, *invocation.Token] { } } -// FromCbor decodes a DAG-CBOR encoded container. -func FromCbor(data []byte) (Reader, error) { - return FromCborReader(bytes.NewReader(data)) -} - -// FromCborReader is the same as FromCbor, but with an io.Reader. -func FromCborReader(r io.Reader) (Reader, error) { - n, err := ipld.DecodeStreaming(r, cbor.Decode) - if err != nil { - return nil, err - } - if n.Kind() != datamodel.Kind_Map { - return nil, fmt.Errorf("invalid container format: expected map") - } - if n.Length() != 1 { - return nil, fmt.Errorf("invalid container format: expected single version key") - } - - // get the first (and only) key-value pair - it := n.MapIterator() - key, tokensNode, err := it.Next() - if err != nil { - return nil, err - } - - version, err := key.AsString() - if err != nil { - return nil, fmt.Errorf("invalid container format: version must be string") - } - if version != currentContainerVersion { - return nil, fmt.Errorf("unsupported container version: %s", version) - } - - if tokensNode.Kind() != datamodel.Kind_List { - return nil, fmt.Errorf("invalid container format: tokens must be a list") - } - - ctn := make(Reader, tokensNode.Length()) - it2 := tokensNode.ListIterator() - for !it2.Done() { - _, val, err := it2.Next() - if err != nil { - return nil, err - } - data, err := val.AsBytes() - if err != nil { - return nil, err - } - err = ctn.addToken(data) - if err != nil { - return nil, err - } - } - return ctn, nil -} - -// FromCborBase64 decodes a base64 DAG-CBOR encoded container. -func FromCborBase64(data string) (Reader, error) { - return FromCborBase64Reader(strings.NewReader(data)) -} - -// FromCborBase64Reader is the same as FromCborBase64, but with an io.Reader. -func FromCborBase64Reader(r io.Reader) (Reader, error) { - return FromCborReader(base64.NewDecoder(base64.StdEncoding, r)) -} - -// FromCar decodes a CAR file encoded container. -func FromCar(data []byte) (Reader, error) { - return FromCarReader(bytes.NewReader(data)) -} - -// FromCarReader is the same as FromCar, but with an io.Reader. -func FromCarReader(r io.Reader) (Reader, error) { - _, it, err := readCar(r) - if err != nil { - return nil, err - } - - ctn := make(Reader) - - for block, err := range it { - if err != nil { - return nil, err - } - - err = ctn.addToken(block.data) - if err != nil { - return nil, err - } - } - - return ctn, nil -} - -// FromCarBase64 decodes a base64 CAR file encoded container. -func FromCarBase64(data string) (Reader, error) { - return FromCarReader(strings.NewReader(data)) -} - -// FromCarBase64Reader is the same as FromCarBase64, but with an io.Reader. -func FromCarBase64Reader(r io.Reader) (Reader, error) { - return FromCarReader(base64.NewDecoder(base64.StdEncoding, r)) -} - func (ctn Reader) addToken(data []byte) error { tkn, c, err := token.FromSealed(data) if err != nil { diff --git a/pkg/container/serial_test.go b/pkg/container/serial_test.go index 3894e7a..37ed3f7 100644 --- a/pkg/container/serial_test.go +++ b/pkg/container/serial_test.go @@ -22,14 +22,22 @@ import ( func TestContainerRoundTrip(t *testing.T) { for _, tc := range []struct { - name string - writer func(ctn Writer, w io.Writer) error - reader func(io.Reader) (Reader, error) + name string + expectedHeader header + writer any }{ - {"car", Writer.ToCarWriter, FromCarReader}, - {"carBase64", Writer.ToCarBase64Writer, FromCarBase64Reader}, - {"cbor", Writer.ToCborWriter, FromCborReader}, - {"cborBase64", Writer.ToCborBase64Writer, FromCborBase64Reader}, + {"Bytes", headerRawBytes, Writer.ToBytes}, + {"BytesWriter", headerRawBytes, Writer.ToBytesWriter}, + {"BytesGzipped", headerRawBytesGzip, Writer.ToBytesGzipped}, + {"BytesGzippedWriter", headerRawBytesGzip, Writer.ToBytesGzippedWriter}, + {"Base64StdPadding", headerBase64StdPadding, Writer.ToBase64StdPadding}, + {"Base64StdPaddingWriter", headerBase64StdPadding, Writer.ToBase64StdPaddingWriter}, + {"Base64StdPaddingGzipped", headerBase64StdPaddingGzip, Writer.ToBase64StdPaddingGzipped}, + {"Base64StdPaddingGzippedWriter", headerBase64StdPaddingGzip, Writer.ToBase64StdPaddingGzippedWriter}, + {"Base64URL", headerBase64URL, Writer.ToBase64URL}, + {"Base64URLWriter", headerBase64URL, Writer.ToBase64URLWriter}, + {"Base64URLGzip", headerBase64URLGzip, Writer.ToBase64URLGzip}, + {"Base64URLGzipWriter", headerBase64URLGzip, Writer.ToBase64URLGzipWriter}, } { t.Run(tc.name, func(t *testing.T) { tokens := make(map[cid.Cid]*delegation.Token) @@ -44,16 +52,48 @@ func TestContainerRoundTrip(t *testing.T) { dataSize += len(data) } - buf := bytes.NewBuffer(nil) + var reader Reader + var serialLen int - err := tc.writer(writer, buf) - require.NoError(t, err) + switch fn := tc.writer.(type) { + case func(ctn Writer, w io.Writer) error: + buf := bytes.NewBuffer(nil) + err := fn(writer, buf) + require.NoError(t, err) + serialLen = buf.Len() - t.Logf("data size %d", dataSize) - t.Logf("container overhead: %d%%, %d bytes", int(float32(buf.Len()-dataSize)/float32(dataSize)*100.0), buf.Len()-dataSize) + h, err := buf.ReadByte() + require.NoError(t, err) + require.Equal(t, byte(tc.expectedHeader), h) + err = buf.UnreadByte() + require.NoError(t, err) - reader, err := tc.reader(bytes.NewReader(buf.Bytes())) - require.NoError(t, err) + reader, err = FromReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + + case func(ctn Writer) ([]byte, error): + b, err := fn(writer) + require.NoError(t, err) + serialLen = len(b) + + require.Equal(t, byte(tc.expectedHeader), b[0]) + + reader, err = FromBytes(b) + require.NoError(t, err) + + case func(ctn Writer) (string, error): + s, err := fn(writer) + require.NoError(t, err) + serialLen = len(s) + + require.Equal(t, byte(tc.expectedHeader), s[0]) + + reader, err = FromString(s) + require.NoError(t, err) + } + + t.Logf("data size %d, container size %d, overhead: %d%%, %d bytes", + dataSize, serialLen, int(float32(serialLen-dataSize)/float32(dataSize)*100.0), serialLen-dataSize) for c, dlg := range tokens { tknRead, err := reader.GetToken(c) @@ -98,10 +138,12 @@ func BenchmarkContainerSerialisation(b *testing.B) { writer func(ctn Writer, w io.Writer) error reader func(io.Reader) (Reader, error) }{ - {"car", Writer.ToCarWriter, FromCarReader}, - {"carBase64", Writer.ToCarBase64Writer, FromCarBase64Reader}, - {"cbor", Writer.ToCborWriter, FromCborReader}, - {"cborBase64", Writer.ToCborBase64Writer, FromCborBase64Reader}, + {"Bytes", Writer.ToBytesWriter, FromReader}, + {"BytesGzipped", Writer.ToBytesGzippedWriter, FromReader}, + {"Base64StdPadding", Writer.ToBase64StdPaddingWriter, FromReader}, + {"Base64StdPaddingGzipped", Writer.ToBase64StdPaddingGzippedWriter, FromReader}, + {"Base64URL", Writer.ToBase64URLWriter, FromReader}, + {"Base64URLGzip", Writer.ToBase64URLGzipWriter, FromReader}, } { writer := NewWriter() @@ -184,7 +226,7 @@ func FuzzContainerRead(f *testing.F) { _, c, data := randToken() writer.AddSealed(c, data) } - data, err := writer.ToCbor() + data, err := writer.ToBytes() require.NoError(f, err) f.Add(data) @@ -194,7 +236,7 @@ func FuzzContainerRead(f *testing.F) { start := time.Now() // search for panics - _, _ = FromCbor(data) + _, _ = FromBytes(data) if time.Since(start) > 100*time.Millisecond { panic("too long") diff --git a/pkg/container/writer.go b/pkg/container/writer.go index 60a148b..6f366b8 100644 --- a/pkg/container/writer.go +++ b/pkg/container/writer.go @@ -2,7 +2,6 @@ package container import ( "bytes" - "encoding/base64" "io" "github.com/ipfs/go-cid" @@ -25,22 +24,92 @@ func (ctn Writer) AddSealed(cid cid.Cid, data []byte) { ctn[cid] = data } -const currentContainerVersion = "ctn-v1" +// ToBytes encode the container into raw bytes. +func (ctn Writer) ToBytes() ([]byte, error) { + return ctn.toBytes(headerRawBytes) +} -// ToCbor encode the container into a CBOR binary format. -func (ctn Writer) ToCbor() ([]byte, error) { +// ToBytesWriter is the same as ToBytes, but with an io.Writer. +func (ctn Writer) ToBytesWriter(w io.Writer) error { + return ctn.toWriter(headerRawBytes, w) +} + +// ToBytesGzipped encode the container into gzipped bytes. +func (ctn Writer) ToBytesGzipped() ([]byte, error) { + return ctn.toBytes(headerRawBytesGzip) +} + +// ToBytesGzippedWriter is the same as ToBytesGzipped, but with an io.Writer. +func (ctn Writer) ToBytesGzippedWriter(w io.Writer) error { + return ctn.toWriter(headerRawBytesGzip, w) +} + +// ToBase64StdPadding encode the container into a base64 string, with standard encoding and padding. +func (ctn Writer) ToBase64StdPadding() (string, error) { + return ctn.toString(headerBase64StdPadding) +} + +// ToBase64StdPaddingWriter is the same as ToBase64StdPadding, but with an io.Writer. +func (ctn Writer) ToBase64StdPaddingWriter(w io.Writer) error { + return ctn.toWriter(headerBase64StdPadding, w) +} + +// ToBase64StdPaddingGzipped encode the container into a pre-gzipped base64 string, with standard encoding and padding. +func (ctn Writer) ToBase64StdPaddingGzipped() (string, error) { + return ctn.toString(headerBase64StdPaddingGzip) +} + +// ToBase64StdPaddingGzippedWriter is the same as ToBase64StdPaddingGzipped, but with an io.Writer. +func (ctn Writer) ToBase64StdPaddingGzippedWriter(w io.Writer) error { + return ctn.toWriter(headerBase64StdPaddingGzip, w) +} + +// ToBase64URL encode the container into base64 string, with URL-safe encoding and no padding. +func (ctn Writer) ToBase64URL() (string, error) { + return ctn.toString(headerBase64URL) +} + +// ToBase64URLWriter is the same as ToBase64URL, but with an io.Writer. +func (ctn Writer) ToBase64URLWriter(w io.Writer) error { + return ctn.toWriter(headerBase64URL, w) +} + +// ToBase64URL encode the container into pre-gzipped base64 string, with URL-safe encoding and no padding. +func (ctn Writer) ToBase64URLGzip() (string, error) { + return ctn.toString(headerBase64URLGzip) +} + +// ToBase64URLWriter is the same as ToBase64URL, but with an io.Writer. +func (ctn Writer) ToBase64URLGzipWriter(w io.Writer) error { + return ctn.toWriter(headerBase64URLGzip, w) +} + +func (ctn Writer) toBytes(header header) ([]byte, error) { var buf bytes.Buffer - err := ctn.ToCborWriter(&buf) + err := ctn.toWriter(header, &buf) if err != nil { return nil, err } return buf.Bytes(), nil } -// ToCborWriter is the same as ToCbor, but with an io.Writer. -func (ctn Writer) ToCborWriter(w io.Writer) error { +func (ctn Writer) toString(header header) (string, error) { + var buf bytes.Buffer + err := ctn.toWriter(header, &buf) + if err != nil { + return "", err + } + return buf.String(), nil +} + +func (ctn Writer) toWriter(header header, w io.Writer) (err error) { + encoder := header.encoder(w) + + defer func() { + err = encoder.Close() + }() node, err := qp.BuildMap(basicnode.Prototype.Any, 1, func(ma datamodel.MapAssembler) { - qp.MapEntry(ma, currentContainerVersion, qp.List(int64(len(ctn)), func(la datamodel.ListAssembler) { + qp.MapEntry(ma, containerVersionTag, qp.List(int64(len(ctn)), func(la datamodel.ListAssembler) { for _, data := range ctn { qp.ListEntry(la, qp.Bytes(data)) } @@ -49,60 +118,6 @@ func (ctn Writer) ToCborWriter(w io.Writer) error { if err != nil { return err } - return ipld.EncodeStreaming(w, node, cbor.Encode) -} -// ToCborBase64 encode the container into a base64 encoded CBOR binary format. -func (ctn Writer) ToCborBase64() (string, error) { - var buf bytes.Buffer - err := ctn.ToCborBase64Writer(&buf) - if err != nil { - return "", err - } - return buf.String(), nil -} - -// ToCborBase64Writer is the same as ToCborBase64, but with an io.Writer. -func (ctn Writer) ToCborBase64Writer(w io.Writer) error { - w2 := base64.NewEncoder(base64.StdEncoding, w) - defer w2.Close() - return ctn.ToCborWriter(w2) -} - -// ToCar encode the container into a CAR file. -func (ctn Writer) ToCar() ([]byte, error) { - var buf bytes.Buffer - err := ctn.ToCarWriter(&buf) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -// ToCarWriter is the same as ToCar, but with an io.Writer. -func (ctn Writer) ToCarWriter(w io.Writer) error { - return writeCar(w, nil, func(yield func(carBlock, error) bool) { - for c, data := range ctn { - if !yield(carBlock{c: c, data: data}, nil) { - return - } - } - }) -} - -// ToCarBase64 encode the container into a base64 encoded CAR file. -func (ctn Writer) ToCarBase64() (string, error) { - var buf bytes.Buffer - err := ctn.ToCarBase64Writer(&buf) - if err != nil { - return "", err - } - return buf.String(), nil -} - -// ToCarBase64Writer is the same as ToCarBase64, but with an io.Writer. -func (ctn Writer) ToCarBase64Writer(w io.Writer) error { - w2 := base64.NewEncoder(base64.StdEncoding, w) - defer w2.Close() - return ctn.ToCarWriter(w2) + return ipld.EncodeStreaming(encoder, node, cbor.Encode) }