From f7b4b48791087c99307eb065302a574215e7671b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Mur=C3=A9?= Date: Tue, 1 Oct 2024 10:33:38 +0200 Subject: [PATCH] container: more experiments --- pkg/container/container.go | 99 +++---------- pkg/container/container_test.go | 60 -------- pkg/container/serialization.go | 216 ++++++++++++++++++++++++++++ pkg/container/serialization_test.go | 175 ++++++++++++++++++++++ 4 files changed, 411 insertions(+), 139 deletions(-) delete mode 100644 pkg/container/container_test.go create mode 100644 pkg/container/serialization.go create mode 100644 pkg/container/serialization_test.go diff --git a/pkg/container/container.go b/pkg/container/container.go index 58e3f87..472b8c5 100644 --- a/pkg/container/container.go +++ b/pkg/container/container.go @@ -1,72 +1,24 @@ package container import ( - "encoding/base64" - "io" + "fmt" "github.com/ipfs/go-cid" - cbor "github.com/ipfs/go-ipld-cbor" - "github.com/multiformats/go-multihash" + + "github.com/ucan-wg/go-ucan/token" + "github.com/ucan-wg/go-ucan/token/delegation" ) // TODO: should the invocation being set as root in the car file? +var ErrNotFound = fmt.Errorf("not found") + type Container map[cid.Cid][]byte func New() Container { return make(Container) } -func FromCar(r io.Reader) (Container, error) { - _, it, err := readCar(r) - if err != nil { - return nil, err - } - - c := New() - - for block, err := range it { - if err != nil { - return nil, err - } - c[block.c] = block.data - } - - return c, nil -} - -func FromCarBase64(r io.Reader) (Container, error) { - return FromCar(base64.NewDecoder(base64.StdEncoding, r)) -} - -func FromCbor(r io.Reader) (Container, error) { - var raw [][]byte - err := cbor.DecodeReader(r, &raw) - if err != nil { - return nil, err - } - - // TODO: the CID computation will likely be handled in the envelope - // TODO: the envelope should likely be able to deserialize arbitrary types based on the tag value - // TODO: the container should likely expose the decoded token, and have search methods (simple, but also DAG reconstruction, graph path search) - var cidBuilder = cid.V1Builder{Codec: cid.DagCBOR, MhType: multihash.SHA2_256} - - ctn := make(Container, len(raw)) - for _, bytes := range raw { - c, err := cidBuilder.Sum(bytes) - if err != nil { - return nil, err - } - ctn[c] = bytes - } - - return ctn, nil -} - -func FromCborBase64(r io.Reader) (Container, error) { - return FromCbor(base64.NewDecoder(base64.StdEncoding, r)) -} - func (ctn Container) AddBytes(cid cid.Cid, data []byte) { ctn[cid] = data } @@ -76,32 +28,21 @@ func (ctn Container) GetBytes(cid cid.Cid) ([]byte, bool) { return b, ok } -func (ctn Container) ToCar(w io.Writer) error { - return writeCar(w, nil, func(yield func(carBlock) bool) { - for c, bytes := range ctn { - if !yield(carBlock{c: c, data: bytes}) { - return - } - } - }) -} - -func (ctn Container) ToCarBase64(w io.Writer) error { - w2 := base64.NewEncoder(base64.StdEncoding, w) - defer w2.Close() - return ctn.ToCar(w2) -} - -func (ctn Container) ToCbor(w io.Writer) error { - raw := make([][]byte, 0, len(ctn)) - for _, bytes := range ctn { - raw = append(raw, bytes) +func (ctn Container) GetToken(cid cid.Cid) (token.Token, error) { + b, ok := ctn[cid] + if !ok { + return nil, ErrNotFound } - return cbor.EncodeWriter(raw, w) + return token.FromDagCbor(b) } -func (ctn Container) ToCborBase64(w io.Writer) error { - w2 := base64.NewEncoder(base64.StdEncoding, w) - defer w2.Close() - return ctn.ToCbor(w2) +func (ctn Container) GetDelegation(cid cid.Cid) (*delegation.Token, error) { + tkn, err := ctn.GetToken(cid) + if err != nil { + return nil, err + } + if tkn, ok := tkn.(*delegation.Token); ok { + return tkn, nil + } + return nil, fmt.Errorf("not a delegation token") } diff --git a/pkg/container/container_test.go b/pkg/container/container_test.go deleted file mode 100644 index b294b43..0000000 --- a/pkg/container/container_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package container - -import ( - "bytes" - "crypto/rand" - "io" - "testing" - - "github.com/ipfs/go-cid" - mh "github.com/multiformats/go-multihash" - "github.com/stretchr/testify/require" -) - -func TestContainerRoundTrip(t *testing.T) { - for _, tc := range []struct { - name string - writer func(ctn Container, w io.Writer) error - reader func(io.Reader) (Container, error) - }{ - {"carBytes", Container.ToCar, FromCar}, - {"carBase64", Container.ToCarBase64, FromCarBase64}, - {"cbor", Container.ToCbor, FromCbor}, - {"cborBase64", Container.ToCborBase64, FromCborBase64}, - } { - t.Run(tc.name, func(t *testing.T) { - ctn := New() - - builder := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256} - - var dataSize int - - for i := 0; i < 10; i++ { - data := randBytes(32) - c, err := builder.Sum(data) - require.NoError(t, err) - ctn.AddBytes(c, data) - dataSize += len(data) - } - - buf := bytes.NewBuffer(nil) - - err := tc.writer(ctn, buf) - require.NoError(t, err) - - t.Logf("data size %d", dataSize) - t.Logf("container overhead: %d%%, %d bytes", int(float32(buf.Len()-dataSize)/float32(dataSize)*100.0), buf.Len()-dataSize) - - ctn2, err := tc.reader(bytes.NewReader(buf.Bytes())) - require.NoError(t, err) - - require.Equal(t, ctn, ctn2) - }) - } -} - -func randBytes(n int) []byte { - b := make([]byte, n) - _, _ = rand.Read(b) - return b -} diff --git a/pkg/container/serialization.go b/pkg/container/serialization.go new file mode 100644 index 0000000..c802891 --- /dev/null +++ b/pkg/container/serialization.go @@ -0,0 +1,216 @@ +package container + +import ( + "compress/flate" + "compress/gzip" + "encoding/base64" + "fmt" + "io" + + "github.com/ipfs/go-cid" + cbor "github.com/ipfs/go-ipld-cbor" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/codec/dagcbor" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/fluent/qp" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/multiformats/go-multihash" +) + +func FromCar(r io.Reader) (Container, error) { + _, it, err := readCar(r) + if err != nil { + return nil, err + } + + c := New() + + for block, err := range it { + if err != nil { + return nil, err + } + c[block.c] = block.data + } + + return c, nil +} + +func (ctn Container) ToCar(w io.Writer) error { + return writeCar(w, nil, func(yield func(carBlock) bool) { + for c, bytes := range ctn { + if !yield(carBlock{c: c, data: bytes}) { + return + } + } + }) +} + +func FromCarBase64(r io.Reader) (Container, error) { + return FromCar(base64.NewDecoder(base64.StdEncoding, r)) +} + +func (ctn Container) ToCarBase64(w io.Writer) error { + w2 := base64.NewEncoder(base64.StdEncoding, w) + defer w2.Close() + return ctn.ToCar(w2) +} + +func FromCarGzip(r io.Reader) (Container, error) { + r2, err := gzip.NewReader(r) + if err != nil { + return nil, err + } + defer r2.Close() + return FromCar(r2) +} + +func (ctn Container) ToCarGzip(w io.Writer) error { + w2 := gzip.NewWriter(w) + defer w2.Close() + return ctn.ToCar(w2) +} + +func FromCarGzipBase64(r io.Reader) (Container, error) { + return FromCarGzip(base64.NewDecoder(base64.StdEncoding, r)) +} + +func (ctn Container) ToCarGzipBase64(w io.Writer) error { + w2 := base64.NewEncoder(base64.StdEncoding, w) + defer w2.Close() + return ctn.ToCarGzip(w2) +} + +func FromCbor(r io.Reader) (Container, error) { + var raw [][]byte + err := cbor.DecodeReader(r, &raw) + if err != nil { + return nil, err + } + + // TODO: the CID computation will likely be handled in the envelope + // TODO: the envelope should likely be able to deserialize arbitrary types based on the tag value + // TODO: the container should likely expose the decoded token, and have search methods (simple, but also DAG reconstruction, graph path search) + cidBuilder := cid.V1Builder{Codec: cid.DagCBOR, MhType: multihash.SHA2_256} + + ctn := make(Container, len(raw)) + for _, bytes := range raw { + c, err := cidBuilder.Sum(bytes) + if err != nil { + return nil, err + } + ctn[c] = bytes + } + + return ctn, nil +} + +func FromCbor2(r io.Reader) (Container, error) { + n, err := ipld.DecodeStreaming(r, dagcbor.Decode) + if err != nil { + return nil, err + } + if n.Kind() != datamodel.Kind_List { + return nil, fmt.Errorf("not a list") + } + + ctn := make(Container, n.Length()) + cidBuilder := cid.V1Builder{Codec: cid.DagCBOR, MhType: multihash.SHA2_256} + + it := n.ListIterator() + for !it.Done() { + _, val, err := it.Next() + if err != nil { + return nil, err + } + bytes, err := val.AsBytes() + if err != nil { + return nil, err + } + c, err := cidBuilder.Sum(bytes) + if err != nil { + return nil, err + } + ctn.AddBytes(c, bytes) + } + return ctn, nil +} + +func (ctn Container) ToCbor2(w io.Writer) error { + node, err := qp.BuildList(basicnode.Prototype.Any, int64(len(ctn)), func(la datamodel.ListAssembler) { + for _, bytes := range ctn { + qp.ListEntry(la, qp.Bytes(bytes)) + } + }) + if err != nil { + return err + } + return ipld.EncodeStreaming(w, node, dagcbor.Encode) +} + +func (ctn Container) ToCbor(w io.Writer) error { + raw := make([][]byte, 0, len(ctn)) + for _, bytes := range ctn { + raw = append(raw, bytes) + } + return cbor.EncodeWriter(raw, w) +} + +func FromCborBase64(r io.Reader) (Container, error) { + return FromCbor(base64.NewDecoder(base64.StdEncoding, r)) +} + +func (ctn Container) ToCborBase64(w io.Writer) error { + w2 := base64.NewEncoder(base64.StdEncoding, w) + defer w2.Close() + return ctn.ToCbor(w2) +} + +func FromCborGzip(r io.Reader) (Container, error) { + r2, err := gzip.NewReader(r) + if err != nil { + return nil, err + } + defer r2.Close() + return FromCbor(r2) +} + +func (ctn Container) ToCborGzip(w io.Writer) error { + w2 := gzip.NewWriter(w) + defer w2.Close() + return ctn.ToCbor(w2) +} + +func FromCborGzipBase64(r io.Reader) (Container, error) { + return FromCborGzip(base64.NewDecoder(base64.StdEncoding, r)) +} + +func (ctn Container) ToCborGzipBase64(w io.Writer) error { + w2 := base64.NewEncoder(base64.StdEncoding, w) + defer w2.Close() + return ctn.ToCborGzip(w2) +} + +func FromCborFlate(r io.Reader) (Container, error) { + r2 := flate.NewReader(r) + defer r2.Close() + return FromCbor(r2) +} + +func (ctn Container) ToCborFlate(w io.Writer) error { + w2, err := flate.NewWriter(w, flate.DefaultCompression) + if err != nil { + return err + } + defer w2.Close() + return ctn.ToCbor(w2) +} + +func FromCborFlateBase64(r io.Reader) (Container, error) { + return FromCborFlate(base64.NewDecoder(base64.StdEncoding, r)) +} + +func (ctn Container) ToCborFlateBase64(w io.Writer) error { + w2 := base64.NewEncoder(base64.StdEncoding, w) + defer w2.Close() + return ctn.ToCborFlate(w2) +} diff --git a/pkg/container/serialization_test.go b/pkg/container/serialization_test.go new file mode 100644 index 0000000..14ac11d --- /dev/null +++ b/pkg/container/serialization_test.go @@ -0,0 +1,175 @@ +package container + +import ( + "bytes" + "crypto/rand" + "fmt" + "io" + "testing" + "time" + + "github.com/ipfs/go-cid" + "github.com/libp2p/go-libp2p/core/crypto" + mh "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/require" + + "github.com/ucan-wg/go-ucan/did" + "github.com/ucan-wg/go-ucan/pkg/command" + "github.com/ucan-wg/go-ucan/pkg/policy" + "github.com/ucan-wg/go-ucan/pkg/policy/literal" + "github.com/ucan-wg/go-ucan/pkg/policy/selector" + "github.com/ucan-wg/go-ucan/token/delegation" +) + +func TestContainerRoundTrip(t *testing.T) { + for _, tc := range []struct { + name string + writer func(ctn Container, w io.Writer) error + reader func(io.Reader) (Container, error) + }{ + {"car", Container.ToCar, FromCar}, + {"carBase64", Container.ToCarBase64, FromCarBase64}, + {"carGzip", Container.ToCarGzip, FromCarGzip}, + {"carGzipBase64", Container.ToCarGzipBase64, FromCarGzipBase64}, + {"cbor", Container.ToCbor, FromCbor}, + {"cborBase64", Container.ToCborBase64, FromCborBase64}, + {"cborGzip", Container.ToCborGzip, FromCborGzip}, + {"cborGzipBase64", Container.ToCborGzipBase64, FromCborGzipBase64}, + {"cborFlate", Container.ToCborFlate, FromCborFlate}, + {"cborFlateBase64", Container.ToCborFlateBase64, FromCborFlateBase64}, + {"cbor2", Container.ToCbor2, FromCbor2}, + } { + t.Run(tc.name, func(t *testing.T) { + ctn := New() + + builder := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256} + + var dataSize int + + for i := 0; i < 10; i++ { + data := randTokenBytes() + c, err := builder.Sum(data) + require.NoError(t, err) + ctn.AddBytes(c, data) + dataSize += len(data) + } + + buf := bytes.NewBuffer(nil) + + err := tc.writer(ctn, buf) + require.NoError(t, err) + + t.Logf("data size %d", dataSize) + t.Logf("container overhead: %d%%, %d bytes", int(float32(buf.Len()-dataSize)/float32(dataSize)*100.0), buf.Len()-dataSize) + + ctn2, err := tc.reader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + + require.Equal(t, ctn, ctn2) + }) + } +} + +func BenchmarkContainerSerialisation(b *testing.B) { + for _, tc := range []struct { + name string + writer func(ctn Container, w io.Writer) error + reader func(io.Reader) (Container, error) + }{ + {"car", Container.ToCar, FromCar}, + {"carBase64", Container.ToCarBase64, FromCarBase64}, + {"carGzip", Container.ToCarGzip, FromCarGzip}, + {"carGzipBase64", Container.ToCarGzipBase64, FromCarGzipBase64}, + {"cbor", Container.ToCbor, FromCbor}, + {"cborBase64", Container.ToCborBase64, FromCborBase64}, + {"cborGzip", Container.ToCborGzip, FromCborGzip}, + {"cborGzipBase64", Container.ToCborGzipBase64, FromCborGzipBase64}, + {"cborFlate", Container.ToCborFlate, FromCborFlate}, + {"cborFlateBase64", Container.ToCborFlateBase64, FromCborFlateBase64}, + {"cbor2", Container.ToCbor2, FromCbor2}, + } { + ctn := New() + + builder := cid.V1Builder{Codec: cid.DagCBOR, MhType: mh.SHA2_256} + + var dataSize int + + for i := 0; i < 10; i++ { + data := randTokenBytes() + c, err := builder.Sum(data) + require.NoError(b, err) + ctn.AddBytes(c, data) + dataSize += len(data) + } + + buf := bytes.NewBuffer(nil) + _ = tc.writer(ctn, buf) + + b.Run(tc.name+"_write", func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + buf := bytes.NewBuffer(nil) + _ = tc.writer(ctn, buf) + } + }) + + b.Run(tc.name+"_read", func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, _ = tc.reader(bytes.NewReader(buf.Bytes())) + } + }) + } +} + +func randBytes(n int) []byte { + b := make([]byte, n) + _, _ = rand.Read(b) + return b +} + +func randDID() (crypto.PrivKey, did.DID) { + privKey, _, err := crypto.GenerateEd25519Key(rand.Reader) + if err != nil { + panic(err) + } + d, err := did.FromPrivKey(privKey) + if err != nil { + panic(err) + } + return privKey, d +} + +func randomString(length int) string { + b := make([]byte, length/2+1) + rand.Read(b) + return fmt.Sprintf("%x", b)[0:length] +} + +func randTokenBytes() []byte { + priv, iss := randDID() + _, aud := randDID() + cmd := command.New("foo", "bar") + pol := policy.Policy{policy.All( + selector.MustParse(".[]"), + policy.GreaterThan(selector.MustParse(".value"), literal.Int(2)), + )} + + opts := []delegation.Option{ + delegation.WithExpiration(time.Now().Add(time.Hour)), + delegation.WithSubject(iss), + } + for i := 0; i < 3; i++ { + opts = append(opts, delegation.WithMeta(randomString(8), randomString(10))) + } + + t, err := delegation.New(priv, aud, cmd, pol, opts...) + if err != nil { + panic(err) + } + b, _, err := t.ToSealed(priv) + if err != nil { + panic(err) + } + return b +}