From a5ec3e5519095d7b520eeb20ae878021b27323ae Mon Sep 17 00:00:00 2001 From: Esteban Date: Thu, 21 Mar 2024 22:44:08 +0000 Subject: [PATCH] Userspace Convertor: Add Deduplication Version Support Introduce a simple versioning system for the userspace convertor. This prevents inconsistencies with an existing DB if a new version is used Signed-off-by: Esteban --- .../builder/overlaybd_builder_test.go | 41 +++++- cmd/convertor/database/database.go | 2 + cmd/convertor/database/mysql.go | 33 +++-- cmd/convertor/main.go | 11 +- .../mysql-db-deduplication-sample-workload.sh | 128 ++++++++++++++++++ ...mysql-db-manifest-cache-sample-workload.sh | 21 --- cmd/convertor/resources/samples/mysql.conf | 6 +- cmd/convertor/testingresources/local_db.go | 40 +++--- docs/USERSPACE_CONVERTOR.md | 64 +++++---- pkg/version/version.go | 34 +++++ 10 files changed, 291 insertions(+), 89 deletions(-) create mode 100755 cmd/convertor/resources/samples/mysql-db-deduplication-sample-workload.sh delete mode 100755 cmd/convertor/resources/samples/mysql-db-manifest-cache-sample-workload.sh diff --git a/cmd/convertor/builder/overlaybd_builder_test.go b/cmd/convertor/builder/overlaybd_builder_test.go index 4259e7d4..63b21823 100644 --- a/cmd/convertor/builder/overlaybd_builder_test.go +++ b/cmd/convertor/builder/overlaybd_builder_test.go @@ -22,6 +22,7 @@ import ( "testing" testingresources "github.com/containerd/accelerated-container-image/cmd/convertor/testingresources" + "github.com/containerd/accelerated-container-image/pkg/version" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/images" _ "github.com/containerd/containerd/pkg/testutil" // Handle custom root flag @@ -31,7 +32,10 @@ import ( func Test_overlaybd_builder_CheckForConvertedLayer(t *testing.T) { ctx := context.Background() - db := testingresources.NewLocalDB() + versionDB := testingresources.Localdb{ + Version: version.GetUserSpaceConsistencyVersion(), + } + db := &versionDB // Reset DB resolver := 
testingresources.GetTestResolver(t, ctx) fetcher := testingresources.GetTestFetcherFromResolver(t, ctx, resolver, testingresources.DockerV2_Manifest_Simple_Ref) base := &builderEngineBase{ @@ -83,6 +87,13 @@ func Test_overlaybd_builder_CheckForConvertedLayer(t *testing.T) { testingresources.Assert(t, desc.Digest == targetDesc.Digest, "CheckForConvertedLayer() returned incorrect digest") }) + versionDB.Version.LayerVersion = "A" // Change version to something else + t.Run("Entry in DB but wrong version (should return not found)", func(t *testing.T) { + _, err := e.CheckForConvertedLayer(ctx, 0) + testingresources.Assert(t, errdefs.IsNotFound(err), fmt.Sprintf("CheckForConvertedLayer() returned an unexpected Error: %v", err)) + }) + versionDB.Version = version.GetUserSpaceConsistencyVersion() // Reset version + // cross repo mount (change target repo) base.repository = "hello-world2" newImageRef := "sample.localstore.io/hello-world2:amd64" @@ -107,7 +118,13 @@ func Test_overlaybd_builder_CheckForConvertedLayer(t *testing.T) { rc.Close() }) - base.db = testingresources.NewLocalDB() // Reset DB + versionDB.Version.LayerVersion = "A" // Change version to something else + t.Run("Cross Repo Entry in DB but wrong version (should return not found)", func(t *testing.T) { + _, err := e.CheckForConvertedLayer(ctx, 0) + testingresources.Assert(t, errdefs.IsNotFound(err), fmt.Sprintf("CheckForConvertedLayer() returned an unexpected Error: %v", err)) + }) + + base.db = testingresources.NewLocalDB(version.GetUserSpaceConsistencyVersion()) // Reset DB digestNotInRegistry := digest.FromString("Not in reg") err = base.db.CreateLayerEntry(ctx, e.host, e.repository, digestNotInRegistry, fakeChainId, 10) if err != nil { @@ -124,7 +141,10 @@ func Test_overlaybd_builder_CheckForConvertedLayer(t *testing.T) { func Test_overlaybd_builder_CheckForConvertedManifest(t *testing.T) { ctx := context.Background() - db := testingresources.NewLocalDB() + versionDB := testingresources.Localdb{ + 
Version: version.GetUserSpaceConsistencyVersion(), + } + db := &versionDB resolver := testingresources.GetTestResolver(t, ctx) fetcher := testingresources.GetTestFetcherFromResolver(t, ctx, resolver, testingresources.DockerV2_Manifest_Simple_Ref) @@ -178,6 +198,13 @@ func Test_overlaybd_builder_CheckForConvertedManifest(t *testing.T) { testingresources.Assert(t, desc.Digest == outputDesc.Digest, "CheckForConvertedManifest() returned incorrect digest") }) + versionDB.Version.ManifestVersion = "A" // Change version to something else + t.Run("Entry in DB but wrong version (should return not found)", func(t *testing.T) { + _, err := e.CheckForConvertedManifest(ctx) + testingresources.Assert(t, errdefs.IsNotFound(err), fmt.Sprintf("CheckForConvertedManifest() returned an unexpected Error: %v", err)) + }) + versionDB.Version = version.GetUserSpaceConsistencyVersion() // Reset version + // cross repo mount (change target repo) base.repository = "hello-world2" newImageRef := "sample.localstore.io/hello-world2:amd64" @@ -207,7 +234,13 @@ func Test_overlaybd_builder_CheckForConvertedManifest(t *testing.T) { rc.Close() }) - base.db = testingresources.NewLocalDB() // Reset DB + versionDB.Version.ManifestVersion = "A" // Change version to something else + t.Run("Cross Repo Entry in DB but wrong version (should return not found)", func(t *testing.T) { + _, err := e.CheckForConvertedManifest(ctx) + testingresources.Assert(t, errdefs.IsNotFound(err), fmt.Sprintf("CheckForConvertedManifest() returned an unexpected Error: %v", err)) + }) + + base.db = testingresources.NewLocalDB(version.GetUserSpaceConsistencyVersion()) digestNotInRegistry := digest.FromString("Not in reg") err = base.db.CreateManifestEntry(ctx, e.host, e.repository, outputDesc.MediaType, inputDesc.Digest, digestNotInRegistry, outputDesc.Size) if err != nil { diff --git a/cmd/convertor/database/database.go b/cmd/convertor/database/database.go index 045390e4..060cae9a 100644 --- a/cmd/convertor/database/database.go 
+++ b/cmd/convertor/database/database.go @@ -42,6 +42,7 @@ type LayerEntry struct { Repository string ChainID string Host string + Version string } type ManifestEntry struct { @@ -51,4 +52,5 @@ type ManifestEntry struct { Repository string Host string MediaType string + Version string } diff --git a/cmd/convertor/database/mysql.go b/cmd/convertor/database/mysql.go index 30663371..6fe95d47 100644 --- a/cmd/convertor/database/mysql.go +++ b/cmd/convertor/database/mysql.go @@ -21,36 +21,39 @@ import ( "database/sql" "fmt" + "github.com/containerd/accelerated-container-image/pkg/version" "github.com/containerd/containerd/log" "github.com/opencontainers/go-digest" ) type sqldb struct { - db *sql.DB + db *sql.DB + version version.UserspaceVersion } -func NewSqlDB(db *sql.DB) ConversionDatabase { +func NewSqlDB(db *sql.DB, version version.UserspaceVersion) ConversionDatabase { return &sqldb{ - db: db, + db: db, + version: version, } } func (m *sqldb) CreateLayerEntry(ctx context.Context, host, repository string, convertedDigest digest.Digest, chainID string, size int64) error { - _, err := m.db.ExecContext(ctx, "insert into overlaybd_layers(host, repo, chain_id, data_digest, data_size) values(?, ?, ?, ?, ?)", host, repository, chainID, convertedDigest, size) + _, err := m.db.ExecContext(ctx, "insert into overlaybd_layers(host, repo, chain_id, data_digest, data_size, version) values(?, ?, ?, ?, ?, ?)", host, repository, chainID, convertedDigest, size, m.version.LayerVersion) return err } func (m *sqldb) GetLayerEntryForRepo(ctx context.Context, host, repository, chainID string) *LayerEntry { var entry LayerEntry - row := m.db.QueryRowContext(ctx, "select host, repo, chain_id, data_digest, data_size from overlaybd_layers where host=? and repo=? 
and chain_id=?", host, repository, chainID) - if err := row.Scan(&entry.Host, &entry.Repository, &entry.ChainID, &entry.ConvertedDigest, &entry.DataSize); err != nil { + row := m.db.QueryRowContext(ctx, "select host, repo, chain_id, data_digest, data_size, version from overlaybd_layers where host=? and repo=? and chain_id=? and version=?", host, repository, chainID, m.version.LayerVersion) + if err := row.Scan(&entry.Host, &entry.Repository, &entry.ChainID, &entry.ConvertedDigest, &entry.DataSize, &entry.Version); err != nil { return nil } return &entry } func (m *sqldb) GetCrossRepoLayerEntries(ctx context.Context, host, chainID string) []*LayerEntry { - rows, err := m.db.QueryContext(ctx, "select host, repo, chain_id, data_digest, data_size from overlaybd_layers where host=? and chain_id=?", host, chainID) + rows, err := m.db.QueryContext(ctx, "select host, repo, chain_id, data_digest, data_size, version from overlaybd_layers where host=? and chain_id=? and version=?", host, chainID, m.version.LayerVersion) if err != nil { if err == sql.ErrNoRows { return nil @@ -61,7 +64,7 @@ func (m *sqldb) GetCrossRepoLayerEntries(ctx context.Context, host, chainID stri var entries []*LayerEntry for rows.Next() { var entry LayerEntry - err = rows.Scan(&entry.Host, &entry.Repository, &entry.ChainID, &entry.ConvertedDigest, &entry.DataSize) + err = rows.Scan(&entry.Host, &entry.Repository, &entry.ChainID, &entry.ConvertedDigest, &entry.DataSize, &entry.Version) if err != nil { continue } @@ -72,7 +75,7 @@ func (m *sqldb) GetCrossRepoLayerEntries(ctx context.Context, host, chainID stri } func (m *sqldb) DeleteLayerEntry(ctx context.Context, host, repository string, chainID string) error { - _, err := m.db.Exec("delete from overlaybd_layers where host=? and repo=? and chain_id=?", host, repository, chainID) + _, err := m.db.ExecContext(ctx, "delete from overlaybd_layers where host=? and repo=? and chain_id=? 
and version=?", host, repository, chainID, m.version.LayerVersion) if err != nil { return fmt.Errorf("failed to remove invalid record in db: %w", err) } @@ -80,21 +83,21 @@ func (m *sqldb) DeleteLayerEntry(ctx context.Context, host, repository string, c } func (m *sqldb) CreateManifestEntry(ctx context.Context, host, repository, mediaType string, original, convertedDigest digest.Digest, size int64) error { - _, err := m.db.ExecContext(ctx, "insert into overlaybd_manifests(host, repo, src_digest, out_digest, data_size, mediatype) values(?, ?, ?, ?, ?, ?)", host, repository, original, convertedDigest, size, mediaType) + _, err := m.db.ExecContext(ctx, "insert into overlaybd_manifests(host, repo, src_digest, out_digest, data_size, mediatype, version) values(?, ?, ?, ?, ?, ?, ?)", host, repository, original, convertedDigest, size, mediaType, m.version.ManifestVersion) return err } func (m *sqldb) GetManifestEntryForRepo(ctx context.Context, host, repository, mediaType string, original digest.Digest) *ManifestEntry { var entry ManifestEntry - row := m.db.QueryRowContext(ctx, "select host, repo, src_digest, out_digest, data_size, mediatype from overlaybd_manifests where host=? and repo=? and src_digest=? and mediatype=?", host, repository, original, mediaType) - if err := row.Scan(&entry.Host, &entry.Repository, &entry.OriginalDigest, &entry.ConvertedDigest, &entry.DataSize, &entry.MediaType); err != nil { + row := m.db.QueryRowContext(ctx, "select host, repo, src_digest, out_digest, data_size, mediatype, version from overlaybd_manifests where host=? and repo=? and src_digest=? and mediatype=? 
and version=?", host, repository, original, mediaType, m.version.ManifestVersion) + if err := row.Scan(&entry.Host, &entry.Repository, &entry.OriginalDigest, &entry.ConvertedDigest, &entry.DataSize, &entry.MediaType, &entry.Version); err != nil { return nil } return &entry } func (m *sqldb) GetCrossRepoManifestEntries(ctx context.Context, host, mediaType string, original digest.Digest) []*ManifestEntry { - rows, err := m.db.QueryContext(ctx, "select host, repo, src_digest, out_digest, data_size, mediatype from overlaybd_manifests where host=? and src_digest=? and mediatype=?", host, original, mediaType) + rows, err := m.db.QueryContext(ctx, "select host, repo, src_digest, out_digest, data_size, mediatype, version from overlaybd_manifests where host=? and src_digest=? and mediatype=? and version=?", host, original, mediaType, m.version.ManifestVersion) if err != nil { if err == sql.ErrNoRows { return nil @@ -105,7 +108,7 @@ func (m *sqldb) GetCrossRepoManifestEntries(ctx context.Context, host, mediaType var entries []*ManifestEntry for rows.Next() { var entry ManifestEntry - err = rows.Scan(&entry.Host, &entry.Repository, &entry.OriginalDigest, &entry.ConvertedDigest, &entry.DataSize, &entry.MediaType) + err = rows.Scan(&entry.Host, &entry.Repository, &entry.OriginalDigest, &entry.ConvertedDigest, &entry.DataSize, &entry.MediaType, &entry.Version) if err != nil { continue } @@ -116,7 +119,7 @@ func (m *sqldb) GetCrossRepoManifestEntries(ctx context.Context, host, mediaType } func (m *sqldb) DeleteManifestEntry(ctx context.Context, host, repository, mediaType string, original digest.Digest) error { - _, err := m.db.Exec("delete from overlaybd_manifests where host=? and repo=? and src_digest=? and mediatype=?", host, repository, original, mediaType) + _, err := m.db.ExecContext(ctx, "delete from overlaybd_manifests where host=? and repo=? and src_digest=? and mediatype=? 
and version=?", host, repository, original, mediaType, m.version.ManifestVersion) if err != nil { return fmt.Errorf("failed to remove invalid record in db: %w", err) } diff --git a/cmd/convertor/main.go b/cmd/convertor/main.go index 374412a6..cc461163 100644 --- a/cmd/convertor/main.go +++ b/cmd/convertor/main.go @@ -24,6 +24,7 @@ import ( "github.com/containerd/accelerated-container-image/cmd/convertor/builder" "github.com/containerd/accelerated-container-image/cmd/convertor/database" + "github.com/containerd/accelerated-container-image/pkg/version" _ "github.com/go-sql-driver/mysql" "github.com/sirupsen/logrus" @@ -46,6 +47,8 @@ var ( overlaybd string dbstr string dbType string + dbLayerVersion string + dbMnfstVersion string concurrencyLimit int // certification @@ -116,7 +119,11 @@ var ( logrus.Errorf("failed to open the provided mysql db: %v", err) os.Exit(1) } - opt.DB = database.NewSqlDB(db) + userspaceVersion := version.UserspaceVersion{ + LayerVersion: dbLayerVersion, + ManifestVersion: dbMnfstVersion, + } + opt.DB = database.NewSqlDB(db, userspaceVersion) case "": default: logrus.Warnf("db-type %s was provided but is not one of known db types. Available: mysql", dbType) @@ -172,6 +179,8 @@ func init() { rootCmd.Flags().BoolVar(&reserve, "reserve", false, "reserve tmp data") rootCmd.Flags().BoolVar(&noUpload, "no-upload", false, "don't upload layer and manifest") rootCmd.Flags().BoolVar(&dumpManifest, "dump-manifest", false, "dump manifest") + rootCmd.Flags().StringVar(&dbLayerVersion, "db-layer-version", version.GetUserSpaceConsistencyVersion().LayerVersion, "version of db to use for conversion deduplication. Default 1") + rootCmd.Flags().StringVar(&dbMnfstVersion, "db-manifest-version", version.GetUserSpaceConsistencyVersion().ManifestVersion, "version of db to use for conversion deduplication. 
Default 1-1") rootCmd.MarkFlagRequired("repository") rootCmd.MarkFlagRequired("input-tag") diff --git a/cmd/convertor/resources/samples/mysql-db-deduplication-sample-workload.sh b/cmd/convertor/resources/samples/mysql-db-deduplication-sample-workload.sh new file mode 100755 index 00000000..dc4e70dc --- /dev/null +++ b/cmd/convertor/resources/samples/mysql-db-deduplication-sample-workload.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# This script is a sample workload for the convertor that demonstrates the use of the manifest / layer caching for +# deduplication. +# Validation -> All obd images should be the same with the exception of the layer cache test images + +# This sample results in the following registry layout: +# $registry +# ├── $repository +# │ ├── $inputtag // Original image +# │ ├── $inputtag-extra-layer // Original image with one added random extra layer +# │ ├── $inputtag-obd // Regular Converted image +# │ ├── $inputtag-obd-manifest-cache // Converted image with cached manifest +# │ └── $inputtag-obd-extra-layer-cache // Converted image using the cached layers +# └── $repository-2 +# ├── $inputtag // Original image +# ├── $inputtag-obd-manifest-cross-repo-cache // Converted image with cached manifest + cross repo mount +# ├── $inputtag-extra-layer-2 // Original image with one added random extra layer +# └── $inputtag-obd-layer-cache-cross-repo-mount // Converted image using the cached layers + cross repo mount + +# Validation Examples +registry=$1 # registry to push to +username=$2 # username for registry +password=$3 # password for registry +sourceImage=$4 # public image to convert +repository=$5 # repository to push to +inputtag=$6 # input tag +mysqldbuser=$7 # mysql user +mysqldbpassword=$8 # mysql password + +oras login $registry -u $username -p $password +oras cp $sourceImage $registry/$repository:$inputtag + +# Manifest Cache Workloads +# Try one conversion +echo "" && echo "___REGULAR__CONVERSION___" +./bin/convertor --repository $registry/$repository 
-u $username:$password --input-tag $inputtag --oci --overlaybd $inputtag-obd --db-str "$mysqldbuser:$mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql + +echo "" && echo "___CACHED_MANIFEST_CONVERSION___" +# Retry, result manifest should be cached the tag doesnt seem to be getting recreated. Might need a push nonetheless to guarantee the tag? +./bin/convertor --repository $registry/$repository -u $username:$password --input-tag $inputtag --oci --overlaybd $inputtag-obd-manifest-cache --db-str "$mysqldbuser:$mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql + +echo "" && echo "___CACHED_MANIFEST_CONVERSION_CROSS_REPO___" +# Retry, cross repo mount +oras cp $sourceImage $registry/$repository-2:$inputtag +./bin/convertor --repository $registry/$repository-2 -u $username:$password --input-tag $inputtag --oci --overlaybd $inputtag-obd-manifest-cross-repo-cache --db-str "$mysqldbuser:$mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql + +# Layer Cache Workloads +# Cache from the same repo +echo "" && echo "___CACHED_LAYER_CONVERSION___" +dt=$(date +%s) +echo "FROM $sourceImage" > sample.Dockerfile +data=$(echo "RUN echo \"Random value: $dt-$RANDOM\" > random-file.txt") +echo $data >> sample.Dockerfile +docker build -f sample.Dockerfile -t "$registry/$repository:$inputtag-extra-layer" . 
+docker push "$registry/$repository:$inputtag-extra-layer" +./bin/convertor --repository $registry/$repository -u $username:$password --input-tag $inputtag-extra-layer --oci --overlaybd $inputtag-obd-extra-layer-cache --db-str "$mysqldbuser:$mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql +rm sample.Dockerfile + +# Retry with cross repo cache +echo "" && echo "___CACHED_LAYER_CONVERSION_CROSS_REPO___" +echo "FROM $sourceImage" > sample.Dockerfile +data=$(echo "RUN echo \"Random value: $dt-$RANDOM\" > random-file.txt") +echo "$data" >> sample.Dockerfile +docker build -f sample.Dockerfile -t "$registry/$repository-2:$inputtag-extra-layer-2" . +docker push "$registry/$repository-2:$inputtag-extra-layer-2" +./bin/convertor --repository $registry/$repository-2 -u $username:$password --input-tag $inputtag-extra-layer-2 --oci --overlaybd $inputtag-obd-layer-cache-cross-repo-mount --db-str "$mysqldbuser:$mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql +rm sample.Dockerfile + +# Converted descriptors for the original image should match +echo "" && echo "___VALIDATE_CACHED_MANIFEST_CONVERSIONS___" +desc_obd=$(oras manifest fetch $registry/$repository:$inputtag-obd --descriptor | jq -r '.digest') +# desc_obd_manifest_cache=$(oras manifest fetch $registry/$repository:$inputtag-obd-manifest-cache --descriptor) +desc_obd_manifest_cross_repo_cache=$(oras manifest fetch $registry/$repository-2:$inputtag-obd-manifest-cross-repo-cache --descriptor | jq -r '.digest') + +if [[ $desc_obd == $desc_obd_manifest_cross_repo_cache ]]; then + echo "All three images have matching descriptors:" + echo "$inputtag-obd: $desc_obd" + echo "$inputtag-obd-manifest-cache: $desc_obd_manifest_cache" + echo "$inputtag-obd-manifest-cross-repo-cache: $desc_obd_manifest_cross_repo_cache" +else + echo "Digests do not match for manifest cached images" + echo "$inputtag-obd: $desc_obd" + echo "$inputtag-obd-manifest-cache: $desc_obd_manifest_cache" + echo 
"$inputtag-obd-manifest-cross-repo-cache: $desc_obd_manifest_cross_repo_cache" +fi +echo "SUCCESS" + +echo "" && echo "___VALIDATE_CACHED_LAYER_CONVERSIONS___" +# Converted descriptors for the extra layer images won't match, but all their layers minus the last one should match +mnfst_obd=$(oras manifest fetch $registry/$repository:$inputtag-obd) +mnfst_obd_extra_layer_cache=$(oras manifest fetch $registry/$repository:$inputtag-obd-extra-layer-cache ) +mnfst_obd_extra_layer_cache_cross_repo=$(oras manifest fetch $registry/$repository-2:$inputtag-obd-layer-cache-cross-repo-mount) + +# Extract layers +layers_obd=$(echo "$mnfst_obd" | jq -r '.layers') +layers_obd_extra_layer_cache=$(echo "$mnfst_obd_extra_layer_cache" | jq -r '.layers') +layers_obd_extra_layer_cache_cross_repo=$(echo "$mnfst_obd_extra_layer_cache_cross_repo" | jq -r '.layers') + +# Check that all layers except the last one match, manifest digests should also be different +index=0 +for layer_obd_original in $(echo "$layers_obd" | jq -r '.[].digest'); do + layer_obd_extra_layer_cache=$(echo "$layers_obd_extra_layer_cache" | jq -r ".[$index].digest") + layer_obd_extra_layer_cache_cross_repo=$(echo "$layers_obd_extra_layer_cache_cross_repo" | jq -r ".[$index].digest") + + if [[ "$layer_obd_original" != "$layer_obd_extra_layer_cache" || "$layer_obd_original" != "$layer_obd_extra_layer_cache_cross_repo" ]]; then + echo "Layers differ at index $index." 
+ echo "Original: $layer_obd_original" + echo "Extra Layer Cache: $layer_obd_extra_layer_cache" + echo "Extra Layer Cache Cross Repo: $layer_obd_extra_layer_cache_cross_repo" + exit 1 + fi + index=$(($index+1)) +done +echo "All layers except the last one match for the extra layer cache images" + +desc_obd_extra_layer_cache=$(oras manifest fetch $registry/$repository:$inputtag-obd-extra-layer-cache --descriptor | jq -r '.digest') +desc_obd_extra_layer_cache_cross_repo=$(oras manifest fetch $registry/$repository-2:$inputtag-obd-layer-cache-cross-repo-mount --descriptor | jq -r '.digest') + +if [[ "$desc_obd" == "$desc_obd_extra_layer_cache" || "$desc_obd" == "$desc_obd_extra_layer_cache_cross_repo" || "$desc_obd_extra_layer_cache" == "$desc_obd_extra_layer_cache_cross_repo" ]]; then + echo "Extra layer images are somehow identical to the original obd image or to each other" + echo "Original $desc_obd" + echo "Extra Layer Cache: $desc_obd_extra_layer_cache" + echo "Extra Layer Cache Cross Repo: $desc_obd_extra_layer_cache_cross_repo" + exit 1 +fi + +echo "Extra layer cache images properly have different manifest digests" +echo "SUCCESS" \ No newline at end of file diff --git a/cmd/convertor/resources/samples/mysql-db-manifest-cache-sample-workload.sh b/cmd/convertor/resources/samples/mysql-db-manifest-cache-sample-workload.sh deleted file mode 100755 index 50318507..00000000 --- a/cmd/convertor/resources/samples/mysql-db-manifest-cache-sample-workload.sh +++ /dev/null @@ -1,21 +0,0 @@ -# Validation Examples -registry=$1 # registry to push to -username=$2 # username for registry -password=$3 # password for registry -sourceImage=$4 # public image to convert -repository=$5 # repository to push to -tag=$6 # tag to push to -mysqldbuser=$7 # mysql user -mysqldbpassword=$8 # mysql password - -oras login $registry -u $username -p $password -oras cp $sourceImage $registry/$repository:$tag -# Try one conversion -./bin/convertor --repository $registry/$repository -u 
$username:$password --input-tag $tag --oci --overlaybd $tag-obd-cache --db-str "$mysqldbuser:mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql - -# Retry, result manifest should be cached -./bin/convertor --repository $registry/$repository -u $username:$password --input-tag $tag --oci --overlaybd $tag-obd-cache-2 --db-str "$mysqldbuser:mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql - -# Retry, cross repo mount -oras cp $sourceImage $registry/$repository-2:$tag -./bin/convertor --repository $registry/$repository -u $username:$password --input-tag $tag --oci --overlaybd $tag-obd-cache-2 --db-str "$mysqldbuser:mysqldbpassword@tcp(127.0.0.1:3306)/conversioncache" --db-type mysql diff --git a/cmd/convertor/resources/samples/mysql.conf b/cmd/convertor/resources/samples/mysql.conf index 63a86648..701b2854 100644 --- a/cmd/convertor/resources/samples/mysql.conf +++ b/cmd/convertor/resources/samples/mysql.conf @@ -1,16 +1,18 @@ CREATE database conversioncache; USE conversioncache; CREATE TABLE `overlaybd_layers` ( + `version` varchar(6) NOT NULL, `host` varchar(255) NOT NULL, `repo` varchar(255) NOT NULL, `chain_id` varchar(255) NOT NULL COMMENT 'chain-id of the normal image layer', `data_digest` varchar(255) NOT NULL COMMENT 'digest of overlaybd layer', `data_size` bigint(20) NOT NULL COMMENT 'size of overlaybd layer', PRIMARY KEY (`host`,`repo`,`chain_id`), - KEY `index_registry_chainId` (`host`,`chain_id`) USING BTREE + KEY `index_registry_chainId` (`version`,`host`,`chain_id`) USING BTREE ) DEFAULT CHARSET=utf8; CREATE TABLE `overlaybd_manifests` ( + `version` varchar(6) NOT NULL, `host` varchar(255) NOT NULL, `repo` varchar(255) NOT NULL, `src_digest` varchar(255) NOT NULL COMMENT 'digest of the normal image manifest', @@ -18,5 +20,5 @@ CREATE TABLE `overlaybd_manifests` ( `data_size` bigint(20) NOT NULL COMMENT 'size of overlaybd manifest', `mediatype` varchar(255) NOT NULL COMMENT 'mediatype of the converted image manifest', 
PRIMARY KEY (`host`,`repo`,`src_digest`, `mediatype`), - KEY `index_registry_src_digest` (`host`,`src_digest`, `mediatype`) USING BTREE + KEY `index_registry_src_digest` (`version`,`host`,`src_digest`, `mediatype`) USING BTREE ) DEFAULT CHARSET=utf8; \ No newline at end of file diff --git a/cmd/convertor/testingresources/local_db.go b/cmd/convertor/testingresources/local_db.go index b442852a..2143225f 100644 --- a/cmd/convertor/testingresources/local_db.go +++ b/cmd/convertor/testingresources/local_db.go @@ -21,22 +21,26 @@ import ( "sync" "github.com/containerd/accelerated-container-image/cmd/convertor/database" + "github.com/containerd/accelerated-container-image/pkg/version" "github.com/opencontainers/go-digest" ) -type localdb struct { +type Localdb struct { layerRecords []*database.LayerEntry manifestRecords []*database.ManifestEntry layerLock sync.Mutex // Protects layerRecords manifestLock sync.Mutex // Protects manifestRecords + Version version.UserspaceVersion } // NewLocalDB returns a new local database for testing. This is a simple unoptimized in-memory database. 
-func NewLocalDB() database.ConversionDatabase { - return &localdb{} +func NewLocalDB(ver version.UserspaceVersion) database.ConversionDatabase { + return &Localdb{ + Version: ver, + } } -func (l *localdb) CreateLayerEntry(ctx context.Context, host string, repository string, convertedDigest digest.Digest, chainID string, size int64) error { +func (l *Localdb) CreateLayerEntry(ctx context.Context, host string, repository string, convertedDigest digest.Digest, chainID string, size int64) error { l.layerLock.Lock() defer l.layerLock.Unlock() l.layerRecords = append(l.layerRecords, &database.LayerEntry{ @@ -45,39 +49,40 @@ func (l *localdb) CreateLayerEntry(ctx context.Context, host string, repository ChainID: chainID, ConvertedDigest: convertedDigest, DataSize: size, + Version: l.Version.LayerVersion, }) return nil } -func (l *localdb) GetLayerEntryForRepo(ctx context.Context, host string, repository string, chainID string) *database.LayerEntry { +func (l *Localdb) GetLayerEntryForRepo(ctx context.Context, host string, repository string, chainID string) *database.LayerEntry { l.layerLock.Lock() defer l.layerLock.Unlock() for _, entry := range l.layerRecords { - if entry.Host == host && entry.ChainID == chainID && entry.Repository == repository { + if entry.Host == host && entry.ChainID == chainID && entry.Repository == repository && entry.Version == l.Version.LayerVersion { return entry } } return nil } -func (l *localdb) GetCrossRepoLayerEntries(ctx context.Context, host, chainID string) []*database.LayerEntry { +func (l *Localdb) GetCrossRepoLayerEntries(ctx context.Context, host, chainID string) []*database.LayerEntry { l.layerLock.Lock() defer l.layerLock.Unlock() var entries []*database.LayerEntry for _, entry := range l.layerRecords { - if entry.Host == host && entry.ChainID == chainID { + if entry.Host == host && entry.ChainID == chainID && entry.Version == l.Version.LayerVersion { entries = append(entries, entry) } } return entries } -func (l *localdb) 
DeleteLayerEntry(ctx context.Context, host, repository, chainID string) error { +func (l *Localdb) DeleteLayerEntry(ctx context.Context, host, repository, chainID string) error { l.layerLock.Lock() defer l.layerLock.Unlock() // host - repo - chainID should be unique for i, entry := range l.layerRecords { - if entry.Host == host && entry.ChainID == chainID && entry.Repository == repository { + if entry.Host == host && entry.ChainID == chainID && entry.Repository == repository && entry.Version == l.Version.LayerVersion { l.layerRecords = append(l.layerRecords[:i], l.layerRecords[i+1:]...) return nil } @@ -85,7 +90,7 @@ func (l *localdb) DeleteLayerEntry(ctx context.Context, host, repository, chainI return nil // No error if entry not found } -func (l *localdb) CreateManifestEntry(ctx context.Context, host, repository, mediaType string, original, convertedDigest digest.Digest, size int64) error { +func (l *Localdb) CreateManifestEntry(ctx context.Context, host, repository, mediaType string, original, convertedDigest digest.Digest, size int64) error { l.manifestLock.Lock() defer l.manifestLock.Unlock() l.manifestRecords = append(l.manifestRecords, &database.ManifestEntry{ @@ -95,39 +100,40 @@ func (l *localdb) CreateManifestEntry(ctx context.Context, host, repository, med ConvertedDigest: convertedDigest, DataSize: size, MediaType: mediaType, + Version: l.Version.ManifestVersion, }) return nil } -func (l *localdb) GetManifestEntryForRepo(ctx context.Context, host, repository, mediaType string, original digest.Digest) *database.ManifestEntry { +func (l *Localdb) GetManifestEntryForRepo(ctx context.Context, host, repository, mediaType string, original digest.Digest) *database.ManifestEntry { l.manifestLock.Lock() defer l.manifestLock.Unlock() for _, entry := range l.manifestRecords { - if entry.Host == host && entry.OriginalDigest == original && entry.Repository == repository && entry.MediaType == mediaType { + if entry.Host == host && entry.OriginalDigest == original && 
entry.Repository == repository && entry.MediaType == mediaType && entry.Version == l.Version.ManifestVersion { return entry } } return nil } -func (l *localdb) GetCrossRepoManifestEntries(ctx context.Context, host, mediaType string, original digest.Digest) []*database.ManifestEntry { +func (l *Localdb) GetCrossRepoManifestEntries(ctx context.Context, host, mediaType string, original digest.Digest) []*database.ManifestEntry { l.manifestLock.Lock() defer l.manifestLock.Unlock() var entries []*database.ManifestEntry for _, entry := range l.manifestRecords { - if entry.Host == host && entry.OriginalDigest == original && entry.MediaType == mediaType { + if entry.Host == host && entry.OriginalDigest == original && entry.MediaType == mediaType && entry.Version == l.Version.ManifestVersion { entries = append(entries, entry) } } return entries } -func (l *localdb) DeleteManifestEntry(ctx context.Context, host, repository, mediaType string, original digest.Digest) error { +func (l *Localdb) DeleteManifestEntry(ctx context.Context, host, repository, mediaType string, original digest.Digest) error { l.manifestLock.Lock() defer l.manifestLock.Unlock() // Identify indices of items to be deleted. for i, entry := range l.manifestRecords { - if entry.Host == host && entry.OriginalDigest == original && entry.Repository == repository && entry.MediaType == mediaType { + if entry.Host == host && entry.OriginalDigest == original && entry.Repository == repository && entry.MediaType == mediaType && entry.Version == l.Version.ManifestVersion { l.manifestRecords = append(l.manifestRecords[:i], l.manifestRecords[i+1:]...) 
} } diff --git a/docs/USERSPACE_CONVERTOR.md b/docs/USERSPACE_CONVERTOR.md index d0b35dd7..408bb486 100644 --- a/docs/USERSPACE_CONVERTOR.md +++ b/docs/USERSPACE_CONVERTOR.md @@ -34,30 +34,32 @@ Usage: convertor [flags] Flags: - -r, --repository string repository for converting image (required) - -u, --username string user[:password] Registry user and password - --plain connections using plain HTTP - --verbose show debug log - -i, --input-tag string tag for image converting from (required) - -o, --output-tag string tag for image converting to - -d, --dir string directory used for temporary data (default "tmp_conv") - --oci export image with oci spec - --mkfs make ext4 fs in bottom layer (default true) - --vsize int virtual block device size (GB) (default 64) - --fastoci string build 'Overlaybd-Turbo OCIv1' format (old name of turboOCIv1. deprecated) - --turboOCI string build 'Overlaybd-Turbo OCIv1' format - --overlaybd string build overlaybd format - --db-str string db str for overlaybd conversion - --db-type string type of db to use for conversion deduplication. Available: mysql. 
Default none - --concurrency-limit int the number of manifests that can be built at the same time, used for multi-arch images, 0 means no limit (default 4) - --cert-dir stringArray In these directories, root CA should be named as *.crt and client cert should be named as *.cert, *.key - --root-ca stringArray root CA certificates - --client-cert stringArray client cert certificates, should form in ${cert-file}:${key-file} - --insecure don't verify the server's certificate chain and host name - --reserve reserve tmp data - --no-upload don't upload layer and manifest - --dump-manifest dump manifest - -h, --help help for convertor + -r, --repository string repository for converting image (required) + -u, --username string user[:password] Registry user and password + --plain connections using plain HTTP + --verbose show debug log + -i, --input-tag string tag for image converting from (required) + -o, --output-tag string tag for image converting to + -d, --dir string directory used for temporary data (default "tmp_conv") + --oci export image with oci spec + --mkfs make ext4 fs in bottom layer (default true) + --vsize int virtual block device size (GB) (default 64) + --fastoci string build 'Overlaybd-Turbo OCIv1' format (old name of turboOCIv1. deprecated) + --turboOCI string build 'Overlaybd-Turbo OCIv1' format + --overlaybd string build overlaybd format + --db-str string db str for overlaybd conversion + --db-type string type of db to use for conversion deduplication. Available: mysql. 
Default none + --db-layer-version string version override for layer deduplication + --db-manifest-version string version override for manifest deduplication + --concurrency-limit int the number of manifests that can be built at the same time, used for multi-arch images, 0 means no limit (default 4) + --cert-dir stringArray In these directories, root CA should be named as *.crt and client cert should be named as *.cert, *.key + --root-ca stringArray root CA certificates + --client-cert stringArray client cert certificates, should form in ${cert-file}:${key-file} + --insecure don't verify the server's certificate chain and host name + --reserve reserve tmp data + --no-upload don't upload layer and manifest + --dump-manifest dump manifest + -h, --help help for convertor # examples $ bin/convertor -r docker.io/overlaybd/redis -u user:pass -i 6.2.6 -o 6.2.6_obd @@ -76,13 +78,14 @@ First, create a database and the `overlaybd_layers` table, the table schema is a ```sql CREATE TABLE `overlaybd_layers` ( + `version` varchar(6) NOT NULL, `host` varchar(255) NOT NULL, `repo` varchar(255) NOT NULL, `chain_id` varchar(255) NOT NULL COMMENT 'chain-id of the normal image layer', `data_digest` varchar(255) NOT NULL COMMENT 'digest of overlaybd layer', `data_size` bigint(20) NOT NULL COMMENT 'size of overlaybd layer', PRIMARY KEY (`host`,`repo`,`chain_id`), - KEY `index_registry_chainId` (`host`,`chain_id`) USING BTREE + KEY `index_registry_chainId` (`version`,`host`,`chain_id`) USING BTREE ) DEFAULT CHARSET=utf8; ``` @@ -90,6 +93,7 @@ If you also want caching for manifests to avoid reconverting the same manifest t ```sql CREATE TABLE `overlaybd_manifests` ( + `version` varchar(6) NOT NULL, `host` varchar(255) NOT NULL, `repo` varchar(255) NOT NULL, `src_digest` varchar(255) NOT NULL COMMENT 'digest of the normal image manifest', @@ -97,7 +101,7 @@ CREATE TABLE `overlaybd_manifests` ( `data_size` bigint(20) NOT NULL COMMENT 'size of overlaybd manifest', `mediatype` varchar(255) NOT 
NULL COMMENT 'mediatype of the converted image manifest', PRIMARY KEY (`host`,`repo`,`src_digest`, `mediatype`), - KEY `index_registry_src_digest` (`host`,`src_digest`) USING BTREE + KEY `index_registry_src_digest` (`version`,`host`,`src_digest`, `mediatype`) USING BTREE ) DEFAULT CHARSET=utf8; ``` @@ -105,14 +109,16 @@ with this database you can then provide the following flags: ```bash Flags: - --db-str db str for overlaybd conversion - --db-type type of db to use for conversion deduplication. Available: mysql. Default none - + --db-str string db str for overlaybd conversion + --db-type string type of db to use for conversion deduplication. Available: mysql. Default none + --db-layer-version string version override for layer deduplication + --db-manifest-version string version override for manifest deduplication # example $ bin/convertor -r docker.io/overlaybd/redis -u user:pass -i 6.2.6 -o 6.2.6_obd --db-str "dbuser:dbpass@tcp(127.0.0.1:3306)/dedup" --db-type mysql ``` * Note that we have also provided some tools to create such a database and examples of usage as well as a dockerfile that could be used to setup a simple converter with caching capabilities, see [samples](../cmd/convertor/resources/samples). 
+* Note that ```--db-layer-version``` and ```--db-manifest-version``` do not need to be provided unless a custom version is required ## libext2fs diff --git a/pkg/version/version.go b/pkg/version/version.go index b77e5a5e..39619cc5 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -16,8 +16,42 @@ package version +import ( + "fmt" + + "golang.org/x/mod/semver" +) + const ( OverlayBDVersionNumber = "0.1.0" TurboOCIVersionNumber = "0.1.0-turbo.ociv1" DeprecatedOCIVersionNumber = "0.1.0-fastoci" // old version of turboOCI ) + +const ( + UserspaceConsistencyLayerVersion = "1" // This should be updated when the layer format changes from userspace convertor side independent of the underlying overlaybd tools + UserspaceConsistencyManifestVersion = "1" // This should be updated when the manifest format changes from userspace convertor side independent of the underlying overlaybd tools +) + +// Compound version to be used for the database version +type UserspaceVersion struct { + LayerVersion string + ManifestVersion string +} + +// GetUserSpaceConsistencyVersion returns the version of the userspace conversion for use with manifest and layer deduplication. +func GetUserSpaceConsistencyVersion() UserspaceVersion { + // Only the major version should denote a breaking change on the layer format + toolsMajorVersion := semver.Major(GetOverlaybdToolsVersion()) + + return UserspaceVersion{ + LayerVersion: fmt.Sprintf("%s-%s", UserspaceConsistencyLayerVersion, toolsMajorVersion), + ManifestVersion: UserspaceConsistencyManifestVersion, + } +} + +// GetOverlaybdToolsVersion returns the version of the overlaybd tools. This value should be obtained from the tools +// themselves, and not hardcoded in this file. This is a placeholder value. +func GetOverlaybdToolsVersion() string { + return "v0.1.0" // This is a placeholder value +}