diff --git a/api/api.go b/api/api.go index c15be6e2d2314d3e95ade03a14c36542a49e16c8..f931be7dc0ed4ece677a19b4649467f032be371a 100644 --- a/api/api.go +++ b/api/api.go @@ -181,7 +181,7 @@ func maybeRunTaskInBackground(c *gin.Context, name string, resources []string, p // Common piece of code to show list of packages, // with searching & details if requested -func showPackages(c *gin.Context, reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) { +func showPackages(c *gin.Context, reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) { result := []*deb.Package{} list, err := deb.NewPackageListFromRefList(reflist, collectionFactory.PackageCollection(), nil) diff --git a/api/db.go b/api/db.go index 3f8b826ddc34894885f90b55cd509e802cd2b760..8869fbd99183d964ee2167ff12feeda419d10660 100644 --- a/api/db.go +++ b/api/db.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/task" "github.com/aptly-dev/aptly/utils" @@ -20,18 +21,22 @@ func apiDbCleanup(c *gin.Context) { collectionFactory := context.NewCollectionFactory() - // collect information about referenced packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about referenced packages and their reflist buckets... + existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() + + reflistMigration := collectionFactory.RefListCollection().NewMigration() out.Printf("Loading mirrors, local repos, snapshots and published repos...") err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -40,14 +45,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -56,12 +61,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -73,13 +80,16 @@ func apiDbCleanup(c *gin.Context) { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) } return nil }) @@ -87,11 +97,20 @@ func apiDbCleanup(c *gin.Context) { return nil, err } + err = reflistMigration.Flush() + if err != nil { + return nil, err + } + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + out.Printf("Split %d reflist(s) into %d bucket(s) (%d segment(s))", + stats.Reflists, stats.Buckets, stats.Segments) + } + // ... and compare it to the list of all packages out.Printf("Loading list of all packages...") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len()) @@ -112,6 +131,28 @@ func apiDbCleanup(c *gin.Context) { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return nil, err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + out.Printf("Deleting unreferenced reflist buckets (%d)...", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return nil, err + } + + if err := batch.Write(); err != nil { + return nil, err + } + } + // now, build a list of files that should be present in Repository (package pool) out.Printf("Building list of files referenced by packages...") referencedFiles := make([]string, 0, existingPackageRefs.Len()) diff --git a/api/metrics.go b/api/metrics.go index 94a9dc25252fef273c24770c30fb43dad8c325fe..875c3c196d2f40f74d08aafe503b7f2d87888cd7 100644 --- a/api/metrics.go +++ b/api/metrics.go @@ -102,7 +102,7 @@ func countPackagesByRepos() { components := repo.Components() for _, c := range components { - count := float64(len(repo.RefList(c).Refs)) + count := float64(repo.RefList(c).Len()) apiReposPackageCountGauge.WithLabelValues(fmt.Sprintf("%s", (repo.SourceNames())), repo.Distribution, c).Set(count) } diff --git a/api/mirror.go b/api/mirror.go index 285e09e7e4624ff6e624f8ee8912b8f74ed3cb14..f0b9aa2820af04071366548fdd95a7c0878fc53f 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -123,7 +123,7 @@ func apiMirrorsCreate(c *gin.Context) { return } - err = collection.Add(repo) + err = collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to add mirror: %s", err)) return @@ -183,7 +183,7 @@ func apiMirrorsShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -203,7 +203,7 @@ func apiMirrorsPackages(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -397,12 +397,12 @@ func apiMirrorsUpdate(c *gin.Context) { e := context.ReOpenDatabase() if e == nil { remote.MarkAsIdle() - collection.Update(remote) + collection.Update(remote, collectionFactory.RefListCollection()) } }() remote.MarkAsUpdating() - err = collection.Update(remote) + err = collection.Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -568,7 +568,7 @@ func apiMirrorsUpdate(c *gin.Context) { log.Info().Msgf("%s: Finalizing download\n", b.Name) remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + err = collectionFactory.RemoteRepoCollection().Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } diff --git a/api/publish.go b/api/publish.go index 110dbe5469d6a72eef43fe914447df53442ce045..f646e6ca2fb82fe1322c716a25c71743b78a6315 100644 --- a/api/publish.go +++ b/api/publish.go @@ -61,7 +61,7 @@ func apiPublishList(c *gin.Context) { result := make([]*deb.PublishedRepo, 0, collection.Len()) err := collection.ForEach(func(repo *deb.PublishedRepo) error { - err := collection.LoadComplete(repo, collectionFactory) + err := collection.LoadShallow(repo, collectionFactory) if err != nil { return err } @@ -140,7 +140,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(snapshot.ResourceKey())) - err = snapshotCollection.LoadComplete(snapshot) + err = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) return @@ -164,7 +164,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(localRepo.Key())) - err = localCollection.LoadComplete(localRepo) + err = localCollection.LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) } @@ -231,7 +231,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to publish: %s", err) } - err = collection.Add(published) + err = collection.Add(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } @@ -311,7 +311,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return } - err2 = snapshotCollection.LoadComplete(snapshot) + err2 = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err2 != nil { AbortWithJSONError(c, 500, err2) return @@ -346,7 +346,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } - err = collection.Update(published) + err = collection.Update(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } diff --git a/api/repos.go b/api/repos.go index fe6206d36e7304cbcc1ab3fd611dab50b74191e2..ca5d62f8c167cf71fbfa210e9d57d24ea1ab9e22 100644 --- a/api/repos.go +++ b/api/repos.go @@ -82,7 +82,7 @@ func apiReposCreate(c *gin.Context) { collectionFactory := context.NewCollectionFactory() collection := collectionFactory.LocalRepoCollection() - err := collection.Add(repo) + err := collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 400, err) return @@ -132,7 +132,7 @@ func apiReposEdit(c *gin.Context) { repo.DefaultComponent = *b.DefaultComponent } - err = collection.Update(repo) + err = collection.Update(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -201,7 +201,7 @@ func apiReposPackagesShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -229,7 +229,7 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -261,9 +261,9 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li } } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -320,7 +320,7 @@ func apiReposPackageFromDir(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -369,9 +369,9 @@ func apiReposPackageFromDir(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -489,7 +489,7 @@ func apiReposIncludePackageFromDir(c *gin.Context) { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignature, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) failedFiles = append(failedFiles, failedFiles2...) if err != nil { diff --git a/api/snapshot.go b/api/snapshot.go index af90522113a0f2b3b2cd484c7f30684f884e34c5..a58fe53b2b76faf63dd32bc3401c6e115d172701 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -69,7 +69,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, err } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -83,7 +83,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -128,7 +128,7 @@ func apiSnapshotsCreate(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -155,9 +155,9 @@ func apiSnapshotsCreate(c *gin.Context) { } } - snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewPackageRefListFromPackageList(list), b.Description) + snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewSplitRefListFromPackageList(list), b.Description) - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -197,7 +197,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { - err := collection.LoadComplete(repo) + err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -211,7 +211,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -261,7 +261,7 @@ func apiSnapshotsUpdate(c *gin.Context) { snapshot.Description = b.Description } - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -280,7 +280,7 @@ func apiSnapshotsShow(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -347,20 +347,20 @@ func apiSnapshotsDiff(c *gin.Context) { return } - err = collection.LoadComplete(snapshotA) + err = collection.LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } - err = collection.LoadComplete(snapshotB) + err = collection.LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { AbortWithJSONError(c, 500, err) return @@ -390,7 +390,7 @@ func apiSnapshotsSearchPackages(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -441,7 +441,7 @@ func apiSnapshotsMerge(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, http.StatusInternalServerError, err) return @@ -467,7 +467,7 @@ func apiSnapshotsMerge(c *gin.Context) { snapshot = deb.NewSnapshotFromRefList(body.Destination, sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/cmd.go b/cmd/cmd.go index 14a0efd166ece49c45135a0dd84df5d0dff66384..b70bf145c84839067ce85aeff37252e6b13633d2 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -21,7 +21,7 @@ const ( ) // ListPackagesRefList shows list of packages in PackageRefList -func ListPackagesRefList(reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) (err error) { +func ListPackagesRefList(reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) (err error) { fmt.Printf("Packages:\n") if reflist == nil { diff --git a/cmd/db_cleanup.go b/cmd/db_cleanup.go index 66fece677a720cec3217b179958af975e728a344..ec19068591c310c1ab9ac023cdf910fe3b3f5f1d 100644 --- a/cmd/db_cleanup.go +++ b/cmd/db_cleanup.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/utils" "github.com/smira/commander" @@ -24,12 +25,20 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { dryRun := context.Flags().Lookup("dry-run").Value.Get().(bool) collectionFactory := context.NewCollectionFactory() - // collect information about references packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about references packages and their reflistbuckets... + existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() // used only in verbose mode to report package use source packageRefSources := map[string][]string{} + var reflistMigration *deb.RefListMigration + if !dryRun { + reflistMigration = collectionFactory.RefListCollection().NewMigration() + } else { + reflistMigration = collectionFactory.RefListCollection().NewMigrationDryRun() + } + context.Progress().ColoredPrintf("@{w!}Loading mirrors, local repos, snapshots and published repos...@|") if verbose { context.Progress().ColoredPrintf("@{y}Loading mirrors:@|") @@ -39,20 +48,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - if verbose { - description := fmt.Sprintf("mirror %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + + if verbose { + description := fmt.Sprintf("mirror %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -71,21 +81,23 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) - if verbose { - description := fmt.Sprintf("local repo %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + + if verbose { + description := fmt.Sprintf("local repo %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -104,16 +116,18 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", snapshot.Name) } - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) if verbose { description := fmt.Sprintf("snapshot %s", snapshot.Name) - snapshot.RefList().ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -136,17 +150,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + if verbose { description := fmt.Sprintf("published repository %s:%s/%s component %s", published.Storage, published.Prefix, published.Distribution, component) - published.RefList(component).ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -160,11 +178,29 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { collectionFactory.Flush() + err = reflistMigration.Flush() + if err != nil { + return err + } + + if verbose { + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + if !dryRun { + context.Progress().ColoredPrintf("@{w!}Split %d reflist(s) into %d bucket(s) (%d segment(s))@|", + stats.Reflists, stats.Buckets, stats.Segments) + } else { + context.Progress().ColoredPrintf( + "@{y!}Skipped splitting %d reflist(s) into %d bucket(s) (%d segment(s)), as -dry-run has been requested.@|", + stats.Reflists, stats.Buckets, stats.Segments) + } + } + } + // ... and compare it to the list of all packages context.Progress().ColoredPrintf("@{w!}Loading list of all packages...@|") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced context.Progress().ColoredPrintf("@{r!}Deleting unreferenced packages (%d)...@|", toDelete.Len()) @@ -202,6 +238,32 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + context.Progress().ColoredPrintf("@{r!}Deleting unreferenced reflist buckets (%d)...@|", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + if !dryRun { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return err + } + + if err := batch.Write(); err != nil { + return err + } + } else { + context.Progress().ColoredPrintf("@{y!}Skipped reflist deletion, as -dry-run has been requested.@|") + } + } + collectionFactory.Flush() // now, build a list of files that should be present in Repository (package pool) diff --git a/cmd/mirror_create.go b/cmd/mirror_create.go index 78d91b5823f5c830b522779115d3319199912c53..eb2ba7cab88ec392a5b57538881da5c1136b69b9 100644 --- a/cmd/mirror_create.go +++ b/cmd/mirror_create.go @@ -65,7 +65,7 @@ func aptlyMirrorCreate(cmd *commander.Command, args []string) error { } collectionFactory := context.NewCollectionFactory() - err = collectionFactory.RemoteRepoCollection().Add(repo) + err = collectionFactory.RemoteRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add mirror: %s", err) } diff --git a/cmd/mirror_edit.go b/cmd/mirror_edit.go index 86462c4c0a9db1528502afd099adbc60ff3f446d..93986c062398f3fdc1867c21bc2205557ac06148 100644 --- a/cmd/mirror_edit.go +++ b/cmd/mirror_edit.go @@ -75,7 +75,7 @@ func aptlyMirrorEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/mirror_rename.go b/cmd/mirror_rename.go index 2ff9f92041dc036af7a0645b0c6dcd50b1f2e562..ff453b857fe9688c446aedd68fb06cd17de5ba8c 100644 --- a/cmd/mirror_rename.go +++ b/cmd/mirror_rename.go @@ -37,7 +37,7 @@ func aptlyMirrorRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/mirror_show.go b/cmd/mirror_show.go index 03179161a393b42c12445a58ff201ecbdcb4344b..3c52d6e37d61ecf80dba88eaa5d4c20c45186f1e 100644 --- a/cmd/mirror_show.go +++ b/cmd/mirror_show.go @@ -38,7 +38,7 @@ func aptlyMirrorShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -98,12 +98,13 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().RemoteRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.RemoteRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -113,7 +114,7 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/mirror_update.go b/cmd/mirror_update.go index 2bca585af20af5aa3aa46120b0eb9fc22dfe1543..29a05c788448975facb5f95696f819763ca6db21 100644 --- a/cmd/mirror_update.go +++ b/cmd/mirror_update.go @@ -30,7 +30,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to update: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -98,12 +98,12 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { err = context.ReOpenDatabase() if err == nil { repo.MarkAsIdle() - collectionFactory.RemoteRepoCollection().Update(repo) + collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) } }() repo.MarkAsUpdating() - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -259,7 +259,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { } repo.FinalizeDownload(collectionFactory, context.Progress()) - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } diff --git a/cmd/package_show.go b/cmd/package_show.go index 37f07e9b04fcc1b97b83c47aeb43d7c2b94b81fd..6bbec98496be7669ab3c466caa339f25373a6513 100644 --- a/cmd/package_show.go +++ b/cmd/package_show.go @@ -14,7 +14,7 @@ import ( func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) (err error) { err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -30,7 +30,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -46,7 +46,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/publish_list.go b/cmd/publish_list.go index f4bbb2afb24cd8f50fb3806006e7d7c65e33ca9b..e3a1d1a8f343ab23ad86e174f959dd7c524a919b 100644 --- a/cmd/publish_list.go +++ b/cmd/publish_list.go @@ -34,7 +34,7 @@ func aptlyPublishListTxt(cmd *commander.Command, _ []string) error { published := make([]string, 0, collectionFactory.PublishedRepoCollection().Len()) err = collectionFactory.PublishedRepoCollection().ForEach(func(repo *deb.PublishedRepo) error { - e := collectionFactory.PublishedRepoCollection().LoadComplete(repo, collectionFactory) + e := collectionFactory.PublishedRepoCollection().LoadShallow(repo, collectionFactory) if e != nil { fmt.Fprintf(os.Stderr, "Error found on one publish (prefix:%s / distribution:%s / component:%s\n)", repo.StoragePrefix(), repo.Distribution, repo.Components()) diff --git a/cmd/publish_snapshot.go b/cmd/publish_snapshot.go index 322479aacc665fb85c25408bef0d327089645c07..a1e89ea199a864a14b3b3c935863e472c762cbf0 100644 --- a/cmd/publish_snapshot.go +++ b/cmd/publish_snapshot.go @@ -49,7 +49,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -85,7 +85,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -171,7 +171,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Add(published) + err = collectionFactory.PublishedRepoCollection().Add(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_switch.go b/cmd/publish_switch.go index 0784fba3fb835ed47a6a20feddc3afbd7eaa96c5..db98ed4da16acbd590ba4027b4639ea498ca13ab 100644 --- a/cmd/publish_switch.go +++ b/cmd/publish_switch.go @@ -73,7 +73,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to switch: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to switch: %s", err) } @@ -105,7 +105,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_update.go b/cmd/publish_update.go index fcdea8ed65ef2a0518e76c46725ca88c37b1e76d..3e043666d370cf96e6c3f302906c8b5a453c96ee 100644 --- a/cmd/publish_update.go +++ b/cmd/publish_update.go @@ -69,7 +69,7 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/repo_add.go b/cmd/repo_add.go index 8189e7834252c3d0ca0b2389e8dc49bbb6437b7e..0263879f3115f80190dc5e0bee65f4d10324a315 100644 --- a/cmd/repo_add.go +++ b/cmd/repo_add.go @@ -28,7 +28,7 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to add: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add: %s", err) } @@ -58,9 +58,9 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { processedFiles = append(processedFiles, otherFiles...) - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_create.go b/cmd/repo_create.go index 5fef46d922ae6c45a7402837cf9b6e68a1f8ac1d..0e3a1e52b223e44221b71a5b6f5afb8da75c5caa 100644 --- a/cmd/repo_create.go +++ b/cmd/repo_create.go @@ -36,7 +36,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load source snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load source snapshot: %s", err) } @@ -44,7 +44,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { repo.UpdateRefList(snapshot.RefList()) } - err = collectionFactory.LocalRepoCollection().Add(repo) + err = collectionFactory.LocalRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add local repo: %s", err) } diff --git a/cmd/repo_edit.go b/cmd/repo_edit.go index bc81dc4a1264c388df15f5d0347a7e83227397cd..c7fbc419642c0e597afc841f6dbf8e37b09e8f8b 100644 --- a/cmd/repo_edit.go +++ b/cmd/repo_edit.go @@ -22,7 +22,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to edit: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } @@ -53,7 +53,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/repo_include.go b/cmd/repo_include.go index b84b96a389ad8e802a18bcad44e73ef0ff45ffef..4a750db1f9cee97bd20952c4e8526197290ffe6a 100644 --- a/cmd/repo_include.go +++ b/cmd/repo_include.go @@ -64,7 +64,7 @@ func aptlyRepoInclude(cmd *commander.Command, args []string) error { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, uploaders, query.Parse) failedFiles = append(failedFiles, failedFiles2...) diff --git a/cmd/repo_list.go b/cmd/repo_list.go index 9c4b0d47ebce7170b38187cec73c1fdc01c09ad0..f3ca4a8baeb23341d28e49a7b1e40df2e8657d1d 100644 --- a/cmd/repo_list.go +++ b/cmd/repo_list.go @@ -36,7 +36,7 @@ func aptlyRepoListTxt(cmd *commander.Command, _ []string) error { if raw { repos[i] = repo.Name } else { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -77,7 +77,8 @@ func aptlyRepoListJSON(_ *commander.Command, _ []string) error { repos := make([]*deb.LocalRepo, context.NewCollectionFactory().LocalRepoCollection().Len()) i := 0 context.NewCollectionFactory().LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + collectionFactory := context.NewCollectionFactory() + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/repo_move.go b/cmd/repo_move.go index 8be6698b6b70567dcaf189d302b3fb73f23c1ee1..004b4af0a98df54813e9c295b4d0d94b1b72c4d2 100644 --- a/cmd/repo_move.go +++ b/cmd/repo_move.go @@ -25,13 +25,13 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } var ( - srcRefList *deb.PackageRefList + srcRefList *deb.SplitRefList srcRepo *deb.LocalRepo ) @@ -45,7 +45,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: source and destination are the same", command) } - err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -59,7 +59,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -150,17 +150,17 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - dstRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(dstList)) + dstRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(dstList)) - err = collectionFactory.LocalRepoCollection().Update(dstRepo) + err = collectionFactory.LocalRepoCollection().Update(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } if command == "move" { // nolint: goconst - srcRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(srcList)) + srcRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(srcList)) - err = collectionFactory.LocalRepoCollection().Update(srcRepo) + err = collectionFactory.LocalRepoCollection().Update(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_remove.go b/cmd/repo_remove.go index 93e8535c71b00bbef2feccfe9182f82f94b04d3b..d3a1159ee30e6f4200fb546bbac86b7fbb66ada9 100644 --- a/cmd/repo_remove.go +++ b/cmd/repo_remove.go @@ -24,7 +24,7 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to remove: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to remove: %s", err) } @@ -59,9 +59,9 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_rename.go b/cmd/repo_rename.go index 9234b7c722f72a4ade0e7a498f6dc4580e1942b5..459afcbbb3c5a25c2791afaff77b158b0404c1e4 100644 --- a/cmd/repo_rename.go +++ b/cmd/repo_rename.go @@ -32,7 +32,7 @@ func aptlyRepoRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/repo_show.go b/cmd/repo_show.go index a61a5f1f922142b708f4c51cdbc819be0755362b..741915d123e936e9d27321817059696db6926b98 100644 --- a/cmd/repo_show.go +++ b/cmd/repo_show.go @@ -36,7 +36,7 @@ func aptlyRepoShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -63,12 +63,13 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().LocalRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.LocalRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -79,7 +80,7 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err == nil { packageList = list.FullNames() } diff --git a/cmd/snapshot_create.go b/cmd/snapshot_create.go index 000a78d9b91294b393e7b5b2e797b3d864ae1148..6bc319de3a9fa2a9303a689f8510e2085b07fd6d 100644 --- a/cmd/snapshot_create.go +++ b/cmd/snapshot_create.go @@ -30,7 +30,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -50,7 +50,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -71,7 +71,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return commander.ErrCommandError } - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add snapshot: %s", err) } diff --git a/cmd/snapshot_diff.go b/cmd/snapshot_diff.go index ccbea32ee066dea185eed25792cc75b4506448db..19da7fcc9985a91bfc99c1d3ef97a2d3faf3759d 100644 --- a/cmd/snapshot_diff.go +++ b/cmd/snapshot_diff.go @@ -23,7 +23,7 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot A: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot A: %s", err) } @@ -34,13 +34,13 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot B: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot B: %s", err) } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { return fmt.Errorf("unable to calculate diff: %s", err) } diff --git a/cmd/snapshot_filter.go b/cmd/snapshot_filter.go index b81a9cfc236a700bd9b345c159867b26348fa618..5aed03b95b5657a53592513d603e906faa8900af 100644 --- a/cmd/snapshot_filter.go +++ b/cmd/snapshot_filter.go @@ -27,7 +27,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to filter: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to filter: %s", err) } @@ -76,7 +76,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[1], []*deb.Snapshot{source}, result, fmt.Sprintf("Filtered '%s', query was: '%s'", source.Name, strings.Join(args[2:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_merge.go b/cmd/snapshot_merge.go index 0a319a5ab24812da1a491ff8e089562f1478cda7..e9eb0c779308ca4b676440d0a0c669250af7eb7b 100644 --- a/cmd/snapshot_merge.go +++ b/cmd/snapshot_merge.go @@ -24,7 +24,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(sources[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot: %s", err) } @@ -57,7 +57,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromRefList(args[0], sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_pull.go b/cmd/snapshot_pull.go index 884b50ffb23f1e1279f379edcf57b80c0b27d0fd..50e8488b84acce8b82e7356327e99cc526343d41 100644 --- a/cmd/snapshot_pull.go +++ b/cmd/snapshot_pull.go @@ -29,7 +29,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -40,7 +40,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -138,7 +138,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[2], []*deb.Snapshot{snapshot, source}, packageList, fmt.Sprintf("Pulled into '%s' with '%s' as source, pull request was: '%s'", snapshot.Name, source.Name, strings.Join(args[3:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_rename.go b/cmd/snapshot_rename.go index b8ac74cf33ba86e94ce0ca664b729154ebd4f957..b13b7dca6bca4bcaee7427e72237f44c5ea69692 100644 --- a/cmd/snapshot_rename.go +++ b/cmd/snapshot_rename.go @@ -32,7 +32,7 @@ func aptlySnapshotRename(cmd *commander.Command, args []string) error { } snapshot.Name = newName - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/snapshot_search.go b/cmd/snapshot_search.go index d771af7ce5113e06c2dc592a33f7856989a20ab8..c0244f11c03227948fc8eac38857eb98308a69c4 100644 --- a/cmd/snapshot_search.go +++ b/cmd/snapshot_search.go @@ -25,7 +25,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error command := cmd.Parent.Name() collectionFactory := context.NewCollectionFactory() - var reflist *deb.PackageRefList + var reflist *deb.SplitRefList if command == "snapshot" { // nolint: goconst var snapshot *deb.Snapshot @@ -34,7 +34,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -47,7 +47,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -60,7 +60,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } diff --git a/cmd/snapshot_show.go b/cmd/snapshot_show.go index e03a49e5868c6bce0536668408497d66b706630f..582b6a9e37927a73bd53ecbee56d24628cab0a76 100644 --- a/cmd/snapshot_show.go +++ b/cmd/snapshot_show.go @@ -35,7 +35,7 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -86,16 +86,17 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { } func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { + collectionFactory := context.NewCollectionFactory() var err error name := args[0] - snapshot, err := context.NewCollectionFactory().SnapshotCollection().ByName(name) + snapshot, err := collectionFactory.SnapshotCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -105,14 +106,14 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { for _, sourceID := range snapshot.SourceIDs { if snapshot.SourceKind == deb.SourceSnapshot { var source *deb.Snapshot - source, err = context.NewCollectionFactory().SnapshotCollection().ByUUID(sourceID) + source, err = collectionFactory.SnapshotCollection().ByUUID(sourceID) if err != nil { continue } snapshot.Snapshots = append(snapshot.Snapshots, source) } else if snapshot.SourceKind == deb.SourceLocalRepo { var source *deb.LocalRepo - source, err = context.NewCollectionFactory().LocalRepoCollection().ByUUID(sourceID) + source, err = collectionFactory.LocalRepoCollection().ByUUID(sourceID) if err != nil { continue } @@ -133,7 +134,7 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { if withPackages { if snapshot.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(snapshot.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(snapshot.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/snapshot_verify.go b/cmd/snapshot_verify.go index f815f29ce605d93eae6b03200c40e6fea02ed757..fc566aae3903a49705e91678d47d8dddba02b319 100644 --- a/cmd/snapshot_verify.go +++ b/cmd/snapshot_verify.go @@ -23,7 +23,7 @@ func aptlySnapshotVerify(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to verify: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to verify: %s", err) } diff --git a/database/database.go b/database/database.go index 709a1aa80351d75eb00c1f303e2d66a7d95d2a97..03d896b20159a96505990dc6a1a5a83ec7e5ca1b 100644 --- a/database/database.go +++ b/database/database.go @@ -48,6 +48,8 @@ type Storage interface { CreateTemporary() (Storage, error) + GetRecommendedMaxKVSize() int + Open() error Close() error CompactDB() error diff --git a/database/goleveldb/database.go b/database/goleveldb/database.go index a2874a6e61273589b69b6b06939252b18829e7e4..011681a6bfc037891b27ca243e70631b86af22ab 100644 --- a/database/goleveldb/database.go +++ b/database/goleveldb/database.go @@ -9,10 +9,13 @@ import ( "github.com/aptly-dev/aptly/database" ) +const blockSize = 4 * 1024 + func internalOpen(path string, throttleCompaction bool) (*leveldb.DB, error) { o := &opt.Options{ Filter: filter.NewBloomFilter(10), OpenFilesCacheCapacity: 256, + BlockSize: blockSize, } if throttleCompaction { diff --git a/database/goleveldb/storage.go b/database/goleveldb/storage.go index 37acf3d830707de0e414ef37bd750a6157f6d176..1281f3fbc6176265b2168c7732b7dad709b37cde 100644 --- a/database/goleveldb/storage.go +++ b/database/goleveldb/storage.go @@ -16,6 +16,17 @@ type storage struct { db *leveldb.DB } +func (s *storage) GetRecommendedMaxKVSize() int { + // The block size configured is not actually a *set* block size, but rather a + // *minimum*. LevelDB only checks if a block is full after a new key/value pair is + // written, meaning that blocks will tend to overflow a bit. + // Therefore, using the default block size as the max value size will ensure + // that a new block will only contain a single value and that the size will + // only ever be as large as around double the block size (if the block was + // nearly full before the new items were added). + return blockSize +} + // CreateTemporary creates new DB of the same type in temp dir func (s *storage) CreateTemporary() (database.Storage, error) { tempdir, err := os.MkdirTemp("", "aptly") diff --git a/deb/changes.go b/deb/changes.go index c264986abb6386e5de2e5e4b1e64fc0dc4542cb8..6c8bf8812fb5b363fa8c4e110b772d6b67611940 100644 --- a/deb/changes.go +++ b/deb/changes.go @@ -291,7 +291,8 @@ func CollectChangesFiles(locations []string, reporter aptly.ResultReporter) (cha // ImportChangesFiles imports referenced files in changes files into local repository func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles bool, verifier pgp.Verifier, repoTemplate *template.Template, progress aptly.Progress, localRepoCollection *LocalRepoCollection, packageCollection *PackageCollection, - pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { + reflistCollection *RefListCollection, pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, + parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { for _, path := range changesFiles { var changes *Changes @@ -359,7 +360,7 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac } } - err = localRepoCollection.LoadComplete(repo) + err = localRepoCollection.LoadComplete(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to load repo: %s", err) } @@ -382,9 +383,9 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac return nil, nil, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(NewSplitRefListFromPackageList(list)) - err = localRepoCollection.Update(repo) + err = localRepoCollection.Update(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to save: %s", err) } diff --git a/deb/changes_test.go b/deb/changes_test.go index b7dc4d95d04764d15cb0bb2e2f4bccbf02921870..1d50d610a5b9d348bbdd692e73d422232ee6fab7 100644 --- a/deb/changes_test.go +++ b/deb/changes_test.go @@ -21,6 +21,7 @@ type ChangesSuite struct { db database.Storage localRepoCollection *LocalRepoCollection packageCollection *PackageCollection + reflistCollection *RefListCollection packagePool aptly.PackagePool checksumStorage aptly.ChecksumStorage progress aptly.Progress @@ -42,6 +43,7 @@ func (s *ChangesSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.localRepoCollection = NewLocalRepoCollection(s.db) s.packageCollection = NewPackageCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.checksumStorage = files.NewMockChecksumStorage() s.packagePool = files.NewPackagePool(s.Dir, false) @@ -88,7 +90,7 @@ func (s *ChangesSuite) TestCollectChangesFiles(c *C) { func (s *ChangesSuite) TestImportChangesFiles(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) origFailedFiles := []string{ "testdata/changes/calamares.changes", @@ -124,7 +126,8 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { processedFiles, failedFiles, err := ImportChangesFiles( append(changesFiles, "testdata/changes/notexistent.changes"), s.Reporter, true, true, false, false, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, DeepEquals, append(expectedFailedFiles, "testdata/changes/notexistent.changes")) @@ -133,7 +136,7 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) changesFiles, failedFiles := CollectChangesFiles( []string{"testdata/dbgsym-with-source-version"}, s.Reporter) @@ -142,7 +145,8 @@ func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { _, failedFiles, err := ImportChangesFiles( changesFiles, s.Reporter, true, true, false, true, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, IsNil) diff --git a/deb/collections.go b/deb/collections.go index 7dfe8523500861f55c42553cf1b61b746a64e834..ff711e2650ce367888e32553dca19a97acc0b3d8 100644 --- a/deb/collections.go +++ b/deb/collections.go @@ -16,6 +16,7 @@ type CollectionFactory struct { snapshots *SnapshotCollection localRepos *LocalRepoCollection publishedRepos *PublishedRepoCollection + reflists *RefListCollection checksums *ChecksumCollection } @@ -91,6 +92,17 @@ func (factory *CollectionFactory) PublishedRepoCollection() *PublishedRepoCollec return factory.publishedRepos } +func (factory *CollectionFactory) RefListCollection() *RefListCollection { + factory.Lock() + defer factory.Unlock() + + if factory.reflists == nil { + factory.reflists = NewRefListCollection(factory.db) + } + + return factory.reflists +} + // ChecksumCollection returns (or creates) new ChecksumCollection func (factory *CollectionFactory) ChecksumCollection(db database.ReaderWriter) aptly.ChecksumStorage { factory.Lock() diff --git a/deb/graph.go b/deb/graph.go index 16a7ce85435c8713c1fb40c0ae84ad843de476a4..77421c3a57e621a9d9dc892f03a93cc4df3fa675 100644 --- a/deb/graph.go +++ b/deb/graph.go @@ -33,7 +33,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz existingNodes := map[string]bool{} err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -55,7 +55,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -81,7 +81,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz }) err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/deb/list.go b/deb/list.go index ec14bb982cdb5ecc4fe3b88fd647c9f9f2ac91a9..e59390f6bee405ecedd42c0316600409d39078a4 100644 --- a/deb/list.go +++ b/deb/list.go @@ -90,7 +90,7 @@ func NewPackageListWithDuplicates(duplicates bool, capacity int) *PackageList { } // NewPackageListFromRefList loads packages list from PackageRefList -func NewPackageListFromRefList(reflist *PackageRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { +func NewPackageListFromRefList(reflist AnyRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { // empty reflist if reflist == nil { return NewPackageList(), nil diff --git a/deb/local.go b/deb/local.go index 2a15c734b2f761deed9124ff48efc388e1c9efff..acae46646f926045f75772ae0fee3e49890a4b37 100644 --- a/deb/local.go +++ b/deb/local.go @@ -26,7 +26,7 @@ type LocalRepo struct { // Uploaders configuration Uploaders *Uploaders `codec:"Uploaders,omitempty" json:"-"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewLocalRepo creates new instance of Debian local repository @@ -55,13 +55,13 @@ func (repo *LocalRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *LocalRepo) RefList() *PackageRefList { +func (repo *LocalRepo) RefList() *SplitRefList { return repo.packageRefs } // UpdateRefList changes package list for local repo -func (repo *LocalRepo) UpdateRefList(reflist *PackageRefList) { - repo.packageRefs = reflist +func (repo *LocalRepo) UpdateRefList(sl *SplitRefList) { + repo.packageRefs = sl } // Encode does msgpack encoding of LocalRepo @@ -140,14 +140,14 @@ func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, uniq } // Add appends new repo to collection and saves it -func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Add(repo *LocalRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("local repo with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -157,27 +157,25 @@ func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { } // Update stores updated information about repo in DB -func (collection *LocalRepoCollection) Update(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Update(repo *LocalRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + bc := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), bc) } return batch.Write() } // LoadComplete loads additional information for local repo -func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/local_test.go b/deb/local_test.go index c9072b7dc07449d14f78bc92baa298359a2b0554..b87b1b624a4163a897407d97e16d9f1ed5d1526d 100644 --- a/deb/local_test.go +++ b/deb/local_test.go @@ -12,7 +12,7 @@ import ( type LocalRepoSuite struct { db database.Storage list *PackageList - reflist *PackageRefList + reflist *SplitRefList repo *LocalRepo } @@ -24,7 +24,7 @@ func (s *LocalRepoSuite) SetUpTest(c *C) { s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) s.repo = NewLocalRepo("lrepo", "Super repo") s.repo.packageRefs = s.reflist @@ -75,10 +75,11 @@ func (s *LocalRepoSuite) TestRefKey(c *C) { } type LocalRepoCollectionSuite struct { - db database.Storage - collection *LocalRepoCollection - list *PackageList - reflist *PackageRefList + db database.Storage + collection *LocalRepoCollection + reflistCollection *RefListCollection + list *PackageList + reflist *SplitRefList } var _ = Suite(&LocalRepoCollectionSuite{}) @@ -86,12 +87,13 @@ var _ = Suite(&LocalRepoCollectionSuite{}) func (s *LocalRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewLocalRepoCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.list = NewPackageList() s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromRefList(NewPackageRefListFromPackageList(s.list)) } func (s *LocalRepoCollectionSuite) TearDownTest(c *C) { @@ -103,8 +105,8 @@ func (s *LocalRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("local1") c.Assert(err, IsNil) @@ -121,7 +123,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -135,7 +137,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection := NewLocalRepoCollection(s.db) r, err := collection.ByName("local1") @@ -143,20 +145,20 @@ func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection = NewLocalRepoCollection(s.db) r, err = collection.ByName("local1") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.reflistCollection), IsNil) c.Assert(r.NumPackages(), Equals, 2) } func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { repo := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo) + s.collection.Add(repo, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*LocalRepo) error { @@ -178,10 +180,10 @@ func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *LocalRepoCollectionSuite) TestDrop(c *C) { repo1 := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo1) + s.collection.Add(repo1, s.reflistCollection) repo2 := NewLocalRepo("local2", "Comment 2") - s.collection.Add(repo2) + s.collection.Add(repo2, s.reflistCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/publish.go b/deb/publish.go index 826f936a4f6701216217a4c43fa5abecf612e15a..c929c6ec87e9dbbe045be01d2e60cfba1470bc59 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -27,7 +27,7 @@ type repoSourceItem struct { // Pointer to local repo if SourceKind == "local" localRepo *LocalRepo // Package references is SourceKind == "local" - packageRefs *PackageRefList + packageRefs *SplitRefList } // PublishedRepo is a published for http/ftp representation of snapshot as Debian repository @@ -280,7 +280,7 @@ func NewPublishedRepo(storage, prefix, distribution string, architectures []stri return result, nil } -// MarshalJSON requires object to be "loaded completely" +// MarshalJSON requires object to filled by "LoadShallow" or "LoadComplete" func (p *PublishedRepo) MarshalJSON() ([]byte, error) { type sourceInfo struct { Component, Name string @@ -397,7 +397,7 @@ func (p *PublishedRepo) RefKey(component string) []byte { } // RefList returns list of package refs in local repo -func (p *PublishedRepo) RefList(component string) *PackageRefList { +func (p *PublishedRepo) RefList(component string) *SplitRefList { item := p.sourceItems[component] if p.SourceKind == SourceLocalRepo { return item.packageRefs @@ -944,14 +944,14 @@ func (collection *PublishedRepoCollection) loadList() { } // Add appends new repo to collection and saves it -func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Add(repo *PublishedRepo, reflistCollection *RefListCollection) error { collection.loadList() if collection.CheckDuplicate(repo) != nil { return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution) } - err := collection.Update(repo) + err := collection.Update(repo, reflistCollection) if err != nil { return err } @@ -974,20 +974,24 @@ func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) * } // Update stores updated information about repo in DB -func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Update(repo *PublishedRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.SourceKind == SourceLocalRepo { + rb := reflistCollection.NewBatch(batch) for component, item := range repo.sourceItems { - batch.Put(repo.RefKey(component), item.packageRefs.Encode()) + reflistCollection.UpdateInBatch(item.packageRefs, repo.RefKey(component), rb) } } return batch.Write() } -// LoadComplete loads additional information for remote repo -func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { +// LoadShallow loads basic information on the repo's sources +// +// This does not *fully* load in the sources themselves and their packages. +// It's useful if you just want to use JSON serialization without loading in unnecessary things. +func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { repo.sourceItems = make(map[string]repoSourceItem) if repo.SourceKind == SourceSnapshot { @@ -998,10 +1002,6 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if err != nil { return } - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) - if err != nil { - return - } repo.sourceItems[component] = item } @@ -1013,31 +1013,46 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if err != nil { return } - err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) + + item.packageRefs = NewSplitRefList() + repo.sourceItems[component] = item + } + } else { + panic("unknown SourceKind") + } + + return +} + +// LoadComplete loads complete information on the sources of the repo *and* their packages +func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { + collection.LoadShallow(repo, collectionFactory) + + if repo.SourceKind == SourceSnapshot { + for _, item := range repo.sourceItems { + err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot, collectionFactory.RefListCollection()) + if err != nil { + return + } + } + } else if repo.SourceKind == SourceLocalRepo { + for component, item := range repo.sourceItems { + err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo, collectionFactory.RefListCollection()) if err != nil { return } - var encoded []byte - encoded, err = collection.db.Get(repo.RefKey(component)) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey(component)) if err != nil { // < 0.6 saving w/o component name if err == database.ErrNotFound && len(repo.Sources) == 1 { - encoded, err = collection.db.Get(repo.RefKey("")) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey("")) } if err != nil { return } } - - item.packageRefs = &PackageRefList{} - err = item.packageRefs.Decode(encoded) - if err != nil { - return - } - - repo.sourceItems[component] = item } } else { panic("unknown SourceKind") @@ -1142,6 +1157,11 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix referencedFiles := map[string][]string{} processedComponentRefs := map[string]*PackageRefList{} + processedComponentBuckets := map[string]*RefListDigestSet{} + for _, component := range components { + processedComponentBuckets[component] = NewRefListDigestSet() + } + for _, r := range collection.list { if r.Prefix == prefix { matches := false @@ -1165,36 +1185,51 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix for _, component := range components { if utils.StrSliceHasItem(repoComponents, component) { - unseenRefs := r.RefList(component) - processedRefs := processedComponentRefs[component] - if processedRefs != nil { - unseenRefs = unseenRefs.Subtract(processedRefs) - } else { - processedRefs = NewPackageRefList() - } + processedBuckets := processedComponentBuckets[component] - if unseenRefs.Len() == 0 { - continue - } - processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + err := r.RefList(component).ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + if processedBuckets.Has(digest) { + return nil + } + processedBuckets.Add(digest) + + unseenRefs := bucket + processedRefs := processedComponentRefs[component] + if processedRefs != nil { + unseenRefs = unseenRefs.Subtract(processedRefs) + } else { + processedRefs = NewPackageRefList() + } - packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) - if err != nil { - return nil, err - } + if unseenRefs.Len() == 0 { + return nil + } + processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) - packageList.ForEach(func(p *Package) error { - poolDir, err := p.PoolDirectory() + packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) if err != nil { return err } - for _, f := range p.Files() { - referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) - } + packageList.ForEach(func(p *Package) error { + poolDir, err := p.PoolDirectory() + if err != nil { + return err + } + + for _, f := range p.Files() { + referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) + } + + return nil + }) return nil }) + + if err != nil { + return nil, err + } } } } diff --git a/deb/publish_bench_test.go b/deb/publish_bench_test.go index b135a8b8cbe15ee06bd2f244c2d6df4ef9794fb0..f1d87e1ebe34da2b1de96354921edb905581c5f1 100644 --- a/deb/publish_bench_test.go +++ b/deb/publish_bench_test.go @@ -31,6 +31,7 @@ func BenchmarkListReferencedFiles(b *testing.B) { packageCollection := factory.PackageCollection() repoCollection := factory.LocalRepoCollection() publishCollection := factory.PublishedRepoCollection() + reflistCollection := factory.RefListCollection() sharedRefs := NewPackageRefList() { @@ -91,14 +92,14 @@ func BenchmarkListReferencedFiles(b *testing.B) { repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment") repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex) repo.DefaultComponent = defaultComponent - repo.UpdateRefList(refs.Merge(sharedRefs, false, true)) - repoCollection.Add(repo) + repo.UpdateRefList(NewSplitRefListFromRefList(refs.Merge(sharedRefs, false, true))) + repoCollection.Add(repo, reflistCollection) publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory) if err != nil { b.Fatal(err) } - publishCollection.Add(publish) + publishCollection.Add(publish, reflistCollection) } db.CompactDB() diff --git a/deb/publish_test.go b/deb/publish_test.go index 15e3be925d9d98c4da74c8d6bec721346b4ed706..8d0d7ae188bf6cb1ad250667ed6bf3f3f979b2f7 100644 --- a/deb/publish_test.go +++ b/deb/publish_test.go @@ -82,6 +82,7 @@ type PublishedRepoSuite struct { db database.Storage factory *CollectionFactory packageCollection *PackageCollection + reflistCollection *RefListCollection } var _ = Suite(&PublishedRepoSuite{}) @@ -113,21 +114,22 @@ func (s *PublishedRepoSuite) SetUpTest(c *C) { s.p2.UpdateFiles(s.p1.Files()) s.p3.UpdateFiles(s.p1.Files()) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) + s.reflistCollection = s.factory.RefListCollection() repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) repo.packageRefs = s.reflist - s.factory.RemoteRepoCollection().Add(repo) + s.factory.RemoteRepoCollection().Add(repo, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") s.localRepo.packageRefs = s.reflist - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.snapshot, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot) + s.factory.SnapshotCollection().Add(s.snapshot, s.reflistCollection) s.snapshot2, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot2) + s.factory.SnapshotCollection().Add(s.snapshot2, s.reflistCollection) s.packageCollection = s.factory.PackageCollection() s.packageCollection.Update(s.p1) @@ -284,7 +286,7 @@ func (s *PublishedRepoSuite) TestDistributionComponentGuessing(c *C) { s.localRepo.DefaultDistribution = "precise" s.localRepo.DefaultComponent = "contrib" - s.factory.LocalRepoCollection().Update(s.localRepo) + s.factory.LocalRepoCollection().Update(s.localRepo, s.reflistCollection) repo, err = NewPublishedRepo("", "ppa", "", nil, []string{""}, []interface{}{s.localRepo}, s.factory) c.Check(err, IsNil) @@ -442,6 +444,7 @@ type PublishedRepoCollectionSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection snap1, snap2 *Snapshot localRepo *LocalRepo @@ -457,22 +460,23 @@ func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() snap1Refs := NewPackageRefList() snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")} sort.Sort(snap1Refs) - s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1") + s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, NewSplitRefListFromRefList(snap1Refs), "desc1") snap2Refs := NewPackageRefList() snap2Refs.Refs = [][]byte{s.p3.Key("")} sort.Sort(snap2Refs) - s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2") + s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, NewSplitRefListFromRefList(snap2Refs), "desc2") - s.snapshotCollection.Add(s.snap1) - s.snapshotCollection.Add(s.snap2) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) + s.snapshotCollection.Add(s.snap2, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main", "contrib"}, []interface{}{s.snap2, s.snap1}, s.factory) @@ -491,14 +495,14 @@ func (s *PublishedRepoCollectionSuite) TestAddByStoragePrefixDistribution(c *C) _, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) - c.Assert(s.collection.Add(s.repo1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo3), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo3, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo3), Equals, s.repo1) - c.Assert(s.collection.Add(s.repo4), IsNil) - c.Assert(s.collection.Add(s.repo5), IsNil) + c.Assert(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo5, s.reflistCollection), IsNil) r, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, IsNil) @@ -524,7 +528,7 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { _, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, IsNil) @@ -535,8 +539,8 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { } func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.repo1), IsNil) - c.Assert(s.collection.Update(s.repo4), IsNil) + c.Assert(s.collection.Update(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Update(s.repo4, s.reflistCollection), IsNil) collection := NewPublishedRepoCollection(s.db) r, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -584,7 +588,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { encoder.Encode(&old) c.Assert(s.db.Put(s.repo1.Key(), buf.Bytes()), IsNil) - c.Assert(s.db.Put(s.repo1.RefKey(""), s.localRepo.RefList().Encode()), IsNil) + c.Assert(s.db.Put(s.repo1.RefKey(""), NewPackageRefList().Encode()), IsNil) collection := NewPublishedRepoCollection(s.db) repo, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -599,7 +603,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { } func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.repo1) + s.collection.Add(s.repo1, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*PublishedRepo) error { @@ -620,17 +624,17 @@ func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { } func (s *PublishedRepoCollectionSuite) TestBySnapshot(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) c.Check(s.collection.BySnapshot(s.snap1), DeepEquals, []*PublishedRepo{s.repo1, s.repo2}) c.Check(s.collection.BySnapshot(s.snap2), DeepEquals, []*PublishedRepo{s.repo2}) } func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, []*PublishedRepo{s.repo4, s.repo5}) } @@ -640,10 +644,10 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { c.Check(s.factory.PackageCollection().Update(s.p2), IsNil) c.Check(s.factory.PackageCollection().Update(s.p3), IsNil) - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -656,12 +660,12 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { }) snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3") - s.snapshotCollection.Add(snap3) + s.snapshotCollection.Add(snap3, s.reflistCollection) // Ensure that adding a second publish point with matching files doesn't give duplicate results. repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory) c.Check(err, IsNil) - c.Check(s.collection.Add(repo3), IsNil) + c.Check(s.collection.Add(repo3, s.reflistCollection), IsNil) files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -679,6 +683,7 @@ type PublishedRepoRemoveSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection root, root2 string provider *FakeStorageProvider @@ -694,10 +699,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1") - s.snapshotCollection.Add(s.snap1) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) @@ -706,11 +712,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.repo5, _ = NewPublishedRepo("files:other", "ppa", "osminog", []string{}, []string{"contrib"}, []interface{}{s.snap1}, s.factory) s.collection = s.factory.PublishedRepoCollection() - s.collection.Add(s.repo1) - s.collection.Add(s.repo2) - s.collection.Add(s.repo3) - s.collection.Add(s.repo4) - s.collection.Add(s.repo5) + s.collection.Add(s.repo1, s.reflistCollection) + s.collection.Add(s.repo2, s.reflistCollection) + s.collection.Add(s.repo3, s.reflistCollection) + s.collection.Add(s.repo4, s.reflistCollection) + s.collection.Add(s.repo5, s.reflistCollection) s.root = c.MkDir() s.publishedStorage = files.NewPublishedStorage(s.root, "", "") diff --git a/deb/reflist.go b/deb/reflist.go index 25cb0b6e1d82a0ec9c9ea463b63112a09b39cffe..e039de12742faef310ee868489bb04323450ca64 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -2,10 +2,15 @@ package deb import ( "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" + "fmt" "sort" "github.com/AlekSi/pointer" + "github.com/aptly-dev/aptly/database" + "github.com/cespare/xxhash/v2" "github.com/ugorji/go/codec" ) @@ -44,6 +49,13 @@ func NewPackageRefListFromPackageList(list *PackageList) *PackageRefList { return reflist } +func (l *PackageRefList) Clone() *PackageRefList { + clone := &PackageRefList{} + clone.Refs = make([][]byte, l.Len()) + copy(clone.Refs, l.Refs) + return clone +} + // Len returns number of refs func (l *PackageRefList) Len() int { return len(l.Refs) @@ -184,8 +196,12 @@ func (d PackageDiff) MarshalJSON() ([]byte, error) { type PackageDiffs []PackageDiff // Diff calculates difference between two reflists -func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection) (result PackageDiffs, err error) { - result = make(PackageDiffs, 0, 128) +func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } // pointer to left and right reflists il, ir := 0, 0 @@ -196,31 +212,21 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle // until we reached end of both lists for il < ll || ir < lr { - // if we've exhausted left list, pull the rest from the right - if il == ll { - pr, err = packageCollection.ByKey(r.Refs[ir]) - if err != nil { - return nil, err - } - result = append(result, PackageDiff{Left: nil, Right: pr}) - ir++ - continue + var rl, rr []byte + if il < ll { + rl = l.Refs[il] } - // if we've exhausted right list, pull the rest from the left - if ir == lr { - pl, err = packageCollection.ByKey(l.Refs[il]) - if err != nil { - return nil, err - } - result = append(result, PackageDiff{Left: pl, Right: nil}) - il++ - continue + if ir < lr { + rr = r.Refs[ir] } - // refs on both sides are present, load them - rl, rr := l.Refs[il], r.Refs[ir] // compare refs rel := bytes.Compare(rl, rr) + // an unset ref is less than all others, but since it represents the end + // of a reflist, it should be *greater*, so flip the comparison result + if rl == nil || rr == nil { + rel = -rel + } if rel == 0 { // refs are identical, so are packages, advance pointer @@ -229,14 +235,14 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle pl, pr = nil, nil } else { // load pl & pr if they haven't been loaded before - if pl == nil { + if pl == nil && rl != nil { pl, err = packageCollection.ByKey(rl) if err != nil { return nil, err } } - if pr == nil { + if pr == nil && rr != nil { pr, err = packageCollection.ByKey(rr) if err != nil { return nil, err @@ -268,7 +274,7 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle } } - return + return result, nil } // Merge merges reflist r into current reflist. If overrideMatching, merge @@ -401,3 +407,753 @@ func (l *PackageRefList) FilterLatestRefs() { lastArch, lastName, lastVer = arch, name, ver } } + +const ( + reflistBucketCount = 1 << 6 + reflistBucketMask = reflistBucketCount - 1 +) + +type reflistDigestArray [sha256.Size]byte + +func bucketRefPrefix(ref []byte) []byte { + const maxPrefixLen = 3 + + // Cut out the arch, leaving behind the package name and subsequent info. + _, ref, _ = bytes.Cut(ref, []byte{' '}) + + // Strip off the lib prefix, so that "libxyz" and "xyz", which are likely + // to be updated together, go in the same bucket. + libPrefix := []byte("lib") + if bytes.HasPrefix(ref, libPrefix) { + ref = ref[len(libPrefix):] + } + + prefixLen := min(maxPrefixLen, len(ref)) + prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) + return prefix +} + +func bucketIdxForRef(ref []byte) int { + return int(xxhash.Sum64(bucketRefPrefix(ref))) & reflistBucketMask +} + +// SplitRefList is a list of package refs, similar to a PackageRefList. However, +// instead of storing a linear array of refs, SplitRefList splits the refs into +// PackageRefList "buckets", based on a hash of the package name inside the ref. +// Each bucket has a digest of its contents that serves as its key in the database. +// +// When serialized, a SplitRefList just becomes an array of bucket digests, and +// the buckets themselves are stored separately. Because the buckets are then +// referenced by their digests, multiple independent reflists can share buckets, +// if their buckets have matching digests. +// +// Buckets themselves may not be confirmed to a single database value; instead, +// they're split into "segments", based on the database's preferred maximum +// value size. This prevents large buckets from slowing down the database. +type SplitRefList struct { + Buckets [][]byte + + bucketRefs []*PackageRefList +} + +// NewSplitRefList creates empty SplitRefList +func NewSplitRefList() *SplitRefList { + sl := &SplitRefList{} + sl.reset() + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageRefList +func NewSplitRefListFromRefList(reflist *PackageRefList) *SplitRefList { + sl := NewSplitRefList() + sl.Replace(reflist) + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageList +func NewSplitRefListFromPackageList(list *PackageList) *SplitRefList { + return NewSplitRefListFromRefList(NewPackageRefListFromPackageList(list)) +} + +func (sl *SplitRefList) reset() { + sl.Buckets = make([][]byte, reflistBucketCount) + sl.bucketRefs = make([]*PackageRefList, reflistBucketCount) +} + +// Has checks whether package is part of reflist +func (sl *SplitRefList) Has(p *Package) bool { + idx := bucketIdxForRef(p.Key("")) + if bucket := sl.bucketRefs[idx]; bucket != nil { + return bucket.Has(p) + } + return false +} + +// Len returns number of refs +func (sl *SplitRefList) Len() int { + total := 0 + for _, bucket := range sl.bucketRefs { + if bucket != nil { + total += bucket.Len() + } + } + return total +} + +func reflistDigest(l *PackageRefList) []byte { + // Different algorithms on PackageRefLists will sometimes return a nil slice + // of refs and other times return an empty slice. Regardless, they should + // both be treated identically and be given an empty digest. + if len(l.Refs) == 0 { + return nil + } + + h := sha256.New() + for _, ref := range l.Refs { + h.Write(ref) + h.Write([]byte{0}) + } + return h.Sum(nil) +} + +// Removes all the refs inside and replaces them with those in the given reflist +func (sl *SplitRefList) Replace(reflist *PackageRefList) { + sl.reset() + + for _, ref := range reflist.Refs { + idx := bucketIdxForRef(ref) + + bucket := sl.bucketRefs[idx] + if bucket == nil { + bucket = NewPackageRefList() + sl.bucketRefs[idx] = bucket + } + + bucket.Refs = append(bucket.Refs, ref) + } + + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + sort.Sort(bucket) + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Merge merges reflist r into current reflist (see PackageRefList.Merge) +func (sl *SplitRefList) Merge(r *SplitRefList, overrideMatching, ignoreConflicting bool) (result *SplitRefList) { + result = NewSplitRefList() + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket == nil && rbucket == nil { + continue + } + + if lbucket == nil { + lbucket = &empty + } else if rbucket == nil { + rbucket = &empty + } + + result.bucketRefs[idx] = lbucket.Merge(rbucket, overrideMatching, ignoreConflicting) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } + + return +} + +// Subtract returns all packages in l that are not in r +func (sl *SplitRefList) Subtract(r *SplitRefList) (result *SplitRefList) { + result = NewSplitRefList() + + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result.bucketRefs[idx] = lbucket.Subtract(rbucket) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } else { + result.bucketRefs[idx] = lbucket.Clone() + result.Buckets[idx] = sl.Buckets[idx] + } + } + } + + return +} + +// Diff calculates difference between two reflists +func (sl *SplitRefList) Diff(r *SplitRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result, err = lbucket.Diff(rbucket, packageCollection, result) + } else { + result, err = lbucket.Diff(&empty, packageCollection, result) + } + } else if rbucket != nil { + result, err = empty.Diff(rbucket, packageCollection, result) + } + + if err != nil { + return nil, err + } + } + + sort.Slice(result, func(i, j int) bool { + var ri, rj []byte + if result[i].Left != nil { + ri = result[i].Left.Key("") + } else { + ri = result[i].Right.Key("") + } + if result[j].Left != nil { + rj = result[j].Left.Key("") + } else { + rj = result[j].Right.Key("") + } + + return bytes.Compare(ri, rj) < 0 + }) + + return result, nil +} + +// FilterLatestRefs reduces a reflist to the latest of each package (see PackageRefList.FilterLatestRefs) +func (sl *SplitRefList) FilterLatestRefs() { + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + bucket.FilterLatestRefs() + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Flatten creates a flat PackageRefList containing all the refs in this reflist +func (sl *SplitRefList) Flatten() *PackageRefList { + reflist := NewPackageRefList() + sl.ForEach(func(ref []byte) error { + reflist.Refs = append(reflist.Refs, ref) + return nil + }) + sort.Sort(reflist) + return reflist +} + +// ForEachBucket calls handler for each bucket in list +func (sl *SplitRefList) ForEachBucket(handler func(digest []byte, bucket *PackageRefList) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := handler(digest, bucket); err != nil { + return err + } + } + } + + return nil +} + +// ForEach calls handler for each package ref in list +// +// IMPORTANT: unlike PackageRefList.ForEach, the order of handler invocations +// is *not* guaranteed to be sorted. +func (sl *SplitRefList) ForEach(handler func([]byte) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := bucket.ForEach(handler); err != nil { + return err + } + } + } + + return nil +} + +// RefListDigestSet is a set of SplitRefList bucket digests +type RefListDigestSet struct { + items map[reflistDigestArray]struct{} +} + +// NewRefListDigestSet creates empty RefListDigestSet +func NewRefListDigestSet() *RefListDigestSet { + return &RefListDigestSet{items: map[reflistDigestArray]struct{}{}} +} + +// Len returns number of digests in the set +func (set *RefListDigestSet) Len() int { + return len(set.items) +} + +// ForEach calls handler for each digest in the set +func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { + for digest := range set.items { + if err := handler(digest[:]); err != nil { + return err + } + } + + return nil +} + +// Add adds digest to set, doing nothing if the digest was already present +func (set *RefListDigestSet) Add(digest []byte) { + set.items[reflistDigestArray(digest)] = struct{}{} +} + +// AddAllInRefList adds all the bucket digests in a SplitRefList to the set +func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { + for _, digest := range sl.Buckets { + if len(digest) > 0 { + set.Add(digest) + } + } +} + +// Has checks whether a digest is part of set +func (set *RefListDigestSet) Has(digest []byte) bool { + _, ok := set.items[reflistDigestArray(digest)] + return ok +} + +// Remove removes a digest from set +func (set *RefListDigestSet) Remove(digest []byte) { + delete(set.items, reflistDigestArray(digest)) +} + +// RemoveAll removes all the digests in other from the current set +func (set *RefListDigestSet) RemoveAll(other *RefListDigestSet) { + for digest := range other.items { + delete(set.items, digest) + } +} + +// RefListCollection does listing, updating/adding/deleting of SplitRefLists +type RefListCollection struct { + db database.Storage + + cache map[reflistDigestArray]*PackageRefList +} + +// NewRefListCollection creates a RefListCollection +func NewRefListCollection(db database.Storage) *RefListCollection { + return &RefListCollection{db: db, cache: make(map[reflistDigestArray]*PackageRefList)} +} + +type reflistStorageFormat int + +const ( + // (legacy format) all the refs are stored inline in a single value + reflistStorageFormatInline reflistStorageFormat = iota + // the refs are split into buckets that are stored externally from the value + reflistStorageFormatSplit +) + +// NoPadding is used because all digests are the same length, so the padding +// is useless and only serves to muddy the output. +var bucketDigestEncoding = base64.StdEncoding.WithPadding(base64.NoPadding) + +func segmentPrefix(encodedDigest string) []byte { + return []byte(fmt.Sprintf("F%s-", encodedDigest)) +} + +func segmentIndexKey(prefix []byte, idx int) []byte { + // Assume most buckets won't have more than 0xFFFF = ~65k segments (which + // would be an extremely large bucket!). + return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) +} + +// AllBucketDigests returns a set of all the bucket digests in the database +func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { + digests := NewRefListDigestSet() + + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + if !bytes.HasSuffix(key, []byte("-0000")) { + // Ignore additional segments for the same digest. + return nil + } + + encodedDigest, _, foundDash := bytes.Cut(key[1:], []byte("-")) + if !foundDash { + return fmt.Errorf("invalid key: %s", string(key)) + } + digest := make([]byte, bucketDigestEncoding.DecodedLen(len(encodedDigest))) + if _, err := bucketDigestEncoding.Decode(digest, encodedDigest); err != nil { + return fmt.Errorf("decoding key %s: %w", string(key), err) + } + + digests.Add(digest) + return nil + }) + + if err != nil { + return nil, err + } + return digests, nil +} + +// UnsafeDropBucket drops the bucket associated with digest from the database, +// doing so inside batch +// +// This is considered "unsafe" because no checks are performed to ensure that +// the bucket is no longer referenced by any saved reflists. +func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return batch.Delete(key) + }) +} + +func (collection *RefListCollection) load(sl *SplitRefList, key []byte) (reflistStorageFormat, error) { + sl.reset() + + data, err := collection.db.Get(key) + if err != nil { + return 0, err + } + + var splitOrInlineRefList struct { + *SplitRefList + *PackageRefList + } + handle := &codec.MsgpackHandle{} + handle.ZeroCopy = true + decoder := codec.NewDecoderBytes(data, handle) + if err := decoder.Decode(&splitOrInlineRefList); err != nil { + return 0, err + } + + if splitOrInlineRefList.SplitRefList != nil { + sl.Buckets = splitOrInlineRefList.Buckets + } else if splitOrInlineRefList.PackageRefList != nil { + sl.Replace(splitOrInlineRefList.PackageRefList) + return reflistStorageFormatInline, nil + } + + return reflistStorageFormatSplit, nil +} + +func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { + for idx := range sl.Buckets { + if sl.bucketRefs[idx] != nil { + continue + } + + var bucket *PackageRefList + + if digest := sl.Buckets[idx]; len(digest) > 0 { + cacheKey := reflistDigestArray(digest) + bucket = collection.cache[cacheKey] + if bucket == nil { + bucket = NewPackageRefList() + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + var l PackageRefList + if err := l.Decode(append([]byte{}, value...)); err != nil { + return err + } + + bucket.Refs = append(bucket.Refs, l.Refs...) + return nil + }) + + if err != nil { + return err + } + + // The segments may not have been iterated in order, so make sure to re-sort + // here. + sort.Sort(bucket) + collection.cache[cacheKey] = bucket + } + + actualDigest := reflistDigest(bucket) + if !bytes.Equal(actualDigest, digest) { + return fmt.Errorf("corrupt reflist bucket %d: expected digest %s, got %s", + idx, + bucketDigestEncoding.EncodeToString(digest), + bucketDigestEncoding.EncodeToString(actualDigest)) + } + } + + sl.bucketRefs[idx] = bucket + } + + return nil +} + +// LoadComplete loads the reflist stored at the given key, as well as all the +// buckets referenced by a split reflist +func (collection *RefListCollection) LoadComplete(sl *SplitRefList, key []byte) error { + if _, err := collection.load(sl, key); err != nil { + return err + } + + return collection.loadBuckets(sl) +} + +// RefListBatch is a wrapper over a database.Batch that tracks already-written +// reflists to avoid writing them multiple times +// +// It is *not* safe to use the same underlying database.Batch that has already +// been given to UnsafeDropBucket. +type RefListBatch struct { + batch database.Batch + + alreadyWritten *RefListDigestSet +} + +// NewBatch creates a new RefListBatch wrapping the given database.Batch +func (collection *RefListCollection) NewBatch(batch database.Batch) *RefListBatch { + return &RefListBatch{ + batch: batch, + alreadyWritten: NewRefListDigestSet(), + } +} + +type reflistUpdateContext struct { + rb *RefListBatch + stats *RefListMigrationStats +} + +func clearSegmentRefs(reflist *PackageRefList, recommendedMaxKVSize int) { + avgRefsInSegment := recommendedMaxKVSize / 70 + reflist.Refs = make([][]byte, 0, avgRefsInSegment) +} + +func flushSegmentRefs(uctx *reflistUpdateContext, prefix []byte, segment int, reflist *PackageRefList) error { + encoded := reflist.Encode() + err := uctx.rb.batch.Put(segmentIndexKey(prefix, segment), encoded) + if err == nil && uctx.stats != nil { + uctx.stats.Segments++ + } + return err +} + +func (collection *RefListCollection) updateWithContext(sl *SplitRefList, key []byte, uctx *reflistUpdateContext) error { + if sl != nil { + recommendedMaxKVSize := collection.db.GetRecommendedMaxKVSize() + + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + if uctx.rb.alreadyWritten.Has(digest) { + continue + } + + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + if collection.db.HasPrefix(prefix) { + continue + } + + // All the sizing information taken from the msgpack spec: + // https://github.com/msgpack/msgpack/blob/master/spec.md + + // Assume that a segment will have [16,2^16) elements, which would + // fit into an array 16 and thus have 3 bytes of overhead. + // (A database would need a massive recommendedMaxKVSize to pass + // that limit.) + size := len(segmentIndexKey(prefix, 0)) + 3 + segment := 0 + + var reflist PackageRefList + clearSegmentRefs(&reflist, recommendedMaxKVSize) + for _, ref := range sl.bucketRefs[idx].Refs { + // In order to determine the size of the ref in the database, + // we need to know how much overhead will be added with by msgpack + // encoding. + requiredSize := len(ref) + if requiredSize < 1<<5 { + requiredSize++ + } else if requiredSize < 1<<8 { + requiredSize += 2 + } else if requiredSize < 1<<16 { + requiredSize += 3 + } else { + requiredSize += 4 + } + if size+requiredSize > recommendedMaxKVSize { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + clearSegmentRefs(&reflist, recommendedMaxKVSize) + segment++ + } + + reflist.Refs = append(reflist.Refs, ref) + size += requiredSize + } + + if len(reflist.Refs) > 0 { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + } + + uctx.rb.alreadyWritten.Add(digest) + if uctx.stats != nil { + uctx.stats.Buckets++ + } + } + } + + var buf bytes.Buffer + encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + encoder.Encode(sl) + err := uctx.rb.batch.Put(key, buf.Bytes()) + if err == nil && uctx.stats != nil { + uctx.stats.Reflists++ + } + return err +} + +// UpdateInBatch will save or update the SplitRefList at key, as well as save the buckets inside, +// as part of the given batch +func (collection *RefListCollection) UpdateInBatch(sl *SplitRefList, key []byte, batch *RefListBatch) error { + return collection.updateWithContext(sl, key, &reflistUpdateContext{rb: batch}) +} + +// Update will save or update the SplitRefList at key, as well as save the buckets inside +func (collection *RefListCollection) Update(sl *SplitRefList, key []byte) error { + rb := collection.NewBatch(collection.db.CreateBatch()) + err := collection.UpdateInBatch(sl, key, rb) + if err == nil { + err = rb.batch.Write() + } + return err +} + +// RefListMigrationStats counts a number of reflists, buckets, and segments +type RefListMigrationStats struct { + Reflists, Buckets, Segments int +} + +// RefListMigration wraps a RefListBatch for the purpose of migrating inline format +// reflists to split reflists +// +// Once the batch gets too large, it will automatically be flushed to the database, +// and a new batch will be created in its place. +type RefListMigration struct { + rb *RefListBatch + + dryRun bool + + // current number of reflists/buckets/segments queued in the current, unwritten batch + batchStats RefListMigrationStats + flushStats RefListMigrationStats +} + +// NewMigration creates an empty RefListMigration +func (collection *RefListCollection) NewMigration() *RefListMigration { + return &RefListMigration{} +} + +// NewMigrationDryRun creates an empty RefListMigration that will track the +// changes to make as usual but avoid actually writing to the db +func (collection *RefListCollection) NewMigrationDryRun() *RefListMigration { + return &RefListMigration{dryRun: true} +} + +// Stats returns statistics on the written values in the current migration +func (migration *RefListMigration) Stats() RefListMigrationStats { + return migration.flushStats +} + +// Flush will flush the current batch in the migration to the database +func (migration *RefListMigration) Flush() error { + if migration.batchStats.Segments > 0 { + if !migration.dryRun { + if err := migration.rb.batch.Write(); err != nil { + return err + } + + // It's important that we don't clear the batch on dry runs, because + // the batch is what contains the list of already-written buckets. + // If we're not writing to the database, and we clear that list, + // duplicate "writes" will occur. + migration.rb = nil + } + + migration.flushStats.Reflists += migration.batchStats.Reflists + migration.flushStats.Buckets += migration.batchStats.Buckets + migration.flushStats.Segments += migration.batchStats.Segments + migration.batchStats = RefListMigrationStats{} + } + + return nil +} + +// LoadCompleteAndMigrate will load the reflist and its buckets as RefListCollection.LoadComplete, +// migrating any inline reflists to split ones along the way +func (collection *RefListCollection) LoadCompleteAndMigrate(sl *SplitRefList, key []byte, migration *RefListMigration) error { + // Given enough reflists, the memory used by a batch starts to become massive, so + // make sure to flush the written segments periodically. Note that this is only + // checked *after* a migration of a full bucket (and all the segments inside) + // takes place, as splitting a single bucket write into multiple batches would + // be unsafe if an interruption occurs midway. + const maxMigratorBatch = 50000 + + format, err := collection.load(sl, key) + if err != nil { + return err + } + + switch format { + case reflistStorageFormatInline: + if migration.rb == nil { + migration.rb = collection.NewBatch(collection.db.CreateBatch()) + } + + collection.updateWithContext(sl, key, &reflistUpdateContext{ + rb: migration.rb, + stats: &migration.batchStats, + }) + + if migration.batchStats.Segments > maxMigratorBatch { + if err := migration.Flush(); err != nil { + return err + } + } + + return nil + case reflistStorageFormatSplit: + return collection.loadBuckets(sl) + default: + panic(fmt.Sprintf("unexpected format %v", format)) + } +} + +// AnyRefList is implemented by both PackageRefList and SplitRefList +type AnyRefList interface { + Has(p *Package) bool + Len() int + ForEach(handler func([]byte) error) error + FilterLatestRefs() +} + +// Check interface +var ( + _ AnyRefList = (*PackageRefList)(nil) + _ AnyRefList = (*SplitRefList)(nil) +) diff --git a/deb/reflist_bench_test.go b/deb/reflist_bench_test.go index b377574ce437ad7984e164292d00c736233b51e3..f81a84d0ae78f315a42e0a23df6119d2f3060a91 100644 --- a/deb/reflist_bench_test.go +++ b/deb/reflist_bench_test.go @@ -45,3 +45,41 @@ func BenchmarkReflistDecode(b *testing.B) { (&PackageRefList{}).Decode(data) } } + +func BenchmarkSplitRefListCreationSmall(b *testing.B) { + const count = 400 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} + +func BenchmarkSplitRefListCreationLarge(b *testing.B) { + const count = 4096 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} diff --git a/deb/reflist_test.go b/deb/reflist_test.go index d0ce21f5ef469f64b8c9adac4478c9929a909ab3..bfc6411312f92acffb25da78c25cbf2ec8c7cb80 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -1,7 +1,10 @@ package deb import ( + "bytes" + "encoding/hex" "errors" + "fmt" "github.com/aptly-dev/aptly/database/goleveldb" @@ -9,24 +12,83 @@ import ( ) type PackageRefListSuite struct { - // Simple list with "real" packages from stanzas - list *PackageList p1, p2, p3, p4, p5, p6 *Package } var _ = Suite(&PackageRefListSuite{}) -func toStrSlice(reflist *PackageRefList) (result []string) { +func verifyRefListIntegrity(c *C, rl AnyRefList) AnyRefList { + if rl, ok := rl.(*SplitRefList); ok { + for idx, bucket := range rl.bucketRefs { + if bucket == nil { + bucket = NewPackageRefList() + } + c.Check(rl.Buckets[idx], DeepEquals, reflistDigest(bucket)) + } + } + + return rl +} + +func getRefs(rl AnyRefList) (refs [][]byte) { + switch rl := rl.(type) { + case *PackageRefList: + refs = rl.Refs + case *SplitRefList: + refs = rl.Flatten().Refs + default: + panic(fmt.Sprintf("unexpected reflist type %t", rl)) + } + + // Hack so that passing getRefs-returned slices to DeepEquals won't fail given a nil + // slice and an empty slice. + if len(refs) == 0 { + refs = nil + } + return +} + +func toStrSlice(reflist AnyRefList) (result []string) { result = make([]string, reflist.Len()) - for i, r := range reflist.Refs { + for i, r := range getRefs(reflist) { result[i] = string(r) } return } -func (s *PackageRefListSuite) SetUpTest(c *C) { - s.list = NewPackageList() +type reflistFactory struct { + new func() AnyRefList + newFromRefs func(refs ...[]byte) AnyRefList + newFromPackageList func(list *PackageList) AnyRefList +} + +func forEachRefList(test func(f reflistFactory)) { + test(reflistFactory{ + new: func() AnyRefList { + return NewPackageRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return &PackageRefList{Refs: refs} + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewPackageRefListFromPackageList(list) + }, + }) + test(reflistFactory{ + new: func() AnyRefList { + return NewSplitRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return NewSplitRefListFromRefList(&PackageRefList{Refs: refs}) + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewSplitRefListFromPackageList(list) + }, + }) +} + +func (s *PackageRefListSuite) SetUpTest(c *C) { s.p1 = NewPackageFromControlFile(packageStanza.Copy()) s.p2 = NewPackageFromControlFile(packageStanza.Copy()) stanza := packageStanza.Copy() @@ -44,311 +106,600 @@ func (s *PackageRefListSuite) SetUpTest(c *C) { } func (s *PackageRefListSuite) TestNewPackageListFromRefList(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - coll.Update(s.p1) - coll.Update(s.p3) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + coll.Update(s.p1) + coll.Update(s.p3) - reflist := NewPackageRefListFromPackageList(s.list) + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - _, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, ErrorMatches, "unable to load package with key.*") + reflist := f.newFromPackageList(list) - coll.Update(s.p5) - coll.Update(s.p6) + _, err := NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, ErrorMatches, "unable to load package with key.*") - list, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 4) - c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + coll.Update(s.p5) + coll.Update(s.p6) - list, err = NewPackageListFromRefList(nil, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 0) + list, err = NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 4) + c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + + list, err = NewPackageListFromRefList(nil, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 0) + }) } func (s *PackageRefListSuite) TestNewPackageRefList(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) - - reflist := NewPackageRefListFromPackageList(s.list) - c.Assert(reflist.Len(), Equals, 4) - c.Check(reflist.Refs[0], DeepEquals, []byte(s.p1.Key(""))) - c.Check(reflist.Refs[1], DeepEquals, []byte(s.p6.Key(""))) - c.Check(reflist.Refs[2], DeepEquals, []byte(s.p5.Key(""))) - c.Check(reflist.Refs[3], DeepEquals, []byte(s.p3.Key(""))) - - reflist = NewPackageRefList() - c.Check(reflist.Len(), Equals, 0) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) + + reflist := f.newFromPackageList(list) + verifyRefListIntegrity(c, reflist) + c.Assert(reflist.Len(), Equals, 4) + refs := getRefs(reflist) + c.Check(refs[0], DeepEquals, []byte(s.p1.Key(""))) + c.Check(refs[1], DeepEquals, []byte(s.p6.Key(""))) + c.Check(refs[2], DeepEquals, []byte(s.p5.Key(""))) + c.Check(refs[3], DeepEquals, []byte(s.p3.Key(""))) + + reflist = f.new() + c.Check(reflist.Len(), Equals, 0) + }) } -func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) +func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflist := NewPackageRefListFromPackageList(s.list) + reflist := f.newFromPackageList(list) - reflist2 := &PackageRefList{} - err := reflist2.Decode(reflist.Encode()) - c.Assert(err, IsNil) - c.Check(reflist2.Len(), Equals, reflist.Len()) - c.Check(reflist2.Refs, DeepEquals, reflist.Refs) -} + Len := 0 + err := reflist.ForEach(func([]byte) error { + Len++ + return nil + }) -func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + c.Check(Len, Equals, 4) + c.Check(err, IsNil) - reflist := NewPackageRefListFromPackageList(s.list) + e := errors.New("b") - Len := 0 - err := reflist.ForEach(func([]byte) error { - Len++ - return nil + err = reflist.ForEach(func([]byte) error { + return e + }) + + c.Check(err, Equals, e) }) +} - c.Check(Len, Equals, 4) - c.Check(err, IsNil) +func (s *PackageRefListSuite) TestHas(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + reflist := f.newFromPackageList(list) + + c.Check(reflist.Has(s.p1), Equals, true) + c.Check(reflist.Has(s.p3), Equals, true) + c.Check(reflist.Has(s.p5), Equals, true) + c.Check(reflist.Has(s.p2), Equals, true) + c.Check(reflist.Has(s.p6), Equals, false) + }) +} - e := errors.New("b") +func subtractRefLists(l, r AnyRefList) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Subtract(r.(*PackageRefList)) + case *SplitRefList: + return l.Subtract(r.(*SplitRefList)) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } +} - err = reflist.ForEach(func([]byte) error { - return e +func (s *PackageRefListSuite) TestSubtract(c *C) { + forEachRefList(func(f reflistFactory) { + r1 := []byte("Pall r1") + r2 := []byte("Pall r2") + r3 := []byte("Pall r3") + r4 := []byte("Pall r4") + r5 := []byte("Pall r5") + + empty := f.newFromRefs() + l1 := f.newFromRefs(r1, r2, r3, r4) + l2 := f.newFromRefs(r1, r3) + l3 := f.newFromRefs(r2, r4) + l4 := f.newFromRefs(r4, r5) + l5 := f.newFromRefs(r1, r2, r3) + + c.Check(getRefs(subtractRefLists(l1, empty)), DeepEquals, getRefs(l1)) + c.Check(getRefs(subtractRefLists(l1, l2)), DeepEquals, getRefs(l3)) + c.Check(getRefs(subtractRefLists(l1, l3)), DeepEquals, getRefs(l2)) + c.Check(getRefs(subtractRefLists(l1, l4)), DeepEquals, getRefs(l5)) + c.Check(getRefs(subtractRefLists(empty, l1)), DeepEquals, getRefs(empty)) + c.Check(getRefs(subtractRefLists(l2, l3)), DeepEquals, getRefs(l2)) }) +} - c.Check(err, Equals, e) +func diffRefLists(l, r AnyRefList, packageCollection *PackageCollection) (PackageDiffs, error) { + switch l := l.(type) { + case *PackageRefList: + return l.Diff(r.(*PackageRefList), packageCollection, nil) + case *SplitRefList: + return l.Diff(r.(*SplitRefList), packageCollection, nil) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) + } } -func (s *PackageRefListSuite) TestHas(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - reflist := NewPackageRefListFromPackageList(s.list) - - c.Check(reflist.Has(s.p1), Equals, true) - c.Check(reflist.Has(s.p3), Equals, true) - c.Check(reflist.Has(s.p5), Equals, true) - c.Check(reflist.Has(s.p2), Equals, true) - c.Check(reflist.Has(s.p6), Equals, false) +func (s *PackageRefListSuite) TestDiff(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[6]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAA, err := diffRefLists(reflistA, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffAA, HasLen, 0) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 4) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") + c.Check(diffAB[2].Right, IsNil) + + c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") + c.Check(diffAB[3].Right, IsNil) + + diffBA, err := diffRefLists(reflistB, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffBA, HasLen, 4) + + c.Check(diffBA[0].Right, IsNil) + c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") + c.Check(diffBA[2].Left, IsNil) + + c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") + c.Check(diffBA[3].Left, IsNil) + }) } -func (s *PackageRefListSuite) TestSubstract(c *C) { - r1 := []byte("r1") - r2 := []byte("r2") - r3 := []byte("r3") - r4 := []byte("r4") - r5 := []byte("r5") - - empty := &PackageRefList{Refs: [][]byte{}} - l1 := &PackageRefList{Refs: [][]byte{r1, r2, r3, r4}} - l2 := &PackageRefList{Refs: [][]byte{r1, r3}} - l3 := &PackageRefList{Refs: [][]byte{r2, r4}} - l4 := &PackageRefList{Refs: [][]byte{r4, r5}} - l5 := &PackageRefList{Refs: [][]byte{r1, r2, r3}} - - c.Check(l1.Subtract(empty), DeepEquals, l1) - c.Check(l1.Subtract(l2), DeepEquals, l3) - c.Check(l1.Subtract(l3), DeepEquals, l2) - c.Check(l1.Subtract(l4), DeepEquals, l5) - c.Check(empty.Subtract(l1), DeepEquals, empty) - c.Check(l2.Subtract(l3), DeepEquals, l2) +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + }) } -func (s *PackageRefListSuite) TestDiff(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 +func mergeRefLists(l, r AnyRefList, overrideMatching, ignoreConflicting bool) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Merge(r.(*PackageRefList), overrideMatching, ignoreConflicting) + case *SplitRefList: + return l.Merge(r.(*SplitRefList), overrideMatching, ignoreConflicting) + default: + panic(fmt.Sprintf("unexpected reflist type %t", l)) } +} - for _, p := range packages { - coll.Update(p) - } +func (s *PackageRefListSuite) TestMerge(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 + {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 + {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 + {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 + } + + for _, p := range packages { + p.V06Plus = true + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[7]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + listB.Add(packages[6]) + + listC := NewPackageList() + listC.Add(packages[0]) + listC.Add(packages[8]) + listC.Add(packages[9]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + reflistC := f.newFromPackageList(listC) + + mergeAB := mergeRefLists(reflistA, reflistB, true, false) + mergeBA := mergeRefLists(reflistB, reflistA, true, false) + mergeAC := mergeRefLists(reflistA, reflistC, true, false) + mergeBC := mergeRefLists(reflistB, reflistC, true, false) + mergeCB := mergeRefLists(reflistC, reflistB, true, false) + + verifyRefListIntegrity(c, mergeAB) + verifyRefListIntegrity(c, mergeBA) + verifyRefListIntegrity(c, mergeAC) + verifyRefListIntegrity(c, mergeBC) + verifyRefListIntegrity(c, mergeCB) + + c.Check(toStrSlice(mergeAB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBA), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeAC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + c.Check(toStrSlice(mergeCB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) + + mergeABall := mergeRefLists(reflistA, reflistB, false, false) + mergeBAall := mergeRefLists(reflistB, reflistA, false, false) + mergeACall := mergeRefLists(reflistA, reflistC, false, false) + mergeBCall := mergeRefLists(reflistB, reflistC, false, false) + mergeCBall := mergeRefLists(reflistC, reflistB, false, false) + + verifyRefListIntegrity(c, mergeABall) + verifyRefListIntegrity(c, mergeBAall) + verifyRefListIntegrity(c, mergeACall) + verifyRefListIntegrity(c, mergeBCall) + verifyRefListIntegrity(c, mergeCBall) + + c.Check(mergeABall, DeepEquals, mergeBAall) + c.Check(toStrSlice(mergeBAall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + + c.Check(mergeBCall, Not(DeepEquals), mergeCBall) + c.Check(toStrSlice(mergeACall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBCall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 lib 1.0 00000000"}) + + mergeBCwithConflicts := mergeRefLists(reflistB, reflistC, false, true) + c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + }) +} - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[6]) +func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { + forEachRefList(func(f reflistFactory) { + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, + {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, + {Name: "lib", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.5", Architecture: "i386"}, + {Name: "dpkg", Version: "1.6", Architecture: "i386"}, + } + + rl := NewPackageList() + rl.Add(packages[0]) + rl.Add(packages[1]) + rl.Add(packages[2]) + rl.Add(packages[3]) + rl.Add(packages[4]) + rl.Add(packages[5]) + rl.Add(packages[6]) + rl.Add(packages[7]) + + result := f.newFromPackageList(rl) + result.FilterLatestRefs() + + verifyRefListIntegrity(c, result) + c.Check(toStrSlice(result), DeepEquals, + []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + }) +} - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) +func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + reflist := NewPackageRefListFromPackageList(list) - diffAA, err := reflistA.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffAA, HasLen, 0) + reflist2 := &PackageRefList{} + err := reflist2.Decode(reflist.Encode()) + c.Assert(err, IsNil) + c.Check(reflist2.Len(), Equals, reflist.Len()) + c.Check(reflist2.Refs, DeepEquals, reflist.Refs) +} - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 4) +func (s *PackageRefListSuite) TestRefListBucketPrefix(c *C) { + c.Check(bucketRefPrefix([]byte("Pall abcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pall libabcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pamd64 xy 1.0")), DeepEquals, []byte("xy")) +} - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") +func (s *PackageRefListSuite) TestRefListBucketIdx(c *C) { + c.Check(bucketIdxForRef(s.p1.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p2.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p3.Key("")), Equals, 26) + c.Check(bucketIdxForRef(s.p4.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p5.Key("")), Equals, 4) + c.Check(bucketIdxForRef(s.p6.Key("")), Equals, 46) +} - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +func (s *PackageRefListSuite) TestSplitRefListBuckets(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) + + sl := NewSplitRefListFromPackageList(list) + verifyRefListIntegrity(c, sl) + + c.Check(hex.EncodeToString(sl.Buckets[4]), Equals, "7287aed32daad5d1aab4e89533bde135381d932e60548cfc00b882ca8858ae07") + c.Check(toStrSlice(sl.bucketRefs[4]), DeepEquals, []string{string(s.p5.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[26]), Equals, "f31fc28e82368b63c8be47eefc64b8e217e2e5349c7e3827b98f80536b956f6e") + c.Check(toStrSlice(sl.bucketRefs[26]), DeepEquals, []string{string(s.p3.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[46]), Equals, "55e70286393afc5da5046d68c632d35f98bec24781ae433bd1a1069b52853367") + c.Check(toStrSlice(sl.bucketRefs[46]), DeepEquals, []string{string(s.p1.Key("")), string(s.p6.Key(""))}) +} - c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") - c.Check(diffAB[2].Right, IsNil) +func (s *PackageRefListSuite) TestRefListDigestSet(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) - c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") - c.Check(diffAB[3].Right, IsNil) + sl := NewSplitRefListFromPackageList(list) - diffBA, err := reflistB.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffBA, HasLen, 4) + set := NewRefListDigestSet() + c.Check(set.Len(), Equals, 0) - c.Check(diffBA[0].Right, IsNil) - c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + err := sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, false) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + set.AddAllInRefList(sl) + c.Check(set.Len(), Equals, 3) - c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") - c.Check(diffBA[2].Left, IsNil) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, true) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") - c.Check(diffBA[3].Left, IsNil) + firstDigest := sl.Buckets[bucketIdxForRef(s.p1.Key(""))] + set.Remove(firstDigest) + c.Check(set.Len(), Equals, 2) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, !bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) + + set2 := NewRefListDigestSet() + set2.AddAllInRefList(sl) + set2.RemoveAll(set) + + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set2.Has(digest), Equals, bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) } -func (s *PackageRefListSuite) TestMerge(c *C) { +func (s *PackageRefListSuite) TestRefListCollectionLoadSave(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 - {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 - {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 - {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 - } + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + err := reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(reflist)) - for _, p := range packages { - p.V06Plus = true - coll.Update(p) - } + list.Add(s.p6) + sl = NewSplitRefListFromPackageList(list) + err = reflistCollection.Update(sl, key) + c.Assert(err, IsNil) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[7]) - - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) - listB.Add(packages[6]) - - listC := NewPackageList() - listC.Add(packages[0]) - listC.Add(packages[8]) - listC.Add(packages[9]) - - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) - reflistC := NewPackageRefListFromPackageList(listC) - - mergeAB := reflistA.Merge(reflistB, true, false) - mergeBA := reflistB.Merge(reflistA, true, false) - mergeAC := reflistA.Merge(reflistC, true, false) - mergeBC := reflistB.Merge(reflistC, true, false) - mergeCB := reflistC.Merge(reflistB, true, false) - - c.Check(toStrSlice(mergeAB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBA), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeAC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) - c.Check(toStrSlice(mergeCB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) - - mergeABall := reflistA.Merge(reflistB, false, false) - mergeBAall := reflistB.Merge(reflistA, false, false) - mergeACall := reflistA.Merge(reflistC, false, false) - mergeBCall := reflistB.Merge(reflistC, false, false) - mergeCBall := reflistC.Merge(reflistB, false, false) - - c.Check(mergeABall, DeepEquals, mergeBAall) - c.Check(toStrSlice(mergeBAall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - - c.Check(mergeBCall, Not(DeepEquals), mergeCBall) - c.Check(toStrSlice(mergeACall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBCall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 lib 1.0 00000000"}) - - mergeBCwithConflicts := reflistB.Merge(reflistC, false, true) - c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) } -func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, - {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, - {Name: "lib", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.5", Architecture: "i386"}, - {Name: "dpkg", Version: "1.6", Architecture: "i386"}, - } +func (s *PackageRefListSuite) TestRefListCollectionMigrate(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + format, err := reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatInline) + + migrator := reflistCollection.NewMigration() + err = reflistCollection.LoadCompleteAndMigrate(sl, key, migrator) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) - rl := NewPackageList() - rl.Add(packages[0]) - rl.Add(packages[1]) - rl.Add(packages[2]) - rl.Add(packages[3]) - rl.Add(packages[4]) - rl.Add(packages[5]) - rl.Add(packages[6]) - rl.Add(packages[7]) - - result := NewPackageRefListFromPackageList(rl) - result.FilterLatestRefs() - - c.Check(toStrSlice(result), DeepEquals, - []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + stats := migrator.Stats() + c.Check(stats.Reflists, Equals, 0) + c.Check(stats.Buckets, Equals, 0) + c.Check(stats.Segments, Equals, 0) + + err = migrator.Flush() + c.Assert(err, IsNil) + stats = migrator.Stats() + c.Check(stats.Reflists, Equals, 1) + c.Check(stats.Buckets, Not(Equals), 0) + c.Check(stats.Segments, Equals, stats.Segments) + + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) + + format, err = reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatSplit) } diff --git a/deb/remote.go b/deb/remote.go index 72deb7afda7034d86ffeb86066ea951d85b193b3..ce54ea6365d758bcd73eef38485147365b792c00 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -73,7 +73,7 @@ type RemoteRepo struct { // Packages for json output Packages []string `codec:"-" json:",omitempty"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList // Parsed archived root archiveRootURL *url.URL // Current list of packages (filled while updating mirror) @@ -171,7 +171,7 @@ func (repo *RemoteRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *RemoteRepo) RefList() *PackageRefList { +func (repo *RemoteRepo) RefList() *SplitRefList { return repo.packageRefs } @@ -659,7 +659,7 @@ func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, p }) if err == nil { - repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList) + repo.packageRefs = NewSplitRefListFromPackageList(repo.packageList) repo.packageList = nil } @@ -801,14 +801,14 @@ func (collection *RemoteRepoCollection) search(filter func(*RemoteRepo) bool, un } // Add appends new repo to collection and saves it -func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Add(repo *RemoteRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("mirror with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -818,28 +818,26 @@ func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { } // Update stores updated information about repo in DB -func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Update(repo *RemoteRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information for remote repo -func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/remote_test.go b/deb/remote_test.go index c331579f548f9724acc21cccee31a54328a0f704..3e05ef7e569f44babed76773b1610e2d084d60ba 100644 --- a/deb/remote_test.go +++ b/deb/remote_test.go @@ -52,7 +52,7 @@ func (n *NullVerifier) IsClearSigned(clearsign io.Reader) (bool, error) { type PackageListMixinSuite struct { p1, p2, p3 *Package list *PackageList - reflist *PackageRefList + reflist *SplitRefList } func (s *PackageListMixinSuite) SetUpPackages() { @@ -72,7 +72,7 @@ func (s *PackageListMixinSuite) SetUpPackages() { s.list.Add(s.p2) s.list.Add(s.p3) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) } type RemoteRepoSuite struct { @@ -291,7 +291,7 @@ func (s *RemoteRepoSuite) TestDownload(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -373,12 +373,12 @@ func (s *RemoteRepoSuite) TestDownloadWithInstaller(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "installer") } @@ -419,12 +419,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSources(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -503,7 +503,7 @@ func (s *RemoteRepoSuite) TestDownloadFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -593,12 +593,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -658,8 +658,9 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { type RemoteRepoCollectionSuite struct { PackageListMixinSuite - db database.Storage - collection *RemoteRepoCollection + db database.Storage + collection *RemoteRepoCollection + refListCollection *RefListCollection } var _ = Suite(&RemoteRepoCollectionSuite{}) @@ -667,6 +668,7 @@ var _ = Suite(&RemoteRepoCollectionSuite{}) func (s *RemoteRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewRemoteRepoCollection(s.db) + s.refListCollection = NewRefListCollection(s.db) s.SetUpPackages() } @@ -679,8 +681,8 @@ func (s *RemoteRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("yandex") c.Assert(err, IsNil) @@ -697,7 +699,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -711,7 +713,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection := NewRemoteRepoCollection(s.db) r, err := collection.ByName("yandex") @@ -719,20 +721,20 @@ func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection = NewRemoteRepoCollection(s.db) r, err = collection.ByName("yandex") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.refListCollection), IsNil) c.Assert(r.NumPackages(), Equals, 3) } func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo) + s.collection.Add(repo, s.refListCollection) count := 0 err := s.collection.ForEach(func(*RemoteRepo) error { @@ -754,10 +756,10 @@ func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *RemoteRepoCollectionSuite) TestDrop(c *C) { repo1, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo1) + s.collection.Add(repo1, s.refListCollection) repo2, _ := NewRemoteRepo("tyndex", "http://mirror.yandex.ru/debian/", "wheezy", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo2) + s.collection.Add(repo2, s.refListCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/snapshot.go b/deb/snapshot.go index f351b87f3b07f4d6d70c79bbf050d78bb720b2a9..a9a8655ceb86c21b3a57defce2f85314995feab6 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -40,7 +40,7 @@ type Snapshot struct { NotAutomatic string ButAutomaticUpgrades string - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewSnapshotFromRepository creates snapshot from current state of repository @@ -76,7 +76,7 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { } if snap.packageRefs == nil { - snap.packageRefs = NewPackageRefList() + snap.packageRefs = NewSplitRefList() } return snap, nil @@ -84,11 +84,13 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { // NewSnapshotFromPackageList creates snapshot from PackageList func NewSnapshotFromPackageList(name string, sources []*Snapshot, list *PackageList, description string) *Snapshot { - return NewSnapshotFromRefList(name, sources, NewPackageRefListFromPackageList(list), description) + sl := NewSplitRefList() + sl.Replace(NewPackageRefListFromPackageList(list)) + return NewSnapshotFromRefList(name, sources, sl, description) } -// NewSnapshotFromRefList creates snapshot from PackageRefList -func NewSnapshotFromRefList(name string, sources []*Snapshot, list *PackageRefList, description string) *Snapshot { +// NewSnapshotFromRefList creates snapshot from SplitRefList +func NewSnapshotFromRefList(name string, sources []*Snapshot, list *SplitRefList, description string) *Snapshot { sourceUUIDs := make([]string, len(sources)) for i := range sources { sourceUUIDs[i] = sources[i].UUID @@ -116,7 +118,7 @@ func (s *Snapshot) NumPackages() int { } // RefList returns list of package refs in snapshot -func (s *Snapshot) RefList() *PackageRefList { +func (s *Snapshot) RefList() *SplitRefList { return s.packageRefs } @@ -209,13 +211,13 @@ func NewSnapshotCollection(db database.Storage) *SnapshotCollection { } // Add appends new repo to collection and saves it -func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Add(snapshot *Snapshot, reflistCollection *RefListCollection) error { _, err := collection.ByName(snapshot.Name) if err == nil { return fmt.Errorf("snapshot with name %s already exists", snapshot.Name) } - err = collection.Update(snapshot) + err = collection.Update(snapshot, reflistCollection) if err != nil { return err } @@ -225,26 +227,22 @@ func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { } // Update stores updated information about snapshot in DB -func (collection *SnapshotCollection) Update(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Update(snapshot *Snapshot, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(snapshot.Key(), snapshot.Encode()) if snapshot.packageRefs != nil { - batch.Put(snapshot.RefKey(), snapshot.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(snapshot.packageRefs, snapshot.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information about snapshot -func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot) error { - encoded, err := collection.db.Get(snapshot.RefKey()) - if err != nil { - return err - } - - snapshot.packageRefs = &PackageRefList{} - return snapshot.packageRefs.Decode(encoded) +func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot, reflistCollection *RefListCollection) error { + snapshot.packageRefs = NewSplitRefList() + return reflistCollection.LoadComplete(snapshot.packageRefs, snapshot.RefKey()) } func (collection *SnapshotCollection) search(filter func(*Snapshot) bool, unique bool) []*Snapshot { diff --git a/deb/snapshot_bench_test.go b/deb/snapshot_bench_test.go index c6bb94a2c610264e5440477a82a3cc96894032d4..4475ca57b5cd4eda6acfd3cb9cf91988db788eed 100644 --- a/deb/snapshot_bench_test.go +++ b/deb/snapshot_bench_test.go @@ -18,10 +18,11 @@ func BenchmarkSnapshotCollectionForEach(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } @@ -47,11 +48,12 @@ func BenchmarkSnapshotCollectionByUUID(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) uuids := []string{} for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } uuids = append(uuids, snapshot.UUID) @@ -78,10 +80,11 @@ func BenchmarkSnapshotCollectionByName(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } diff --git a/deb/snapshot_test.go b/deb/snapshot_test.go index d27c4226907c5f341a2333e6d56743c7edeacc90..805ccc8e5bc1b04fa6fdce6be8847adad55005d2 100644 --- a/deb/snapshot_test.go +++ b/deb/snapshot_test.go @@ -109,6 +109,7 @@ type SnapshotCollectionSuite struct { snapshot1, snapshot2 *Snapshot snapshot3, snapshot4 *Snapshot collection *SnapshotCollection + reflistCollection *RefListCollection } var _ = Suite(&SnapshotCollectionSuite{}) @@ -116,6 +117,7 @@ var _ = Suite(&SnapshotCollectionSuite{}) func (s *SnapshotCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewSnapshotCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.SetUpPackages() s.repo1, _ = NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) @@ -143,10 +145,10 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { _, err := s.collection.ByName("snap1") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), ErrorMatches, ".*already exists") - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) snapshot, err := s.collection.ByName("snap1") c.Assert(err, IsNil) @@ -167,20 +169,20 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { } func (s *SnapshotCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.snapshot1), IsNil) + c.Assert(s.collection.Update(s.snapshot1, s.reflistCollection), IsNil) collection := NewSnapshotCollection(s.db) snapshot, err := collection.ByName("snap1") c.Assert(err, IsNil) c.Assert(snapshot.packageRefs, IsNil) - c.Assert(s.collection.LoadComplete(snapshot), IsNil) + c.Assert(s.collection.LoadComplete(snapshot, s.reflistCollection), IsNil) c.Assert(snapshot.NumPackages(), Equals, 3) } func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*Snapshot) error { @@ -200,10 +202,10 @@ func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { } func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { - s.collection.Add(s.snapshot2) - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot4) - s.collection.Add(s.snapshot3) + s.collection.Add(s.snapshot2, s.reflistCollection) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot4, s.reflistCollection) + s.collection.Add(s.snapshot3, s.reflistCollection) names := []string{} @@ -217,8 +219,8 @@ func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { } func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) c.Check(s.collection.ByRemoteRepoSource(s.repo1), DeepEquals, []*Snapshot{s.snapshot1}) c.Check(s.collection.ByRemoteRepoSource(s.repo2), DeepEquals, []*Snapshot{s.snapshot2}) @@ -229,10 +231,10 @@ func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { } func (s *SnapshotCollectionSuite) TestFindByLocalRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(s.snapshot3), IsNil) - c.Assert(s.collection.Add(s.snapshot4), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot4, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepoSource(s.lrepo1), DeepEquals, []*Snapshot{s.snapshot3}) c.Check(s.collection.ByLocalRepoSource(s.lrepo2), DeepEquals, []*Snapshot{s.snapshot4}) @@ -247,11 +249,11 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { snapshot4 := NewSnapshotFromRefList("snap4", []*Snapshot{s.snapshot1}, s.reflist, "desc2") snapshot5 := NewSnapshotFromRefList("snap5", []*Snapshot{snapshot3}, s.reflist, "desc3") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(snapshot3), IsNil) - c.Assert(s.collection.Add(snapshot4), IsNil) - c.Assert(s.collection.Add(snapshot5), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot5, s.reflistCollection), IsNil) list := s.collection.BySnapshotSource(s.snapshot1) sorter, _ := newSnapshotSorter("name", list) @@ -263,8 +265,8 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { } func (s *SnapshotCollectionSuite) TestDrop(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) snap, _ := s.collection.ByUUID(s.snapshot1.UUID) c.Check(snap, Equals, s.snapshot1) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3a6b3c090bcd8266f9ebe7e92463bf2eb9e3be7f..a0ca55b08a755613d45d468bd3c2ba569d5da48f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,15 +1,13 @@ # Global ARGs shared by all stages ARG DEBIAN_FRONTEND=noninteractive -ARG GOPATH=/usr/local/go # Build aptly -FROM debian:bookworm-slim as builder +FROM golang:1.21-bookworm as builder ENV LC_ALL=C.UTF-8 ARG APTLY_DEBUG=false ARG DEBIAN_FRONTEND # Useful for passing flags down for development purposes. ARG GOFLAGS -ARG GOPATH RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -17,7 +15,6 @@ RUN apt-get update && \ ca-certificates \ gcc \ git \ - golang-go \ libc6-dev COPY . /work @@ -28,7 +25,6 @@ RUN sed -i "s/\\(EnableDebug = \\).*/\1$APTLY_DEBUG/" aptly/version.go \ FROM debian:bookworm-slim as server ENV LC_ALL=C.UTF-8 ARG DEBIAN_FRONTEND -ARG GOPATH RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -38,7 +34,7 @@ RUN apt-get update && \ gpgv \ xz-utils -COPY --from=builder $GOPATH/bin/aptly /usr/local/bin/aptly +COPY --from=builder /go/bin/aptly /usr/local/bin/aptly COPY docker/aptly.conf /etc/aptly.conf COPY docker/start-aptly.sh /usr/local/bin/ diff --git a/go.mod b/go.mod index e1ce930c09a0685ac23fabcde6835b983d7523a3..8b9194b718457c1462999e31d104d9368f7149f8 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/DisposaBoy/JsonConfigReader v0.0.0-20201129172854-99cf318d67e7 github.com/awalterschulze/gographviz v2.0.3+incompatible github.com/cavaliergopher/grab/v3 v3.0.1 + github.com/cespare/xxhash/v2 v2.2.0 github.com/cheggaaa/pb v1.0.29 github.com/gin-gonic/gin v1.9.1 github.com/go-playground/validator/v10 v10.15.4 // indirect @@ -59,7 +60,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.23.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.10.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect github.com/chenzhuoyu/iasm v0.9.0 // indirect github.com/cloudflare/circl v1.3.3 // indirect diff --git a/system/t08_db/CleanupDB10Test_gold b/system/t08_db/CleanupDB10Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB10Test_gold +++ b/system/t08_db/CleanupDB10Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB11Test_gold b/system/t08_db/CleanupDB11Test_gold index be3767e9cf2263292da66829d058bf0d3a08d2f4..33be8b2a4e83ccc54526cfbbbc7c5723a5d7516c 100644 --- a/system/t08_db/CleanupDB11Test_gold +++ b/system/t08_db/CleanupDB11Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Split 11 reflist(s) into 510 bucket(s) (123181 segment(s)) Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -24,6 +25,7 @@ List of package keys to delete: - Pi386 gnuplot-nox 4.6.1-1~maverick2 17785995cf0f815 - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB12Test_gold b/system/t08_db/CleanupDB12Test_gold index 31da9b23c40924fe834c21533c40121a02eba4b0..4e88abc2129998735d45ead7fc3024ffd0034bd4 100644 --- a/system/t08_db/CleanupDB12Test_gold +++ b/system/t08_db/CleanupDB12Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Skipped splitting 11 reflist(s) into 510 bucket(s) (123181 segment(s)), as -dry-run has been requested. Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -25,6 +26,7 @@ List of package keys to delete: - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f Skipped deletion, as -dry-run has been requested. +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB1Test_gold b/system/t08_db/CleanupDB1Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB1Test_gold +++ b/system/t08_db/CleanupDB1Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB2Test_gold b/system/t08_db/CleanupDB2Test_gold index 1f289e677bb12f7111a9b9e52c5b47df2eca0eb5..4e84de6a82213ac990b3b676e85f415b9943c99e 100644 --- a/system/t08_db/CleanupDB2Test_gold +++ b/system/t08_db/CleanupDB2Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (73270)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB3Test_gold b/system/t08_db/CleanupDB3Test_gold index 73279e145f508169ae690a2c24ebe5e12f7d0a3e..73c82f87d17f1ce539d7ae1d501cb11b00b5e595 100644 --- a/system/t08_db/CleanupDB3Test_gold +++ b/system/t08_db/CleanupDB3Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB4Test_gold b/system/t08_db/CleanupDB4Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB4Test_gold +++ b/system/t08_db/CleanupDB4Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB5Test_gold b/system/t08_db/CleanupDB5Test_gold index 73279e145f508169ae690a2c24ebe5e12f7d0a3e..47bba4e2ba84624c874e300539f43919702bae23 100644 --- a/system/t08_db/CleanupDB5Test_gold +++ b/system/t08_db/CleanupDB5Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB6Test_gold b/system/t08_db/CleanupDB6Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB6Test_gold +++ b/system/t08_db/CleanupDB6Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB7Test_gold b/system/t08_db/CleanupDB7Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB7Test_gold +++ b/system/t08_db/CleanupDB7Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB8Test_gold b/system/t08_db/CleanupDB8Test_gold index f769f203f72285223e7988f3ac4416f319605d06..43ebe9aaa59c389eb9ef18fc315e2ebdb53f7d6f 100644 --- a/system/t08_db/CleanupDB8Test_gold +++ b/system/t08_db/CleanupDB8Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (4)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (6)... diff --git a/system/t08_db/CleanupDB9Test_gold b/system/t08_db/CleanupDB9Test_gold index 138adc2947775925c7066e4f3f035629d2f2d1ca..faa25944e39a3947682179fa340aa037fcc17e90 100644 --- a/system/t08_db/CleanupDB9Test_gold +++ b/system/t08_db/CleanupDB9Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)...