From 2bf1cbfa2c8f6b37256bbfa5526fb5a478c92c96 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez <ryan.gonzalez@collabora.com> Date: Wed, 6 Dec 2023 17:57:37 -0600 Subject: [PATCH 1/4] docker: Switch from building on Debian to using the official Go image Getting Go 1.21 (required by newer aptly) on bookworm requires utilizing the backports repository; it's easier to just rely on the official images instead. Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com> --- docker/Dockerfile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3a6b3c09..a0ca55b0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,15 +1,13 @@ # Global ARGs shared by all stages ARG DEBIAN_FRONTEND=noninteractive -ARG GOPATH=/usr/local/go # Build aptly -FROM debian:bookworm-slim as builder +FROM golang:1.21-bookworm as builder ENV LC_ALL=C.UTF-8 ARG APTLY_DEBUG=false ARG DEBIAN_FRONTEND # Useful for passing flags down for development purposes. ARG GOFLAGS -ARG GOPATH RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -17,7 +15,6 @@ RUN apt-get update && \ ca-certificates \ gcc \ git \ - golang-go \ libc6-dev COPY . 
/work @@ -28,7 +25,6 @@ RUN sed -i "s/\\(EnableDebug = \\).*/\1$APTLY_DEBUG/" aptly/version.go \ FROM debian:bookworm-slim as server ENV LC_ALL=C.UTF-8 ARG DEBIAN_FRONTEND -ARG GOPATH RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -38,7 +34,7 @@ RUN apt-get update && \ gpgv \ xz-utils -COPY --from=builder $GOPATH/bin/aptly /usr/local/bin/aptly +COPY --from=builder /go/bin/aptly /usr/local/bin/aptly COPY docker/aptly.conf /etc/aptly.conf COPY docker/start-aptly.sh /usr/local/bin/ -- GitLab From e84b0a3999540557ca44eb7dee26e0ac77936957 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez <ryan.gonzalez@collabora.com> Date: Fri, 6 Oct 2023 15:33:58 -0500 Subject: [PATCH 2/4] Skip loading reflists when listing published repos The output doesn't actually depend on the reflists, and loading them for every published repo starts to take substantial time and memory. Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com> --- api/publish.go | 2 +- cmd/publish_list.go | 2 +- deb/publish.go | 40 ++++++++++++++++++++++++++++++---------- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/api/publish.go b/api/publish.go index 110dbe54..1cd0f009 100644 --- a/api/publish.go +++ b/api/publish.go @@ -61,7 +61,7 @@ func apiPublishList(c *gin.Context) { result := make([]*deb.PublishedRepo, 0, collection.Len()) err := collection.ForEach(func(repo *deb.PublishedRepo) error { - err := collection.LoadComplete(repo, collectionFactory) + err := collection.LoadShallow(repo, collectionFactory) if err != nil { return err } diff --git a/cmd/publish_list.go b/cmd/publish_list.go index f4bbb2af..e3a1d1a8 100644 --- a/cmd/publish_list.go +++ b/cmd/publish_list.go @@ -34,7 +34,7 @@ func aptlyPublishListTxt(cmd *commander.Command, _ []string) error { published := make([]string, 0, collectionFactory.PublishedRepoCollection().Len()) err = collectionFactory.PublishedRepoCollection().ForEach(func(repo *deb.PublishedRepo) error { - e := 
collectionFactory.PublishedRepoCollection().LoadComplete(repo, collectionFactory) + e := collectionFactory.PublishedRepoCollection().LoadShallow(repo, collectionFactory) if e != nil { fmt.Fprintf(os.Stderr, "Error found on one publish (prefix:%s / distribution:%s / component:%s\n)", repo.StoragePrefix(), repo.Distribution, repo.Components()) diff --git a/deb/publish.go b/deb/publish.go index 826f936a..b1401d8f 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -280,7 +280,7 @@ func NewPublishedRepo(storage, prefix, distribution string, architectures []stri return result, nil } -// MarshalJSON requires object to be "loaded completely" +// MarshalJSON requires object to filled by "LoadShallow" or "LoadComplete" func (p *PublishedRepo) MarshalJSON() ([]byte, error) { type sourceInfo struct { Component, Name string @@ -986,8 +986,11 @@ func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { return batch.Write() } -// LoadComplete loads additional information for remote repo -func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { +// LoadShallow loads basic information on the repo's sources +// +// This does not *fully* load in the sources themselves and their packages. +// It's useful if you just want to use JSON serialization without loading in unnecessary things. 
+func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { repo.sourceItems = make(map[string]repoSourceItem) if repo.SourceKind == SourceSnapshot { @@ -998,10 +1001,6 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if err != nil { return } - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) - if err != nil { - return - } repo.sourceItems[component] = item } @@ -1013,6 +1012,30 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if err != nil { return } + + item.packageRefs = &PackageRefList{} + repo.sourceItems[component] = item + } + } else { + panic("unknown SourceKind") + } + + return +} + +// LoadComplete loads complete information on the sources of the repo *and* their packages +func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, collectionFactory *CollectionFactory) (err error) { + collection.LoadShallow(repo, collectionFactory) + + if repo.SourceKind == SourceSnapshot { + for _, item := range repo.sourceItems { + err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) + if err != nil { + return + } + } + } else if repo.SourceKind == SourceLocalRepo { + for component, item := range repo.sourceItems { err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) if err != nil { return @@ -1031,13 +1054,10 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col } } - item.packageRefs = &PackageRefList{} err = item.packageRefs.Decode(encoded) if err != nil { return } - - repo.sourceItems[component] = item } } else { panic("unknown SourceKind") -- GitLab From f52c4f382cc88f3cacb2fdf1189d3194fde7eecf Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez <ryan.gonzalez@collabora.com> Date: Fri, 10 Nov 2023 10:23:42 -0600 Subject: [PATCH 3/4] Fix reflist diffs failing to compact when one of the inputs ends The previous reflist logic 
would early-exit the loop body if one of the lists was empty, but that skips the compacting logic entirely. Instead of doing the early-exit, we can leave a list's ref as nil when the list end is reached and then flip the comparison result, which will essentially treat it as being greater than all others. This should preserve the general behavior without omitting the compaction. Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com> --- deb/reflist.go | 34 ++++++++++++---------------------- deb/reflist_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/deb/reflist.go b/deb/reflist.go index 25cb0b6e..30396548 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -196,31 +196,21 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle // until we reached end of both lists for il < ll || ir < lr { - // if we've exhausted left list, pull the rest from the right - if il == ll { - pr, err = packageCollection.ByKey(r.Refs[ir]) - if err != nil { - return nil, err - } - result = append(result, PackageDiff{Left: nil, Right: pr}) - ir++ - continue + var rl, rr []byte + if il < ll { + rl = l.Refs[il] } - // if we've exhausted right list, pull the rest from the left - if ir == lr { - pl, err = packageCollection.ByKey(l.Refs[il]) - if err != nil { - return nil, err - } - result = append(result, PackageDiff{Left: pl, Right: nil}) - il++ - continue + if ir < lr { + rr = r.Refs[ir] } - // refs on both sides are present, load them - rl, rr := l.Refs[il], r.Refs[ir] // compare refs rel := bytes.Compare(rl, rr) + // an unset ref is less than all others, but since it represents the end + // of a reflist, it should be *greater*, so flip the comparison result + if rl == nil || rr == nil { + rel = -rel + } if rel == 0 { // refs are identical, so are packages, advance pointer @@ -229,14 +219,14 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle pl, pr = nil, nil } else 
{ // load pl & pr if they haven't been loaded before - if pl == nil { + if pl == nil && rl != nil { pl, err = packageCollection.ByKey(rl) if err != nil { return nil, err } } - if pr == nil { + if pr == nil && rr != nil { pr, err = packageCollection.ByKey(rr) if err != nil { return nil, err diff --git a/deb/reflist_test.go b/deb/reflist_test.go index d0ce21f5..ec7ed09f 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -237,6 +237,41 @@ func (s *PackageRefListSuite) TestDiff(c *C) { } +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) + + reflistA := NewPackageRefListFromPackageList(listA) + reflistB := NewPackageRefListFromPackageList(listB) + + diffAB, err := reflistA.Diff(reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +} + func (s *PackageRefListSuite) TestMerge(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) coll := NewPackageCollection(db) -- GitLab From 353a6374cb4244e6c55120b3a2447f25f15cc520 Mon Sep 17 00:00:00 2001 From: Ryan Gonzalez <ryan.gonzalez@collabora.com> Date: Fri, 10 Nov 2023 17:01:16 -0600 Subject: [PATCH 4/4] Split reflists to share their contents across snapshots In current aptly, each repository and snapshot has its own reflist in the database. 
This brings a few problems with it: - Given sufficiently large repositories and snapshots, these lists can get enormous, reaching >1MB. This is a problem for LevelDB's overall performance, as it tends to prefer values around the configured block size (defaults to just 4KiB). - When you take these large repositories and snapshot them, you have a full, new copy of the reflist, even if only a few packages changed. This means that having a lot of snapshots with a few changes causes the database to basically be full of largely duplicate reflists. - All the duplication also means that many of the same refs are being loaded repeatedly, which can cause some slowdown but, more notably, eats up huge amounts of memory. - Adding on more and more new repositories and snapshots will cause the time and memory spent on things like cleanup and publishing to grow roughly linearly. At the core, there are two problems here: - Reflists get very big because there are just a lot of packages. - Different reflists can tend to duplicate much of the same contents. *Split reflists* aim at solving this by separating reflists into 64 *buckets*. Package refs are sorted into individual buckets according to the following system: - Take the first 3 letters of the package name, after dropping a `lib` prefix. (Using only the first 3 letters will cause packages with similar prefixes to end up in the same bucket, under the assumption that packages with similar names tend to be updated together.) - Take the 64-bit xxhash of these letters. (xxhash was chosen because it has relatively good distribution across the individual bits, which is important for the next step.) - Use the first 6 bits of the hash (range [0:63]) as an index into the buckets. Once refs are placed in buckets, a sha256 digest of all the refs in the bucket is taken. These buckets are then stored in the database, split into roughly block-sized segments, and all the repositories and snapshots simply store an array of bucket digests. 
This approach means that *repositories and snapshots can share their reflist buckets*. If a snapshot is taken of a repository, it will have the same contents, so its split reflist will point to the same buckets as the base repository, and only one copy of each bucket is stored in the database. When some packages in the repository change, only the buckets containing those packages will be modified; all the other buckets will remain unchanged, and thus their contents will still be shared. Later on, when these reflists are loaded, each bucket is only loaded once, short-cutting the loading of many megabytes of data. In effect, split reflists are essentially copy-on-write, with only the changed buckets stored individually. Changing the disk format means that a migration needs to take place, so that task is moved into the database cleanup step, which will migrate reflists over to split reflists, as well as delete any unused reflist buckets. All the reflist tests are also changed to additionally test out split reflists; although the internal logic is all shared (since buckets are, themselves, just normal reflists), some special additions are needed to have native versions of the various reflist helper methods. In our tests, we've observed the following improvements: - Memory usage during publish and database cleanup, with `GOMEMLIMIT=2GiB`, goes down from ~3.2GiB (larger than the memory limit!) to ~0.7GiB, a decrease of ~4.5x. - Database size decreases from 1.3GB to 367MB. *In my local tests*, publish times had also decreased down to mere seconds but the same effect wasn't observed on the server, with the times staying around the same. My suspicions are that this is due to I/O performance: my local system is an M1 MBP, which almost certainly has much faster disk speeds than our DigitalOcean block volumes. 
Split reflists include a side effect of requiring more random accesses from reading all the buckets by their keys, so if your random I/O performance is slower, it might cancel out the benefits. That being said, even in that case, the memory usage and database size advantages still persist. Signed-off-by: Ryan Gonzalez <ryan.gonzalez@collabora.com> --- api/api.go | 2 +- api/db.go | 81 ++- api/metrics.go | 2 +- api/mirror.go | 12 +- api/publish.go | 10 +- api/repos.go | 20 +- api/snapshot.go | 30 +- cmd/cmd.go | 2 +- cmd/db_cleanup.go | 130 +++-- cmd/mirror_create.go | 2 +- cmd/mirror_edit.go | 2 +- cmd/mirror_rename.go | 2 +- cmd/mirror_show.go | 9 +- cmd/mirror_update.go | 8 +- cmd/package_show.go | 6 +- cmd/publish_snapshot.go | 6 +- cmd/publish_switch.go | 4 +- cmd/publish_update.go | 2 +- cmd/repo_add.go | 6 +- cmd/repo_create.go | 4 +- cmd/repo_edit.go | 4 +- cmd/repo_include.go | 2 +- cmd/repo_list.go | 5 +- cmd/repo_move.go | 16 +- cmd/repo_remove.go | 6 +- cmd/repo_rename.go | 2 +- cmd/repo_show.go | 9 +- cmd/snapshot_create.go | 6 +- cmd/snapshot_diff.go | 6 +- cmd/snapshot_filter.go | 4 +- cmd/snapshot_merge.go | 4 +- cmd/snapshot_pull.go | 6 +- cmd/snapshot_rename.go | 2 +- cmd/snapshot_search.go | 8 +- cmd/snapshot_show.go | 13 +- cmd/snapshot_verify.go | 2 +- database/database.go | 2 + database/goleveldb/database.go | 3 + database/goleveldb/storage.go | 11 + deb/changes.go | 9 +- deb/changes_test.go | 12 +- deb/collections.go | 12 + deb/graph.go | 6 +- deb/list.go | 2 +- deb/local.go | 28 +- deb/local_test.go | 34 +- deb/publish.go | 89 +-- deb/publish_bench_test.go | 7 +- deb/publish_test.go | 84 +-- deb/reflist.go | 772 ++++++++++++++++++++++++- deb/reflist_bench_test.go | 38 ++ deb/reflist_test.go | 890 +++++++++++++++++++---------- deb/remote.go | 26 +- deb/remote_test.go | 44 +- deb/snapshot.go | 34 +- deb/snapshot_bench_test.go | 15 +- deb/snapshot_test.go | 50 +- go.mod | 2 +- system/t08_db/CleanupDB10Test_gold | 1 + 
system/t08_db/CleanupDB11Test_gold | 2 + system/t08_db/CleanupDB12Test_gold | 2 + system/t08_db/CleanupDB1Test_gold | 1 + system/t08_db/CleanupDB2Test_gold | 1 + system/t08_db/CleanupDB3Test_gold | 1 + system/t08_db/CleanupDB4Test_gold | 1 + system/t08_db/CleanupDB5Test_gold | 1 + system/t08_db/CleanupDB6Test_gold | 1 + system/t08_db/CleanupDB7Test_gold | 1 + system/t08_db/CleanupDB8Test_gold | 1 + system/t08_db/CleanupDB9Test_gold | 1 + 70 files changed, 1959 insertions(+), 660 deletions(-) diff --git a/api/api.go b/api/api.go index c15be6e2..f931be7d 100644 --- a/api/api.go +++ b/api/api.go @@ -181,7 +181,7 @@ func maybeRunTaskInBackground(c *gin.Context, name string, resources []string, p // Common piece of code to show list of packages, // with searching & details if requested -func showPackages(c *gin.Context, reflist *deb.PackageRefList, collectionFactory *deb.CollectionFactory) { +func showPackages(c *gin.Context, reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) { result := []*deb.Package{} list, err := deb.NewPackageListFromRefList(reflist, collectionFactory.PackageCollection(), nil) diff --git a/api/db.go b/api/db.go index 3f8b826d..8869fbd9 100644 --- a/api/db.go +++ b/api/db.go @@ -5,6 +5,7 @@ import ( "sort" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/task" "github.com/aptly-dev/aptly/utils" @@ -20,18 +21,22 @@ func apiDbCleanup(c *gin.Context) { collectionFactory := context.NewCollectionFactory() - // collect information about referenced packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about referenced packages and their reflist buckets... 
+ existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() + + reflistMigration := collectionFactory.RefListCollection().NewMigration() out.Printf("Loading mirrors, local repos, snapshots and published repos...") err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -40,14 +45,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -56,12 +61,14 @@ func apiDbCleanup(c *gin.Context) { } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = 
existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) return nil }) @@ -73,13 +80,16 @@ func apiDbCleanup(c *gin.Context) { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) } return nil }) @@ -87,11 +97,20 @@ func apiDbCleanup(c *gin.Context) { return nil, err } + err = reflistMigration.Flush() + if err != nil { + return nil, err + } + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + out.Printf("Split %d reflist(s) into %d bucket(s) (%d segment(s))", + stats.Reflists, stats.Buckets, stats.Segments) + } + // ... 
and compare it to the list of all packages out.Printf("Loading list of all packages...") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced out.Printf("Deleting unreferenced packages (%d)...", toDelete.Len()) @@ -112,6 +131,28 @@ func apiDbCleanup(c *gin.Context) { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return nil, err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + out.Printf("Deleting unreferenced reflist buckets (%d)...", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return nil, err + } + + if err := batch.Write(); err != nil { + return nil, err + } + } + // now, build a list of files that should be present in Repository (package pool) out.Printf("Building list of files referenced by packages...") referencedFiles := make([]string, 0, existingPackageRefs.Len()) diff --git a/api/metrics.go b/api/metrics.go index 94a9dc25..875c3c19 100644 --- a/api/metrics.go +++ b/api/metrics.go @@ -102,7 +102,7 @@ func countPackagesByRepos() { components := repo.Components() for _, c := range components { - count := float64(len(repo.RefList(c).Refs)) + count := float64(repo.RefList(c).Len()) apiReposPackageCountGauge.WithLabelValues(fmt.Sprintf("%s", (repo.SourceNames())), repo.Distribution, c).Set(count) } diff --git a/api/mirror.go b/api/mirror.go index 285e09e7..f0b9aa28 100644 --- a/api/mirror.go +++ b/api/mirror.go @@ -123,7 +123,7 @@ func apiMirrorsCreate(c *gin.Context) { return } - err = collection.Add(repo) + err = collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { 
AbortWithJSONError(c, 500, fmt.Errorf("unable to add mirror: %s", err)) return @@ -183,7 +183,7 @@ func apiMirrorsShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -203,7 +203,7 @@ func apiMirrorsPackages(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to show: %s", err)) } @@ -397,12 +397,12 @@ func apiMirrorsUpdate(c *gin.Context) { e := context.ReOpenDatabase() if e == nil { remote.MarkAsIdle() - collection.Update(remote) + collection.Update(remote, collectionFactory.RefListCollection()) } }() remote.MarkAsUpdating() - err = collection.Update(remote) + err = collection.Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } @@ -568,7 +568,7 @@ func apiMirrorsUpdate(c *gin.Context) { log.Info().Msgf("%s: Finalizing download\n", b.Name) remote.FinalizeDownload(collectionFactory, out) - err = collectionFactory.RemoteRepoCollection().Update(remote) + err = collectionFactory.RemoteRepoCollection().Update(remote, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } diff --git a/api/publish.go b/api/publish.go index 1cd0f009..f646e6ca 100644 --- a/api/publish.go +++ b/api/publish.go @@ -140,7 +140,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(snapshot.ResourceKey())) - err = snapshotCollection.LoadComplete(snapshot) + err = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { 
AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) return @@ -164,7 +164,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { } resources = append(resources, string(localRepo.Key())) - err = localCollection.LoadComplete(localRepo) + err = localCollection.LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, fmt.Errorf("unable to publish: %s", err)) } @@ -231,7 +231,7 @@ func apiPublishRepoOrSnapshot(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to publish: %s", err) } - err = collection.Add(published) + err = collection.Add(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } @@ -311,7 +311,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return } - err2 = snapshotCollection.LoadComplete(snapshot) + err2 = snapshotCollection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err2 != nil { AbortWithJSONError(c, 500, err2) return @@ -346,7 +346,7 @@ func apiPublishUpdateSwitch(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to update: %s", err) } - err = collection.Update(published) + err = collection.Update(published, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save to DB: %s", err) } diff --git a/api/repos.go b/api/repos.go index fe6206d3..ca5d62f8 100644 --- a/api/repos.go +++ b/api/repos.go @@ -82,7 +82,7 @@ func apiReposCreate(c *gin.Context) { collectionFactory := context.NewCollectionFactory() collection := collectionFactory.LocalRepoCollection() - err := collection.Add(repo) + err := collection.Add(repo, collectionFactory.RefListCollection()) if err != nil { 
AbortWithJSONError(c, 400, err) return @@ -132,7 +132,7 @@ func apiReposEdit(c *gin.Context) { repo.DefaultComponent = *b.DefaultComponent } - err = collection.Update(repo) + err = collection.Update(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -201,7 +201,7 @@ func apiReposPackagesShow(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -229,7 +229,7 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -261,9 +261,9 @@ func apiReposPackagesAddDelete(c *gin.Context, taskNamePrefix string, cb func(li } } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -320,7 +320,7 @@ func apiReposPackageFromDir(c *gin.Context) { return } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -369,9 +369,9 @@ func apiReposPackageFromDir(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to import package files: %s", err) } - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err 
= collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to save: %s", err) } @@ -489,7 +489,7 @@ func apiReposIncludePackageFromDir(c *gin.Context) { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignature, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, nil, query.Parse) failedFiles = append(failedFiles, failedFiles2...) if err != nil { diff --git a/api/snapshot.go b/api/snapshot.go index af905221..a58fe53b 100644 --- a/api/snapshot.go +++ b/api/snapshot.go @@ -69,7 +69,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { return &task.ProcessReturnValue{Code: http.StatusConflict, Value: nil}, err } - err = collection.LoadComplete(repo) + err = collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -83,7 +83,7 @@ func apiSnapshotsCreateFromMirror(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -128,7 +128,7 @@ func apiSnapshotsCreate(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -155,9 +155,9 @@ func apiSnapshotsCreate(c *gin.Context) { } } - snapshot = 
deb.NewSnapshotFromRefList(b.Name, sources, deb.NewPackageRefListFromPackageList(list), b.Description) + snapshot = deb.NewSnapshotFromRefList(b.Name, sources, deb.NewSplitRefListFromPackageList(list), b.Description) - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -197,7 +197,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { resources := []string{string(repo.Key()), "S" + b.Name} taskName := fmt.Sprintf("Create snapshot of repo %s", name) maybeRunTaskInBackground(c, taskName, resources, func(out aptly.Progress, detail *task.Detail) (*task.ProcessReturnValue, error) { - err := collection.LoadComplete(repo) + err := collection.LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -211,7 +211,7 @@ func apiSnapshotsCreateFromRepository(c *gin.Context) { snapshot.Description = b.Description } - err = snapshotCollection.Add(snapshot) + err = snapshotCollection.Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusBadRequest, Value: nil}, err } @@ -261,7 +261,7 @@ func apiSnapshotsUpdate(c *gin.Context) { snapshot.Description = b.Description } - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, err } @@ -280,7 +280,7 @@ func apiSnapshotsShow(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -347,20 +347,20 @@ func apiSnapshotsDiff(c *gin.Context) { return 
} - err = collection.LoadComplete(snapshotA) + err = collection.LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } - err = collection.LoadComplete(snapshotB) + err = collection.LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { AbortWithJSONError(c, 500, err) return @@ -390,7 +390,7 @@ func apiSnapshotsSearchPackages(c *gin.Context) { return } - err = collection.LoadComplete(snapshot) + err = collection.LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, 500, err) return @@ -441,7 +441,7 @@ func apiSnapshotsMerge(c *gin.Context) { return } - err = snapshotCollection.LoadComplete(sources[i]) + err = snapshotCollection.LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { AbortWithJSONError(c, http.StatusInternalServerError, err) return @@ -467,7 +467,7 @@ func apiSnapshotsMerge(c *gin.Context) { snapshot = deb.NewSnapshotFromRefList(body.Destination, sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return &task.ProcessReturnValue{Code: http.StatusInternalServerError, Value: nil}, fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/cmd.go b/cmd/cmd.go index 14a0efd1..b70bf145 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -21,7 +21,7 @@ const ( ) // ListPackagesRefList shows list of packages in PackageRefList -func ListPackagesRefList(reflist *deb.PackageRefList, collectionFactory 
*deb.CollectionFactory) (err error) { +func ListPackagesRefList(reflist deb.AnyRefList, collectionFactory *deb.CollectionFactory) (err error) { fmt.Printf("Packages:\n") if reflist == nil { diff --git a/cmd/db_cleanup.go b/cmd/db_cleanup.go index 66fece67..ec190685 100644 --- a/cmd/db_cleanup.go +++ b/cmd/db_cleanup.go @@ -6,6 +6,7 @@ import ( "strings" "github.com/aptly-dev/aptly/aptly" + "github.com/aptly-dev/aptly/database" "github.com/aptly-dev/aptly/deb" "github.com/aptly-dev/aptly/utils" "github.com/smira/commander" @@ -24,12 +25,20 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { dryRun := context.Flags().Lookup("dry-run").Value.Get().(bool) collectionFactory := context.NewCollectionFactory() - // collect information about references packages... - existingPackageRefs := deb.NewPackageRefList() + // collect information about references packages and their reflistbuckets... + existingPackageRefs := deb.NewSplitRefList() + existingBuckets := deb.NewRefListDigestSet() // used only in verbose mode to report package use source packageRefSources := map[string][]string{} + var reflistMigration *deb.RefListMigration + if !dryRun { + reflistMigration = collectionFactory.RefListCollection().NewMigration() + } else { + reflistMigration = collectionFactory.RefListCollection().NewMigrationDryRun() + } + context.Progress().ColoredPrintf("@{w!}Loading mirrors, local repos, snapshots and published repos...@|") if verbose { context.Progress().ColoredPrintf("@{y}Loading mirrors:@|") @@ -39,20 +48,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = 
existingPackageRefs.Merge(repo.RefList(), false, true) - if verbose { - description := fmt.Sprintf("mirror %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + + if verbose { + description := fmt.Sprintf("mirror %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -71,21 +81,23 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", repo.Name) } - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) - if e != nil { + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, repo.RefKey(), reflistMigration) + if e != nil && e != database.ErrNotFound { return e } - if repo.RefList() != nil { - existingPackageRefs = existingPackageRefs.Merge(repo.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) - if verbose { - description := fmt.Sprintf("local repo %s", repo.Name) - repo.RefList().ForEach(func(key []byte) error { - packageRefSources[string(key)] = append(packageRefSources[string(key)], description) - return nil - }) - } + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + + if verbose { + description := fmt.Sprintf("local repo %s", repo.Name) + sl.ForEach(func(key []byte) error { + packageRefSources[string(key)] = append(packageRefSources[string(key)], description) + return nil + }) } return nil @@ -104,16 +116,18 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { context.Progress().ColoredPrintf("- @{g}%s@|", snapshot.Name) } - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + 
sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, snapshot.RefKey(), reflistMigration) if e != nil { return e } - existingPackageRefs = existingPackageRefs.Merge(snapshot.RefList(), false, true) + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) if verbose { description := fmt.Sprintf("snapshot %s", snapshot.Name) - snapshot.RefList().ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -136,17 +150,21 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { if published.SourceKind != deb.SourceLocalRepo { return nil } - e := collectionFactory.PublishedRepoCollection().LoadComplete(published, collectionFactory) - if e != nil { - return e - } for _, component := range published.Components() { - existingPackageRefs = existingPackageRefs.Merge(published.RefList(component), false, true) + sl := deb.NewSplitRefList() + e := collectionFactory.RefListCollection().LoadCompleteAndMigrate(sl, published.RefKey(component), reflistMigration) + if e != nil { + return e + } + + existingPackageRefs = existingPackageRefs.Merge(sl, false, true) + existingBuckets.AddAllInRefList(sl) + if verbose { description := fmt.Sprintf("published repository %s:%s/%s component %s", published.Storage, published.Prefix, published.Distribution, component) - published.RefList(component).ForEach(func(key []byte) error { + sl.ForEach(func(key []byte) error { packageRefSources[string(key)] = append(packageRefSources[string(key)], description) return nil }) @@ -160,11 +178,29 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { collectionFactory.Flush() + err = reflistMigration.Flush() + if err != nil { + return err + } + + if verbose { + if stats := reflistMigration.Stats(); stats.Reflists > 0 { + if !dryRun { + 
context.Progress().ColoredPrintf("@{w!}Split %d reflist(s) into %d bucket(s) (%d segment(s))@|", + stats.Reflists, stats.Buckets, stats.Segments) + } else { + context.Progress().ColoredPrintf( + "@{y!}Skipped splitting %d reflist(s) into %d bucket(s) (%d segment(s)), as -dry-run has been requested.@|", + stats.Reflists, stats.Buckets, stats.Segments) + } + } + } + // ... and compare it to the list of all packages context.Progress().ColoredPrintf("@{w!}Loading list of all packages...@|") allPackageRefs := collectionFactory.PackageCollection().AllPackageRefs() - toDelete := allPackageRefs.Subtract(existingPackageRefs) + toDelete := allPackageRefs.Subtract(existingPackageRefs.Flatten()) // delete packages that are no longer referenced context.Progress().ColoredPrintf("@{r!}Deleting unreferenced packages (%d)...@|", toDelete.Len()) @@ -202,6 +238,32 @@ func aptlyDbCleanup(cmd *commander.Command, args []string) error { } } + bucketsToDelete, err := collectionFactory.RefListCollection().AllBucketDigests() + if err != nil { + return err + } + + bucketsToDelete.RemoveAll(existingBuckets) + + context.Progress().ColoredPrintf("@{r!}Deleting unreferenced reflist buckets (%d)...@|", bucketsToDelete.Len()) + if bucketsToDelete.Len() > 0 { + if !dryRun { + batch := db.CreateBatch() + err := bucketsToDelete.ForEach(func(digest []byte) error { + return collectionFactory.RefListCollection().UnsafeDropBucket(digest, batch) + }) + if err != nil { + return err + } + + if err := batch.Write(); err != nil { + return err + } + } else { + context.Progress().ColoredPrintf("@{y!}Skipped reflist deletion, as -dry-run has been requested.@|") + } + } + collectionFactory.Flush() // now, build a list of files that should be present in Repository (package pool) diff --git a/cmd/mirror_create.go b/cmd/mirror_create.go index 78d91b58..eb2ba7ca 100644 --- a/cmd/mirror_create.go +++ b/cmd/mirror_create.go @@ -65,7 +65,7 @@ func aptlyMirrorCreate(cmd *commander.Command, args []string) error { } 
collectionFactory := context.NewCollectionFactory() - err = collectionFactory.RemoteRepoCollection().Add(repo) + err = collectionFactory.RemoteRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add mirror: %s", err) } diff --git a/cmd/mirror_edit.go b/cmd/mirror_edit.go index 86462c4c..93986c06 100644 --- a/cmd/mirror_edit.go +++ b/cmd/mirror_edit.go @@ -75,7 +75,7 @@ func aptlyMirrorEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/mirror_rename.go b/cmd/mirror_rename.go index 2ff9f920..ff453b85 100644 --- a/cmd/mirror_rename.go +++ b/cmd/mirror_rename.go @@ -37,7 +37,7 @@ func aptlyMirrorRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/mirror_show.go b/cmd/mirror_show.go index 03179161..3c52d6e3 100644 --- a/cmd/mirror_show.go +++ b/cmd/mirror_show.go @@ -38,7 +38,7 @@ func aptlyMirrorShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -98,12 +98,13 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().RemoteRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := 
collectionFactory.RemoteRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -113,7 +114,7 @@ func aptlyMirrorShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/mirror_update.go b/cmd/mirror_update.go index 2bca585a..29a05c78 100644 --- a/cmd/mirror_update.go +++ b/cmd/mirror_update.go @@ -30,7 +30,7 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to update: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -98,12 +98,12 @@ func aptlyMirrorUpdate(cmd *commander.Command, args []string) error { err = context.ReOpenDatabase() if err == nil { repo.MarkAsIdle() - collectionFactory.RemoteRepoCollection().Update(repo) + collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) } }() repo.MarkAsUpdating() - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } @@ -259,7 +259,7 @@ func aptlyMirrorUpdate(cmd 
*commander.Command, args []string) error { } repo.FinalizeDownload(collectionFactory, context.Progress()) - err = collectionFactory.RemoteRepoCollection().Update(repo) + err = collectionFactory.RemoteRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to update: %s", err) } diff --git a/cmd/package_show.go b/cmd/package_show.go index 37f07e9b..6bbec984 100644 --- a/cmd/package_show.go +++ b/cmd/package_show.go @@ -14,7 +14,7 @@ import ( func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) (err error) { err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *deb.RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -30,7 +30,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -46,7 +46,7 @@ func printReferencesTo(p *deb.Package, collectionFactory *deb.CollectionFactory) } err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *deb.Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/publish_snapshot.go b/cmd/publish_snapshot.go index 322479aa..a1e89ea1 100644 --- a/cmd/publish_snapshot.go +++ b/cmd/publish_snapshot.go @@ -49,7 +49,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = 
collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -85,7 +85,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(localRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to publish: %s", err) } @@ -171,7 +171,7 @@ func aptlyPublishSnapshotOrRepo(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Add(published) + err = collectionFactory.PublishedRepoCollection().Add(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_switch.go b/cmd/publish_switch.go index 0784fba3..db98ed4d 100644 --- a/cmd/publish_switch.go +++ b/cmd/publish_switch.go @@ -73,7 +73,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to switch: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to switch: %s", err) } @@ -105,7 +105,7 @@ func aptlyPublishSwitch(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/publish_update.go 
b/cmd/publish_update.go index fcdea8ed..3e043666 100644 --- a/cmd/publish_update.go +++ b/cmd/publish_update.go @@ -69,7 +69,7 @@ func aptlyPublishUpdate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to publish: %s", err) } - err = collectionFactory.PublishedRepoCollection().Update(published) + err = collectionFactory.PublishedRepoCollection().Update(published, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save to DB: %s", err) } diff --git a/cmd/repo_add.go b/cmd/repo_add.go index 8189e783..0263879f 100644 --- a/cmd/repo_add.go +++ b/cmd/repo_add.go @@ -28,7 +28,7 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to add: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add: %s", err) } @@ -58,9 +58,9 @@ func aptlyRepoAdd(cmd *commander.Command, args []string) error { processedFiles = append(processedFiles, otherFiles...) 
- repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_create.go b/cmd/repo_create.go index 5fef46d9..0e3a1e52 100644 --- a/cmd/repo_create.go +++ b/cmd/repo_create.go @@ -36,7 +36,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load source snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load source snapshot: %s", err) } @@ -44,7 +44,7 @@ func aptlyRepoCreate(cmd *commander.Command, args []string) error { repo.UpdateRefList(snapshot.RefList()) } - err = collectionFactory.LocalRepoCollection().Add(repo) + err = collectionFactory.LocalRepoCollection().Add(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add local repo: %s", err) } diff --git a/cmd/repo_edit.go b/cmd/repo_edit.go index bc81dc4a..c7fbc419 100644 --- a/cmd/repo_edit.go +++ b/cmd/repo_edit.go @@ -22,7 +22,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to edit: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to edit: %s", err) } @@ -53,7 +53,7 @@ func aptlyRepoEdit(cmd *commander.Command, args []string) error { } } - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err 
!= nil { return fmt.Errorf("unable to edit: %s", err) } diff --git a/cmd/repo_include.go b/cmd/repo_include.go index b84b96a3..4a750db1 100644 --- a/cmd/repo_include.go +++ b/cmd/repo_include.go @@ -64,7 +64,7 @@ func aptlyRepoInclude(cmd *commander.Command, args []string) error { _, failedFiles2, err = deb.ImportChangesFiles( changesFiles, reporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles, verifier, repoTemplate, context.Progress(), collectionFactory.LocalRepoCollection(), collectionFactory.PackageCollection(), - context.PackagePool(), collectionFactory.ChecksumCollection, + collectionFactory.RefListCollection(), context.PackagePool(), collectionFactory.ChecksumCollection, uploaders, query.Parse) failedFiles = append(failedFiles, failedFiles2...) diff --git a/cmd/repo_list.go b/cmd/repo_list.go index 9c4b0d47..f3ca4a8b 100644 --- a/cmd/repo_list.go +++ b/cmd/repo_list.go @@ -36,7 +36,7 @@ func aptlyRepoListTxt(cmd *commander.Command, _ []string) error { if raw { repos[i] = repo.Name } else { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -77,7 +77,8 @@ func aptlyRepoListJSON(_ *commander.Command, _ []string) error { repos := make([]*deb.LocalRepo, context.NewCollectionFactory().LocalRepoCollection().Len()) i := 0 context.NewCollectionFactory().LocalRepoCollection().ForEach(func(repo *deb.LocalRepo) error { - e := context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + collectionFactory := context.NewCollectionFactory() + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/cmd/repo_move.go b/cmd/repo_move.go index 8be6698b..004b4af0 100644 --- a/cmd/repo_move.go +++ b/cmd/repo_move.go @@ -25,13 +25,13 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return 
fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } var ( - srcRefList *deb.PackageRefList + srcRefList *deb.SplitRefList srcRepo *deb.LocalRepo ) @@ -45,7 +45,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: source and destination are the same", command) } - err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -59,7 +59,7 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to %s: %s", command, err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(srcRemoteRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to %s: %s", command, err) } @@ -150,17 +150,17 @@ func aptlyRepoMoveCopyImport(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - dstRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(dstList)) + dstRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(dstList)) - err = collectionFactory.LocalRepoCollection().Update(dstRepo) + err = collectionFactory.LocalRepoCollection().Update(dstRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } if command == "move" { // nolint: goconst - srcRepo.UpdateRefList(deb.NewPackageRefListFromPackageList(srcList)) + 
srcRepo.UpdateRefList(deb.NewSplitRefListFromPackageList(srcList)) - err = collectionFactory.LocalRepoCollection().Update(srcRepo) + err = collectionFactory.LocalRepoCollection().Update(srcRepo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_remove.go b/cmd/repo_remove.go index 93e8535c..d3a1159e 100644 --- a/cmd/repo_remove.go +++ b/cmd/repo_remove.go @@ -24,7 +24,7 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to remove: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to remove: %s", err) } @@ -59,9 +59,9 @@ func aptlyRepoRemove(cmd *commander.Command, args []string) error { if context.Flags().Lookup("dry-run").Value.Get().(bool) { context.Progress().Printf("\nChanges not saved, as dry run has been requested.\n") } else { - repo.UpdateRefList(deb.NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(deb.NewSplitRefListFromPackageList(list)) - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to save: %s", err) } diff --git a/cmd/repo_rename.go b/cmd/repo_rename.go index 9234b7c7..459afcbb 100644 --- a/cmd/repo_rename.go +++ b/cmd/repo_rename.go @@ -32,7 +32,7 @@ func aptlyRepoRename(cmd *commander.Command, args []string) error { } repo.Name = newName - err = collectionFactory.LocalRepoCollection().Update(repo) + err = collectionFactory.LocalRepoCollection().Update(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: %s", err) } diff --git a/cmd/repo_show.go b/cmd/repo_show.go index a61a5f1f..741915d1 100644 --- a/cmd/repo_show.go +++ b/cmd/repo_show.go @@ -36,7 
+36,7 @@ func aptlyRepoShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -63,12 +63,13 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { name := args[0] - repo, err := context.NewCollectionFactory().LocalRepoCollection().ByName(name) + collectionFactory := context.NewCollectionFactory() + repo, err := collectionFactory.LocalRepoCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -79,7 +80,7 @@ func aptlyRepoShowJSON(_ *commander.Command, args []string) error { if withPackages { if repo.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(repo.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(repo.RefList(), collectionFactory.PackageCollection(), context.Progress()) if err == nil { packageList = list.FullNames() } diff --git a/cmd/snapshot_create.go b/cmd/snapshot_create.go index 000a78d9..6bc319de 100644 --- a/cmd/snapshot_create.go +++ b/cmd/snapshot_create.go @@ -30,7 +30,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -50,7 +50,7 
@@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to create snapshot: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } @@ -71,7 +71,7 @@ func aptlySnapshotCreate(cmd *commander.Command, args []string) error { return commander.ErrCommandError } - err = collectionFactory.SnapshotCollection().Add(snapshot) + err = collectionFactory.SnapshotCollection().Add(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to add snapshot: %s", err) } diff --git a/cmd/snapshot_diff.go b/cmd/snapshot_diff.go index ccbea32e..19da7fcc 100644 --- a/cmd/snapshot_diff.go +++ b/cmd/snapshot_diff.go @@ -23,7 +23,7 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot A: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotA, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot A: %s", err) } @@ -34,13 +34,13 @@ func aptlySnapshotDiff(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot B: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshotB, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot B: %s", err) } // Calculate diff - diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection()) + diff, err := snapshotA.RefList().Diff(snapshotB.RefList(), collectionFactory.PackageCollection(), nil) if err != nil { return fmt.Errorf("unable to calculate diff: %s", err) } diff --git 
a/cmd/snapshot_filter.go b/cmd/snapshot_filter.go index b81a9cfc..5aed03b9 100644 --- a/cmd/snapshot_filter.go +++ b/cmd/snapshot_filter.go @@ -27,7 +27,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to filter: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to filter: %s", err) } @@ -76,7 +76,7 @@ func aptlySnapshotFilter(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[1], []*deb.Snapshot{source}, result, fmt.Sprintf("Filtered '%s', query was: '%s'", source.Name, strings.Join(args[2:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_merge.go b/cmd/snapshot_merge.go index 0a319a5a..e9eb0c77 100644 --- a/cmd/snapshot_merge.go +++ b/cmd/snapshot_merge.go @@ -24,7 +24,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to load snapshot: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(sources[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(sources[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to load snapshot: %s", err) } @@ -57,7 +57,7 @@ func aptlySnapshotMerge(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromRefList(args[0], sources, result, fmt.Sprintf("Merged from sources: %s", strings.Join(sourceDescription, ", "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return 
fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_pull.go b/cmd/snapshot_pull.go index 884b50ff..50e8488b 100644 --- a/cmd/snapshot_pull.go +++ b/cmd/snapshot_pull.go @@ -29,7 +29,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -40,7 +40,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to pull: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(source) + err = collectionFactory.SnapshotCollection().LoadComplete(source, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to pull: %s", err) } @@ -138,7 +138,7 @@ func aptlySnapshotPull(cmd *commander.Command, args []string) error { destination := deb.NewSnapshotFromPackageList(args[2], []*deb.Snapshot{snapshot, source}, packageList, fmt.Sprintf("Pulled into '%s' with '%s' as source, pull request was: '%s'", snapshot.Name, source.Name, strings.Join(args[3:], " "))) - err = collectionFactory.SnapshotCollection().Add(destination) + err = collectionFactory.SnapshotCollection().Add(destination, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to create snapshot: %s", err) } diff --git a/cmd/snapshot_rename.go b/cmd/snapshot_rename.go index b8ac74cf..b13b7dca 100644 --- a/cmd/snapshot_rename.go +++ b/cmd/snapshot_rename.go @@ -32,7 +32,7 @@ func aptlySnapshotRename(cmd *commander.Command, args []string) error { } snapshot.Name = newName - err = collectionFactory.SnapshotCollection().Update(snapshot) + err = collectionFactory.SnapshotCollection().Update(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to rename: 
%s", err) } diff --git a/cmd/snapshot_search.go b/cmd/snapshot_search.go index d771af7c..c0244f11 100644 --- a/cmd/snapshot_search.go +++ b/cmd/snapshot_search.go @@ -25,7 +25,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error command := cmd.Parent.Name() collectionFactory := context.NewCollectionFactory() - var reflist *deb.PackageRefList + var reflist *deb.SplitRefList if command == "snapshot" { // nolint: goconst var snapshot *deb.Snapshot @@ -34,7 +34,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -47,7 +47,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.RemoteRepoCollection().LoadComplete(repo) + err = collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } @@ -60,7 +60,7 @@ func aptlySnapshotMirrorRepoSearch(cmd *commander.Command, args []string) error return fmt.Errorf("unable to search: %s", err) } - err = collectionFactory.LocalRepoCollection().LoadComplete(repo) + err = collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to search: %s", err) } diff --git a/cmd/snapshot_show.go b/cmd/snapshot_show.go index e03a49e5..582b6a9e 100644 --- a/cmd/snapshot_show.go +++ b/cmd/snapshot_show.go @@ -35,7 +35,7 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { return fmt.Errorf("unable to show: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshot) + err 
= collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -86,16 +86,17 @@ func aptlySnapshotShowTxt(_ *commander.Command, args []string) error { } func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { + collectionFactory := context.NewCollectionFactory() var err error name := args[0] - snapshot, err := context.NewCollectionFactory().SnapshotCollection().ByName(name) + snapshot, err := collectionFactory.SnapshotCollection().ByName(name) if err != nil { return fmt.Errorf("unable to show: %s", err) } - err = context.NewCollectionFactory().SnapshotCollection().LoadComplete(snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to show: %s", err) } @@ -105,14 +106,14 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { for _, sourceID := range snapshot.SourceIDs { if snapshot.SourceKind == deb.SourceSnapshot { var source *deb.Snapshot - source, err = context.NewCollectionFactory().SnapshotCollection().ByUUID(sourceID) + source, err = collectionFactory.SnapshotCollection().ByUUID(sourceID) if err != nil { continue } snapshot.Snapshots = append(snapshot.Snapshots, source) } else if snapshot.SourceKind == deb.SourceLocalRepo { var source *deb.LocalRepo - source, err = context.NewCollectionFactory().LocalRepoCollection().ByUUID(sourceID) + source, err = collectionFactory.LocalRepoCollection().ByUUID(sourceID) if err != nil { continue } @@ -133,7 +134,7 @@ func aptlySnapshotShowJSON(_ *commander.Command, args []string) error { if withPackages { if snapshot.RefList() != nil { var list *deb.PackageList - list, err = deb.NewPackageListFromRefList(snapshot.RefList(), context.NewCollectionFactory().PackageCollection(), context.Progress()) + list, err = deb.NewPackageListFromRefList(snapshot.RefList(), 
collectionFactory.PackageCollection(), context.Progress()) if err != nil { return fmt.Errorf("unable to get package list: %s", err) } diff --git a/cmd/snapshot_verify.go b/cmd/snapshot_verify.go index f815f29c..fc566aae 100644 --- a/cmd/snapshot_verify.go +++ b/cmd/snapshot_verify.go @@ -23,7 +23,7 @@ func aptlySnapshotVerify(cmd *commander.Command, args []string) error { return fmt.Errorf("unable to verify: %s", err) } - err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i]) + err = collectionFactory.SnapshotCollection().LoadComplete(snapshots[i], collectionFactory.RefListCollection()) if err != nil { return fmt.Errorf("unable to verify: %s", err) } diff --git a/database/database.go b/database/database.go index 709a1aa8..03d896b2 100644 --- a/database/database.go +++ b/database/database.go @@ -48,6 +48,8 @@ type Storage interface { CreateTemporary() (Storage, error) + GetRecommendedMaxKVSize() int + Open() error Close() error CompactDB() error diff --git a/database/goleveldb/database.go b/database/goleveldb/database.go index a2874a6e..011681a6 100644 --- a/database/goleveldb/database.go +++ b/database/goleveldb/database.go @@ -9,10 +9,13 @@ import ( "github.com/aptly-dev/aptly/database" ) +const blockSize = 4 * 1024 + func internalOpen(path string, throttleCompaction bool) (*leveldb.DB, error) { o := &opt.Options{ Filter: filter.NewBloomFilter(10), OpenFilesCacheCapacity: 256, + BlockSize: blockSize, } if throttleCompaction { diff --git a/database/goleveldb/storage.go b/database/goleveldb/storage.go index 37acf3d8..1281f3fb 100644 --- a/database/goleveldb/storage.go +++ b/database/goleveldb/storage.go @@ -16,6 +16,17 @@ type storage struct { db *leveldb.DB } +func (s *storage) GetRecommendedMaxKVSize() int { + // The block size configured is not actually a *set* block size, but rather a + // *minimum*. LevelDB only checks if a block is full after a new key/value pair is + // written, meaning that blocks will tend to overflow a bit. 
+ // Therefore, using the default block size as the max value size will ensure + // that a new block will only contain a single value and that the size will + // only ever be as large as around double the block size (if the block was + // nearly full before the new items were added). + return blockSize +} + // CreateTemporary creates new DB of the same type in temp dir func (s *storage) CreateTemporary() (database.Storage, error) { tempdir, err := os.MkdirTemp("", "aptly") diff --git a/deb/changes.go b/deb/changes.go index c264986a..6c8bf881 100644 --- a/deb/changes.go +++ b/deb/changes.go @@ -291,7 +291,8 @@ func CollectChangesFiles(locations []string, reporter aptly.ResultReporter) (cha // ImportChangesFiles imports referenced files in changes files into local repository func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, acceptUnsigned, ignoreSignatures, forceReplace, noRemoveFiles bool, verifier pgp.Verifier, repoTemplate *template.Template, progress aptly.Progress, localRepoCollection *LocalRepoCollection, packageCollection *PackageCollection, - pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { + reflistCollection *RefListCollection, pool aptly.PackagePool, checksumStorageProvider aptly.ChecksumStorageProvider, uploaders *Uploaders, + parseQuery parseQuery) (processedFiles []string, failedFiles []string, err error) { for _, path := range changesFiles { var changes *Changes @@ -359,7 +360,7 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac } } - err = localRepoCollection.LoadComplete(repo) + err = localRepoCollection.LoadComplete(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to load repo: %s", err) } @@ -382,9 +383,9 @@ func ImportChangesFiles(changesFiles []string, reporter aptly.ResultReporter, ac return nil, nil, fmt.Errorf("unable to 
import package files: %s", err) } - repo.UpdateRefList(NewPackageRefListFromPackageList(list)) + repo.UpdateRefList(NewSplitRefListFromPackageList(list)) - err = localRepoCollection.Update(repo) + err = localRepoCollection.Update(repo, reflistCollection) if err != nil { return nil, nil, fmt.Errorf("unable to save: %s", err) } diff --git a/deb/changes_test.go b/deb/changes_test.go index b7dc4d95..1d50d610 100644 --- a/deb/changes_test.go +++ b/deb/changes_test.go @@ -21,6 +21,7 @@ type ChangesSuite struct { db database.Storage localRepoCollection *LocalRepoCollection packageCollection *PackageCollection + reflistCollection *RefListCollection packagePool aptly.PackagePool checksumStorage aptly.ChecksumStorage progress aptly.Progress @@ -42,6 +43,7 @@ func (s *ChangesSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.localRepoCollection = NewLocalRepoCollection(s.db) s.packageCollection = NewPackageCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.checksumStorage = files.NewMockChecksumStorage() s.packagePool = files.NewPackagePool(s.Dir, false) @@ -88,7 +90,7 @@ func (s *ChangesSuite) TestCollectChangesFiles(c *C) { func (s *ChangesSuite) TestImportChangesFiles(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) origFailedFiles := []string{ "testdata/changes/calamares.changes", @@ -124,7 +126,8 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { processedFiles, failedFiles, err := ImportChangesFiles( append(changesFiles, "testdata/changes/notexistent.changes"), s.Reporter, true, true, false, false, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, 
s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, DeepEquals, append(expectedFailedFiles, "testdata/changes/notexistent.changes")) @@ -133,7 +136,7 @@ func (s *ChangesSuite) TestImportChangesFiles(c *C) { func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { repo := NewLocalRepo("test", "Test Comment") - c.Assert(s.localRepoCollection.Add(repo), IsNil) + c.Assert(s.localRepoCollection.Add(repo, s.reflistCollection), IsNil) changesFiles, failedFiles := CollectChangesFiles( []string{"testdata/dbgsym-with-source-version"}, s.Reporter) @@ -142,7 +145,8 @@ func (s *ChangesSuite) TestImportDbgsymWithVersionedSourceField(c *C) { _, failedFiles, err := ImportChangesFiles( changesFiles, s.Reporter, true, true, false, true, &NullVerifier{}, - template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.packagePool, func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, + template.Must(template.New("test").Parse("test")), s.progress, s.localRepoCollection, s.packageCollection, s.reflistCollection, s.packagePool, + func(database.ReaderWriter) aptly.ChecksumStorage { return s.checksumStorage }, nil, nil) c.Assert(err, IsNil) c.Check(failedFiles, IsNil) diff --git a/deb/collections.go b/deb/collections.go index 7dfe8523..ff711e26 100644 --- a/deb/collections.go +++ b/deb/collections.go @@ -16,6 +16,7 @@ type CollectionFactory struct { snapshots *SnapshotCollection localRepos *LocalRepoCollection publishedRepos *PublishedRepoCollection + reflists *RefListCollection checksums *ChecksumCollection } @@ -91,6 +92,17 @@ func (factory *CollectionFactory) PublishedRepoCollection() *PublishedRepoCollec return factory.publishedRepos } +func (factory *CollectionFactory) RefListCollection() *RefListCollection { + factory.Lock() + defer factory.Unlock() + + if 
factory.reflists == nil { + factory.reflists = NewRefListCollection(factory.db) + } + + return factory.reflists +} + // ChecksumCollection returns (or creates) new ChecksumCollection func (factory *CollectionFactory) ChecksumCollection(db database.ReaderWriter) aptly.ChecksumStorage { factory.Lock() diff --git a/deb/graph.go b/deb/graph.go index 16a7ce85..77421c3a 100644 --- a/deb/graph.go +++ b/deb/graph.go @@ -33,7 +33,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz existingNodes := map[string]bool{} err = collectionFactory.RemoteRepoCollection().ForEach(func(repo *RemoteRepo) error { - e := collectionFactory.RemoteRepoCollection().LoadComplete(repo) + e := collectionFactory.RemoteRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -55,7 +55,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz } err = collectionFactory.LocalRepoCollection().ForEach(func(repo *LocalRepo) error { - e := collectionFactory.LocalRepoCollection().LoadComplete(repo) + e := collectionFactory.LocalRepoCollection().LoadComplete(repo, collectionFactory.RefListCollection()) if e != nil { return e } @@ -81,7 +81,7 @@ func BuildGraph(collectionFactory *CollectionFactory, layout string) (gographviz }) err = collectionFactory.SnapshotCollection().ForEach(func(snapshot *Snapshot) error { - e := collectionFactory.SnapshotCollection().LoadComplete(snapshot) + e := collectionFactory.SnapshotCollection().LoadComplete(snapshot, collectionFactory.RefListCollection()) if e != nil { return e } diff --git a/deb/list.go b/deb/list.go index ec14bb98..e59390f6 100644 --- a/deb/list.go +++ b/deb/list.go @@ -90,7 +90,7 @@ func NewPackageListWithDuplicates(duplicates bool, capacity int) *PackageList { } // NewPackageListFromRefList loads packages list from PackageRefList -func NewPackageListFromRefList(reflist *PackageRefList, collection *PackageCollection, progress aptly.Progress) 
(*PackageList, error) { +func NewPackageListFromRefList(reflist AnyRefList, collection *PackageCollection, progress aptly.Progress) (*PackageList, error) { // empty reflist if reflist == nil { return NewPackageList(), nil diff --git a/deb/local.go b/deb/local.go index 2a15c734..acae4664 100644 --- a/deb/local.go +++ b/deb/local.go @@ -26,7 +26,7 @@ type LocalRepo struct { // Uploaders configuration Uploaders *Uploaders `codec:"Uploaders,omitempty" json:"-"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewLocalRepo creates new instance of Debian local repository @@ -55,13 +55,13 @@ func (repo *LocalRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *LocalRepo) RefList() *PackageRefList { +func (repo *LocalRepo) RefList() *SplitRefList { return repo.packageRefs } // UpdateRefList changes package list for local repo -func (repo *LocalRepo) UpdateRefList(reflist *PackageRefList) { - repo.packageRefs = reflist +func (repo *LocalRepo) UpdateRefList(sl *SplitRefList) { + repo.packageRefs = sl } // Encode does msgpack encoding of LocalRepo @@ -140,14 +140,14 @@ func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, uniq } // Add appends new repo to collection and saves it -func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Add(repo *LocalRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("local repo with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -157,27 +157,25 @@ func (collection *LocalRepoCollection) Add(repo *LocalRepo) error { } // Update stores updated information about repo in DB -func (collection *LocalRepoCollection) Update(repo *LocalRepo) error { +func (collection *LocalRepoCollection) Update(repo *LocalRepo, 
reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + bc := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), bc) } return batch.Write() } // LoadComplete loads additional information for local repo -func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/local_test.go b/deb/local_test.go index c9072b7d..b87b1b62 100644 --- a/deb/local_test.go +++ b/deb/local_test.go @@ -12,7 +12,7 @@ import ( type LocalRepoSuite struct { db database.Storage list *PackageList - reflist *PackageRefList + reflist *SplitRefList repo *LocalRepo } @@ -24,7 +24,7 @@ func (s *LocalRepoSuite) SetUpTest(c *C) { s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) s.repo = NewLocalRepo("lrepo", "Super repo") s.repo.packageRefs = s.reflist @@ -75,10 +75,11 @@ func (s *LocalRepoSuite) TestRefKey(c *C) { } type LocalRepoCollectionSuite struct { - db database.Storage - collection *LocalRepoCollection - list *PackageList - reflist *PackageRefList + db database.Storage + collection *LocalRepoCollection + reflistCollection *RefListCollection + list *PackageList + reflist 
*SplitRefList } var _ = Suite(&LocalRepoCollectionSuite{}) @@ -86,12 +87,13 @@ var _ = Suite(&LocalRepoCollectionSuite{}) func (s *LocalRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewLocalRepoCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.list = NewPackageList() s.list.Add(&Package{Name: "lib", Version: "1.7", Architecture: "i386"}) s.list.Add(&Package{Name: "app", Version: "1.9", Architecture: "amd64"}) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromRefList(NewPackageRefListFromPackageList(s.list)) } func (s *LocalRepoCollectionSuite) TearDownTest(c *C) { @@ -103,8 +105,8 @@ func (s *LocalRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("local1") c.Assert(err, IsNil) @@ -121,7 +123,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -135,7 +137,7 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) { func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo := NewLocalRepo("local1", "Comment 1") - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection := NewLocalRepoCollection(s.db) r, err := collection.ByName("local1") @@ -143,20 +145,20 @@ func (s *LocalRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) 
repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.reflistCollection), IsNil) collection = NewLocalRepoCollection(s.db) r, err = collection.ByName("local1") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.reflistCollection), IsNil) c.Assert(r.NumPackages(), Equals, 2) } func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { repo := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo) + s.collection.Add(repo, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*LocalRepo) error { @@ -178,10 +180,10 @@ func (s *LocalRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *LocalRepoCollectionSuite) TestDrop(c *C) { repo1 := NewLocalRepo("local1", "Comment 1") - s.collection.Add(repo1) + s.collection.Add(repo1, s.reflistCollection) repo2 := NewLocalRepo("local2", "Comment 2") - s.collection.Add(repo2) + s.collection.Add(repo2, s.reflistCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/publish.go b/deb/publish.go index b1401d8f..c929c6ec 100644 --- a/deb/publish.go +++ b/deb/publish.go @@ -27,7 +27,7 @@ type repoSourceItem struct { // Pointer to local repo if SourceKind == "local" localRepo *LocalRepo // Package references is SourceKind == "local" - packageRefs *PackageRefList + packageRefs *SplitRefList } // PublishedRepo is a published for http/ftp representation of snapshot as Debian repository @@ -397,7 +397,7 @@ func (p *PublishedRepo) RefKey(component string) []byte { } // RefList returns list of package refs in local repo -func (p *PublishedRepo) RefList(component string) *PackageRefList { +func (p *PublishedRepo) RefList(component string) *SplitRefList { item := p.sourceItems[component] if p.SourceKind == SourceLocalRepo { return item.packageRefs @@ -944,14 +944,14 @@ func (collection 
*PublishedRepoCollection) loadList() { } // Add appends new repo to collection and saves it -func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Add(repo *PublishedRepo, reflistCollection *RefListCollection) error { collection.loadList() if collection.CheckDuplicate(repo) != nil { return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution) } - err := collection.Update(repo) + err := collection.Update(repo, reflistCollection) if err != nil { return err } @@ -974,13 +974,14 @@ func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) * } // Update stores updated information about repo in DB -func (collection *PublishedRepoCollection) Update(repo *PublishedRepo) error { +func (collection *PublishedRepoCollection) Update(repo *PublishedRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.SourceKind == SourceLocalRepo { + rb := reflistCollection.NewBatch(batch) for component, item := range repo.sourceItems { - batch.Put(repo.RefKey(component), item.packageRefs.Encode()) + reflistCollection.UpdateInBatch(item.packageRefs, repo.RefKey(component), rb) } } return batch.Write() @@ -1013,7 +1014,7 @@ func (collection *PublishedRepoCollection) LoadShallow(repo *PublishedRepo, coll return } - item.packageRefs = &PackageRefList{} + item.packageRefs = NewSplitRefList() repo.sourceItems[component] = item } } else { @@ -1029,35 +1030,29 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col if repo.SourceKind == SourceSnapshot { for _, item := range repo.sourceItems { - err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot) + err = collectionFactory.SnapshotCollection().LoadComplete(item.snapshot, collectionFactory.RefListCollection()) if err != nil { return } } } else if repo.SourceKind == 
SourceLocalRepo { for component, item := range repo.sourceItems { - err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo) + err = collectionFactory.LocalRepoCollection().LoadComplete(item.localRepo, collectionFactory.RefListCollection()) if err != nil { return } - var encoded []byte - encoded, err = collection.db.Get(repo.RefKey(component)) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey(component)) if err != nil { // < 0.6 saving w/o component name if err == database.ErrNotFound && len(repo.Sources) == 1 { - encoded, err = collection.db.Get(repo.RefKey("")) + err = collectionFactory.RefListCollection().LoadComplete(item.packageRefs, repo.RefKey("")) } if err != nil { return } } - - err = item.packageRefs.Decode(encoded) - if err != nil { - return - } } } else { panic("unknown SourceKind") @@ -1162,6 +1157,11 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix referencedFiles := map[string][]string{} processedComponentRefs := map[string]*PackageRefList{} + processedComponentBuckets := map[string]*RefListDigestSet{} + for _, component := range components { + processedComponentBuckets[component] = NewRefListDigestSet() + } + for _, r := range collection.list { if r.Prefix == prefix { matches := false @@ -1185,36 +1185,51 @@ func (collection *PublishedRepoCollection) listReferencedFilesByComponent(prefix for _, component := range components { if utils.StrSliceHasItem(repoComponents, component) { - unseenRefs := r.RefList(component) - processedRefs := processedComponentRefs[component] - if processedRefs != nil { - unseenRefs = unseenRefs.Subtract(processedRefs) - } else { - processedRefs = NewPackageRefList() - } + processedBuckets := processedComponentBuckets[component] - if unseenRefs.Len() == 0 { - continue - } - processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) + err := r.RefList(component).ForEachBucket(func(digest []byte, bucket 
*PackageRefList) error { + if processedBuckets.Has(digest) { + return nil + } + processedBuckets.Add(digest) + + unseenRefs := bucket + processedRefs := processedComponentRefs[component] + if processedRefs != nil { + unseenRefs = unseenRefs.Subtract(processedRefs) + } else { + processedRefs = NewPackageRefList() + } - packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) - if err != nil { - return nil, err - } + if unseenRefs.Len() == 0 { + return nil + } + processedComponentRefs[component] = processedRefs.Merge(unseenRefs, false, true) - packageList.ForEach(func(p *Package) error { - poolDir, err := p.PoolDirectory() + packageList, err := NewPackageListFromRefList(unseenRefs, collectionFactory.PackageCollection(), progress) if err != nil { return err } - for _, f := range p.Files() { - referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) - } + packageList.ForEach(func(p *Package) error { + poolDir, err := p.PoolDirectory() + if err != nil { + return err + } + + for _, f := range p.Files() { + referencedFiles[component] = append(referencedFiles[component], filepath.Join(poolDir, f.Filename)) + } + + return nil + }) return nil }) + + if err != nil { + return nil, err + } } } } diff --git a/deb/publish_bench_test.go b/deb/publish_bench_test.go index b135a8b8..f1d87e1e 100644 --- a/deb/publish_bench_test.go +++ b/deb/publish_bench_test.go @@ -31,6 +31,7 @@ func BenchmarkListReferencedFiles(b *testing.B) { packageCollection := factory.PackageCollection() repoCollection := factory.LocalRepoCollection() publishCollection := factory.PublishedRepoCollection() + reflistCollection := factory.RefListCollection() sharedRefs := NewPackageRefList() { @@ -91,14 +92,14 @@ func BenchmarkListReferencedFiles(b *testing.B) { repo := NewLocalRepo(fmt.Sprintf("repo%d", repoIndex), "comment") repo.DefaultDistribution = fmt.Sprintf("dist%d", repoIndex) repo.DefaultComponent = 
defaultComponent - repo.UpdateRefList(refs.Merge(sharedRefs, false, true)) - repoCollection.Add(repo) + repo.UpdateRefList(NewSplitRefListFromRefList(refs.Merge(sharedRefs, false, true))) + repoCollection.Add(repo, reflistCollection) publish, err := NewPublishedRepo("", "test", "", nil, []string{defaultComponent}, []interface{}{repo}, factory) if err != nil { b.Fatal(err) } - publishCollection.Add(publish) + publishCollection.Add(publish, reflistCollection) } db.CompactDB() diff --git a/deb/publish_test.go b/deb/publish_test.go index 15e3be92..8d0d7ae1 100644 --- a/deb/publish_test.go +++ b/deb/publish_test.go @@ -82,6 +82,7 @@ type PublishedRepoSuite struct { db database.Storage factory *CollectionFactory packageCollection *PackageCollection + reflistCollection *RefListCollection } var _ = Suite(&PublishedRepoSuite{}) @@ -113,21 +114,22 @@ func (s *PublishedRepoSuite) SetUpTest(c *C) { s.p2.UpdateFiles(s.p1.Files()) s.p3.UpdateFiles(s.p1.Files()) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) + s.reflistCollection = s.factory.RefListCollection() repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) repo.packageRefs = s.reflist - s.factory.RemoteRepoCollection().Add(repo) + s.factory.RemoteRepoCollection().Add(repo, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") s.localRepo.packageRefs = s.reflist - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.snapshot, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot) + s.factory.SnapshotCollection().Add(s.snapshot, s.reflistCollection) s.snapshot2, _ = NewSnapshotFromRepository("snap", repo) - s.factory.SnapshotCollection().Add(s.snapshot2) + s.factory.SnapshotCollection().Add(s.snapshot2, s.reflistCollection) s.packageCollection = 
s.factory.PackageCollection() s.packageCollection.Update(s.p1) @@ -284,7 +286,7 @@ func (s *PublishedRepoSuite) TestDistributionComponentGuessing(c *C) { s.localRepo.DefaultDistribution = "precise" s.localRepo.DefaultComponent = "contrib" - s.factory.LocalRepoCollection().Update(s.localRepo) + s.factory.LocalRepoCollection().Update(s.localRepo, s.reflistCollection) repo, err = NewPublishedRepo("", "ppa", "", nil, []string{""}, []interface{}{s.localRepo}, s.factory) c.Check(err, IsNil) @@ -442,6 +444,7 @@ type PublishedRepoCollectionSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection snap1, snap2 *Snapshot localRepo *LocalRepo @@ -457,22 +460,23 @@ func (s *PublishedRepoCollectionSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() snap1Refs := NewPackageRefList() snap1Refs.Refs = [][]byte{s.p1.Key(""), s.p2.Key("")} sort.Sort(snap1Refs) - s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, snap1Refs, "desc1") + s.snap1 = NewSnapshotFromRefList("snap1", []*Snapshot{}, NewSplitRefListFromRefList(snap1Refs), "desc1") snap2Refs := NewPackageRefList() snap2Refs.Refs = [][]byte{s.p3.Key("")} sort.Sort(snap2Refs) - s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, snap2Refs, "desc2") + s.snap2 = NewSnapshotFromRefList("snap2", []*Snapshot{}, NewSplitRefListFromRefList(snap2Refs), "desc2") - s.snapshotCollection.Add(s.snap1) - s.snapshotCollection.Add(s.snap2) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) + s.snapshotCollection.Add(s.snap2, s.reflistCollection) s.localRepo = NewLocalRepo("local1", "comment1") - s.factory.LocalRepoCollection().Add(s.localRepo) + s.factory.LocalRepoCollection().Add(s.localRepo, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, 
[]interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main", "contrib"}, []interface{}{s.snap2, s.snap1}, s.factory) @@ -491,14 +495,14 @@ func (s *PublishedRepoCollectionSuite) TestAddByStoragePrefixDistribution(c *C) _, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) - c.Assert(s.collection.Add(s.repo1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo2), IsNil) - c.Assert(s.collection.Add(s.repo3), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo3, s.reflistCollection), ErrorMatches, ".*already exists") c.Assert(s.collection.CheckDuplicate(s.repo3), Equals, s.repo1) - c.Assert(s.collection.Add(s.repo4), IsNil) - c.Assert(s.collection.Add(s.repo5), IsNil) + c.Assert(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.repo5, s.reflistCollection), IsNil) r, err := s.collection.ByStoragePrefixDistribution("", "ppa", "anaconda") c.Assert(err, IsNil) @@ -524,7 +528,7 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { _, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.repo1), IsNil) + c.Assert(s.collection.Add(s.repo1, s.reflistCollection), IsNil) r, err := s.collection.ByUUID(s.repo1.UUID) c.Assert(err, IsNil) @@ -535,8 +539,8 @@ func (s *PublishedRepoCollectionSuite) TestByUUID(c *C) { } func (s *PublishedRepoCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.repo1), IsNil) - c.Assert(s.collection.Update(s.repo4), IsNil) + 
c.Assert(s.collection.Update(s.repo1, s.reflistCollection), IsNil) + c.Assert(s.collection.Update(s.repo4, s.reflistCollection), IsNil) collection := NewPublishedRepoCollection(s.db) r, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -584,7 +588,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { encoder.Encode(&old) c.Assert(s.db.Put(s.repo1.Key(), buf.Bytes()), IsNil) - c.Assert(s.db.Put(s.repo1.RefKey(""), s.localRepo.RefList().Encode()), IsNil) + c.Assert(s.db.Put(s.repo1.RefKey(""), NewPackageRefList().Encode()), IsNil) collection := NewPublishedRepoCollection(s.db) repo, err := collection.ByStoragePrefixDistribution("", "ppa", "anaconda") @@ -599,7 +603,7 @@ func (s *PublishedRepoCollectionSuite) TestLoadPre0_6(c *C) { } func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.repo1) + s.collection.Add(s.repo1, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*PublishedRepo) error { @@ -620,17 +624,17 @@ func (s *PublishedRepoCollectionSuite) TestForEachAndLen(c *C) { } func (s *PublishedRepoCollectionSuite) TestBySnapshot(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) c.Check(s.collection.BySnapshot(s.snap1), DeepEquals, []*PublishedRepo{s.repo1, s.repo2}) c.Check(s.collection.BySnapshot(s.snap2), DeepEquals, []*PublishedRepo{s.repo2}) } func (s *PublishedRepoCollectionSuite) TestByLocalRepo(c *C) { - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepo(s.localRepo), DeepEquals, 
[]*PublishedRepo{s.repo4, s.repo5}) } @@ -640,10 +644,10 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { c.Check(s.factory.PackageCollection().Update(s.p2), IsNil) c.Check(s.factory.PackageCollection().Update(s.p3), IsNil) - c.Check(s.collection.Add(s.repo1), IsNil) - c.Check(s.collection.Add(s.repo2), IsNil) - c.Check(s.collection.Add(s.repo4), IsNil) - c.Check(s.collection.Add(s.repo5), IsNil) + c.Check(s.collection.Add(s.repo1, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo2, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo4, s.reflistCollection), IsNil) + c.Check(s.collection.Add(s.repo5, s.reflistCollection), IsNil) files, err := s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -656,12 +660,12 @@ func (s *PublishedRepoCollectionSuite) TestListReferencedFiles(c *C) { }) snap3 := NewSnapshotFromRefList("snap3", []*Snapshot{}, s.snap2.RefList(), "desc3") - s.snapshotCollection.Add(snap3) + s.snapshotCollection.Add(snap3, s.reflistCollection) // Ensure that adding a second publish point with matching files doesn't give duplicate results. 
repo3, err := NewPublishedRepo("", "", "anaconda-2", []string{}, []string{"main"}, []interface{}{snap3}, s.factory) c.Check(err, IsNil) - c.Check(s.collection.Add(repo3), IsNil) + c.Check(s.collection.Add(repo3, s.reflistCollection), IsNil) files, err = s.collection.listReferencedFilesByComponent(".", []string{"main", "contrib"}, s.factory, nil) c.Assert(err, IsNil) @@ -679,6 +683,7 @@ type PublishedRepoRemoveSuite struct { db database.Storage factory *CollectionFactory snapshotCollection *SnapshotCollection + reflistCollection *RefListCollection collection *PublishedRepoCollection root, root2 string provider *FakeStorageProvider @@ -694,10 +699,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.factory = NewCollectionFactory(s.db) s.snapshotCollection = s.factory.SnapshotCollection() + s.reflistCollection = s.factory.RefListCollection() s.snap1 = NewSnapshotFromPackageList("snap1", []*Snapshot{}, NewPackageList(), "desc1") - s.snapshotCollection.Add(s.snap1) + s.snapshotCollection.Add(s.snap1, s.reflistCollection) s.repo1, _ = NewPublishedRepo("", "ppa", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) s.repo2, _ = NewPublishedRepo("", "", "anaconda", []string{}, []string{"main"}, []interface{}{s.snap1}, s.factory) @@ -706,11 +712,11 @@ func (s *PublishedRepoRemoveSuite) SetUpTest(c *C) { s.repo5, _ = NewPublishedRepo("files:other", "ppa", "osminog", []string{}, []string{"contrib"}, []interface{}{s.snap1}, s.factory) s.collection = s.factory.PublishedRepoCollection() - s.collection.Add(s.repo1) - s.collection.Add(s.repo2) - s.collection.Add(s.repo3) - s.collection.Add(s.repo4) - s.collection.Add(s.repo5) + s.collection.Add(s.repo1, s.reflistCollection) + s.collection.Add(s.repo2, s.reflistCollection) + s.collection.Add(s.repo3, s.reflistCollection) + s.collection.Add(s.repo4, s.reflistCollection) + s.collection.Add(s.repo5, s.reflistCollection) s.root = c.MkDir() s.publishedStorage = files.NewPublishedStorage(s.root, "", "") 
diff --git a/deb/reflist.go b/deb/reflist.go index 30396548..e039de12 100644 --- a/deb/reflist.go +++ b/deb/reflist.go @@ -2,10 +2,15 @@ package deb import ( "bytes" + "crypto/sha256" + "encoding/base64" "encoding/json" + "fmt" "sort" "github.com/AlekSi/pointer" + "github.com/aptly-dev/aptly/database" + "github.com/cespare/xxhash/v2" "github.com/ugorji/go/codec" ) @@ -44,6 +49,13 @@ func NewPackageRefListFromPackageList(list *PackageList) *PackageRefList { return reflist } +func (l *PackageRefList) Clone() *PackageRefList { + clone := &PackageRefList{} + clone.Refs = make([][]byte, l.Len()) + copy(clone.Refs, l.Refs) + return clone +} + // Len returns number of refs func (l *PackageRefList) Len() int { return len(l.Refs) @@ -184,8 +196,12 @@ func (d PackageDiff) MarshalJSON() ([]byte, error) { type PackageDiffs []PackageDiff // Diff calculates difference between two reflists -func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection) (result PackageDiffs, err error) { - result = make(PackageDiffs, 0, 128) +func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageCollection, result PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } // pointer to left and right reflists il, ir := 0, 0 @@ -258,7 +274,7 @@ func (l *PackageRefList) Diff(r *PackageRefList, packageCollection *PackageColle } } - return + return result, nil } // Merge merges reflist r into current reflist. If overrideMatching, merge @@ -391,3 +407,753 @@ func (l *PackageRefList) FilterLatestRefs() { lastArch, lastName, lastVer = arch, name, ver } } + +const ( + reflistBucketCount = 1 << 6 + reflistBucketMask = reflistBucketCount - 1 +) + +type reflistDigestArray [sha256.Size]byte + +func bucketRefPrefix(ref []byte) []byte { + const maxPrefixLen = 3 + + // Cut out the arch, leaving behind the package name and subsequent info. 
+ _, ref, _ = bytes.Cut(ref, []byte{' '}) + + // Strip off the lib prefix, so that "libxyz" and "xyz", which are likely + // to be updated together, go in the same bucket. + libPrefix := []byte("lib") + if bytes.HasPrefix(ref, libPrefix) { + ref = ref[len(libPrefix):] + } + + prefixLen := min(maxPrefixLen, len(ref)) + prefix, _, _ := bytes.Cut(ref[:prefixLen], []byte{' '}) + return prefix +} + +func bucketIdxForRef(ref []byte) int { + return int(xxhash.Sum64(bucketRefPrefix(ref))) & reflistBucketMask +} + +// SplitRefList is a list of package refs, similar to a PackageRefList. However, +// instead of storing a linear array of refs, SplitRefList splits the refs into +// PackageRefList "buckets", based on a hash of a short prefix of the package name inside the ref. +// Each bucket has a digest of its contents that serves as its key in the database. +// +// When serialized, a SplitRefList just becomes an array of bucket digests, and +// the buckets themselves are stored separately. Because the buckets are then +// referenced by their digests, multiple independent reflists can share buckets, +// if their buckets have matching digests. +// +// Buckets themselves may not be confined to a single database value; instead, +// they're split into "segments", based on the database's preferred maximum +// value size. This prevents large buckets from slowing down the database.
+type SplitRefList struct { + Buckets [][]byte + + bucketRefs []*PackageRefList +} + +// NewSplitRefList creates empty SplitRefList +func NewSplitRefList() *SplitRefList { + sl := &SplitRefList{} + sl.reset() + return sl +} + +// NewSplitRefListFromRefList creates SplitRefList from PackageRefList +func NewSplitRefListFromRefList(reflist *PackageRefList) *SplitRefList { + sl := NewSplitRefList() + sl.Replace(reflist) + return sl +} + +// NewSplitRefListFromPackageList creates SplitRefList from PackageList +func NewSplitRefListFromPackageList(list *PackageList) *SplitRefList { + return NewSplitRefListFromRefList(NewPackageRefListFromPackageList(list)) +} + +func (sl *SplitRefList) reset() { + sl.Buckets = make([][]byte, reflistBucketCount) + sl.bucketRefs = make([]*PackageRefList, reflistBucketCount) +} + +// Has checks whether package is part of reflist +func (sl *SplitRefList) Has(p *Package) bool { + idx := bucketIdxForRef(p.Key("")) + if bucket := sl.bucketRefs[idx]; bucket != nil { + return bucket.Has(p) + } + return false +} + +// Len returns number of refs +func (sl *SplitRefList) Len() int { + total := 0 + for _, bucket := range sl.bucketRefs { + if bucket != nil { + total += bucket.Len() + } + } + return total +} + +func reflistDigest(l *PackageRefList) []byte { + // Different algorithms on PackageRefLists will sometimes return a nil slice + // of refs and other times return an empty slice. Regardless, they should + // both be treated identically and be given an empty digest.
+ if len(l.Refs) == 0 { + return nil + } + + h := sha256.New() + for _, ref := range l.Refs { + h.Write(ref) + h.Write([]byte{0}) + } + return h.Sum(nil) +} + +// Replace removes all the refs inside and replaces them with those in the given reflist +func (sl *SplitRefList) Replace(reflist *PackageRefList) { + sl.reset() + + for _, ref := range reflist.Refs { + idx := bucketIdxForRef(ref) + + bucket := sl.bucketRefs[idx] + if bucket == nil { + bucket = NewPackageRefList() + sl.bucketRefs[idx] = bucket + } + + bucket.Refs = append(bucket.Refs, ref) + } + + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + sort.Sort(bucket) + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Merge merges reflist r into current reflist (see PackageRefList.Merge) +func (sl *SplitRefList) Merge(r *SplitRefList, overrideMatching, ignoreConflicting bool) (result *SplitRefList) { + result = NewSplitRefList() + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket == nil && rbucket == nil { + continue + } + + if lbucket == nil { + lbucket = &empty + } else if rbucket == nil { + rbucket = &empty + } + + result.bucketRefs[idx] = lbucket.Merge(rbucket, overrideMatching, ignoreConflicting) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } + + return +} + +// Subtract returns all packages in sl that are not in r +func (sl *SplitRefList) Subtract(r *SplitRefList) (result *SplitRefList) { + result = NewSplitRefList() + + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result.bucketRefs[idx] = lbucket.Subtract(rbucket) + result.Buckets[idx] = reflistDigest(result.bucketRefs[idx]) + } else { + result.bucketRefs[idx] = lbucket.Clone() + result.Buckets[idx] = sl.Buckets[idx] + } + } + } + + return +} + +// Diff calculates difference between two reflists +func (sl *SplitRefList) Diff(r *SplitRefList, packageCollection *PackageCollection, result
PackageDiffs) (PackageDiffs, error) { + var err error + + if result == nil { + result = make(PackageDiffs, 0, 128) + } + + var empty PackageRefList + for idx, lbucket := range sl.bucketRefs { + rbucket := r.bucketRefs[idx] + if lbucket != nil { + if rbucket != nil { + result, err = lbucket.Diff(rbucket, packageCollection, result) + } else { + result, err = lbucket.Diff(&empty, packageCollection, result) + } + } else if rbucket != nil { + result, err = empty.Diff(rbucket, packageCollection, result) + } + + if err != nil { + return nil, err + } + } + + sort.Slice(result, func(i, j int) bool { + var ri, rj []byte + if result[i].Left != nil { + ri = result[i].Left.Key("") + } else { + ri = result[i].Right.Key("") + } + if result[j].Left != nil { + rj = result[j].Left.Key("") + } else { + rj = result[j].Right.Key("") + } + + return bytes.Compare(ri, rj) < 0 + }) + + return result, nil +} + +// FilterLatestRefs reduces a reflist to the latest of each package (see PackageRefList.FilterLatestRefs) +func (sl *SplitRefList) FilterLatestRefs() { + for idx, bucket := range sl.bucketRefs { + if bucket != nil { + bucket.FilterLatestRefs() + sl.Buckets[idx] = reflistDigest(bucket) + } + } +} + +// Flatten creates a flat PackageRefList containing all the refs in this reflist +func (sl *SplitRefList) Flatten() *PackageRefList { + reflist := NewPackageRefList() + sl.ForEach(func(ref []byte) error { + reflist.Refs = append(reflist.Refs, ref) + return nil + }) + sort.Sort(reflist) + return reflist +} + +// ForEachBucket calls handler for each bucket in list +func (sl *SplitRefList) ForEachBucket(handler func(digest []byte, bucket *PackageRefList) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := handler(digest, bucket); err != nil { + return err + } + } + } + + return nil +} + +// ForEach calls handler for each package ref in list +// +// IMPORTANT: unlike 
PackageRefList.ForEach, the order of handler invocations +// is *not* guaranteed to be sorted. +func (sl *SplitRefList) ForEach(handler func([]byte) error) error { + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + bucket := sl.bucketRefs[idx] + if bucket != nil { + if err := bucket.ForEach(handler); err != nil { + return err + } + } + } + + return nil +} + +// RefListDigestSet is a set of SplitRefList bucket digests +type RefListDigestSet struct { + items map[reflistDigestArray]struct{} +} + +// NewRefListDigestSet creates empty RefListDigestSet +func NewRefListDigestSet() *RefListDigestSet { + return &RefListDigestSet{items: map[reflistDigestArray]struct{}{}} +} + +// Len returns number of digests in the set +func (set *RefListDigestSet) Len() int { + return len(set.items) +} + +// ForEach calls handler for each digest in the set +func (set *RefListDigestSet) ForEach(handler func(digest []byte) error) error { + for digest := range set.items { + if err := handler(digest[:]); err != nil { + return err + } + } + + return nil +} + +// Add adds digest to set, doing nothing if the digest was already present +func (set *RefListDigestSet) Add(digest []byte) { + set.items[reflistDigestArray(digest)] = struct{}{} +} + +// AddAllInRefList adds all the bucket digests in a SplitRefList to the set +func (set *RefListDigestSet) AddAllInRefList(sl *SplitRefList) { + for _, digest := range sl.Buckets { + if len(digest) > 0 { + set.Add(digest) + } + } +} + +// Has checks whether a digest is part of set +func (set *RefListDigestSet) Has(digest []byte) bool { + _, ok := set.items[reflistDigestArray(digest)] + return ok +} + +// Remove removes a digest from set +func (set *RefListDigestSet) Remove(digest []byte) { + delete(set.items, reflistDigestArray(digest)) +} + +// RemoveAll removes all the digests in other from the current set +func (set *RefListDigestSet) RemoveAll(other *RefListDigestSet) { + for digest := range other.items { + 
delete(set.items, digest) + } +} + +// RefListCollection does listing, updating/adding/deleting of SplitRefLists +type RefListCollection struct { + db database.Storage + + cache map[reflistDigestArray]*PackageRefList +} + +// NewRefListCollection creates a RefListCollection +func NewRefListCollection(db database.Storage) *RefListCollection { + return &RefListCollection{db: db, cache: make(map[reflistDigestArray]*PackageRefList)} +} + +type reflistStorageFormat int + +const ( + // (legacy format) all the refs are stored inline in a single value + reflistStorageFormatInline reflistStorageFormat = iota + // the refs are split into buckets that are stored externally from the value + reflistStorageFormatSplit +) + +// NoPadding is used because all digests are the same length, so the padding +// is useless and only serves to muddy the output. +var bucketDigestEncoding = base64.StdEncoding.WithPadding(base64.NoPadding) + +func segmentPrefix(encodedDigest string) []byte { + return []byte(fmt.Sprintf("F%s-", encodedDigest)) +} + +func segmentIndexKey(prefix []byte, idx int) []byte { + // Assume most buckets won't have more than 0xFFFF = ~65k segments (which + // would be an extremely large bucket!). + return append(bytes.Clone(prefix), []byte(fmt.Sprintf("%04x", idx))...) +} + +// AllBucketDigests returns a set of all the bucket digests in the database +func (collection *RefListCollection) AllBucketDigests() (*RefListDigestSet, error) { + digests := NewRefListDigestSet() + + err := collection.db.ProcessByPrefix([]byte("F"), func(key []byte, value []byte) error { + if !bytes.HasSuffix(key, []byte("-0000")) { + // Ignore additional segments for the same digest. 
+ return nil + } + + encodedDigest, _, foundDash := bytes.Cut(key[1:], []byte("-")) + if !foundDash { + return fmt.Errorf("invalid key: %s", string(key)) + } + digest := make([]byte, bucketDigestEncoding.DecodedLen(len(encodedDigest))) + if _, err := bucketDigestEncoding.Decode(digest, encodedDigest); err != nil { + return fmt.Errorf("decoding key %s: %w", string(key), err) + } + + digests.Add(digest) + return nil + }) + + if err != nil { + return nil, err + } + return digests, nil +} + +// UnsafeDropBucket drops the bucket associated with digest from the database, +// doing so inside batch +// +// This is considered "unsafe" because no checks are performed to ensure that +// the bucket is no longer referenced by any saved reflists. +func (collection *RefListCollection) UnsafeDropBucket(digest []byte, batch database.Batch) error { + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + return collection.db.ProcessByPrefix(prefix, func(key []byte, value []byte) error { + return batch.Delete(key) + }) +} + +func (collection *RefListCollection) load(sl *SplitRefList, key []byte) (reflistStorageFormat, error) { + sl.reset() + + data, err := collection.db.Get(key) + if err != nil { + return 0, err + } + + var splitOrInlineRefList struct { + *SplitRefList + *PackageRefList + } + handle := &codec.MsgpackHandle{} + handle.ZeroCopy = true + decoder := codec.NewDecoderBytes(data, handle) + if err := decoder.Decode(&splitOrInlineRefList); err != nil { + return 0, err + } + + if splitOrInlineRefList.SplitRefList != nil { + sl.Buckets = splitOrInlineRefList.Buckets + } else if splitOrInlineRefList.PackageRefList != nil { + sl.Replace(splitOrInlineRefList.PackageRefList) + return reflistStorageFormatInline, nil + } + + return reflistStorageFormatSplit, nil +} + +func (collection *RefListCollection) loadBuckets(sl *SplitRefList) error { + for idx := range sl.Buckets { + if sl.bucketRefs[idx] != nil { + continue + } + + var bucket *PackageRefList + + if digest := 
sl.Buckets[idx]; len(digest) > 0 { + cacheKey := reflistDigestArray(digest) + bucket = collection.cache[cacheKey] + if bucket == nil { + bucket = NewPackageRefList() + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + err := collection.db.ProcessByPrefix(prefix, func(digest []byte, value []byte) error { + var l PackageRefList + if err := l.Decode(append([]byte{}, value...)); err != nil { + return err + } + + bucket.Refs = append(bucket.Refs, l.Refs...) + return nil + }) + + if err != nil { + return err + } + + // The segments may not have been iterated in order, so make sure to re-sort + // here. + sort.Sort(bucket) + collection.cache[cacheKey] = bucket + } + + actualDigest := reflistDigest(bucket) + if !bytes.Equal(actualDigest, digest) { + return fmt.Errorf("corrupt reflist bucket %d: expected digest %s, got %s", + idx, + bucketDigestEncoding.EncodeToString(digest), + bucketDigestEncoding.EncodeToString(actualDigest)) + } + } + + sl.bucketRefs[idx] = bucket + } + + return nil +} + +// LoadComplete loads the reflist stored at the given key, as well as all the +// buckets referenced by a split reflist +func (collection *RefListCollection) LoadComplete(sl *SplitRefList, key []byte) error { + if _, err := collection.load(sl, key); err != nil { + return err + } + + return collection.loadBuckets(sl) +} + +// RefListBatch is a wrapper over a database.Batch that tracks already-written +// reflists to avoid writing them multiple times +// +// It is *not* safe to use the same underlying database.Batch that has already +// been given to UnsafeDropBucket. 
+type RefListBatch struct { + batch database.Batch + + alreadyWritten *RefListDigestSet +} + +// NewBatch creates a new RefListBatch wrapping the given database.Batch +func (collection *RefListCollection) NewBatch(batch database.Batch) *RefListBatch { + return &RefListBatch{ + batch: batch, + alreadyWritten: NewRefListDigestSet(), + } +} + +type reflistUpdateContext struct { + rb *RefListBatch + stats *RefListMigrationStats +} + +func clearSegmentRefs(reflist *PackageRefList, recommendedMaxKVSize int) { + avgRefsInSegment := recommendedMaxKVSize / 70 + reflist.Refs = make([][]byte, 0, avgRefsInSegment) +} + +func flushSegmentRefs(uctx *reflistUpdateContext, prefix []byte, segment int, reflist *PackageRefList) error { + encoded := reflist.Encode() + err := uctx.rb.batch.Put(segmentIndexKey(prefix, segment), encoded) + if err == nil && uctx.stats != nil { + uctx.stats.Segments++ + } + return err +} + +func (collection *RefListCollection) updateWithContext(sl *SplitRefList, key []byte, uctx *reflistUpdateContext) error { + if sl != nil { + recommendedMaxKVSize := collection.db.GetRecommendedMaxKVSize() + + for idx, digest := range sl.Buckets { + if len(digest) == 0 { + continue + } + + if uctx.rb.alreadyWritten.Has(digest) { + continue + } + + prefix := segmentPrefix(bucketDigestEncoding.EncodeToString(digest)) + if collection.db.HasPrefix(prefix) { + continue + } + + // All the sizing information taken from the msgpack spec: + // https://github.com/msgpack/msgpack/blob/master/spec.md + + // Assume that a segment will have [16,2^16) elements, which would + // fit into an array 16 and thus have 3 bytes of overhead. + // (A database would need a massive recommendedMaxKVSize to pass + // that limit.) 
+ size := len(segmentIndexKey(prefix, 0)) + 3 + segment := 0 + + var reflist PackageRefList + clearSegmentRefs(&reflist, recommendedMaxKVSize) + for _, ref := range sl.bucketRefs[idx].Refs { + // In order to determine the size of the ref in the database, + // we need to know how much overhead will be added by msgpack + // encoding. + requiredSize := len(ref) + if requiredSize < 1<<5 { + requiredSize++ + } else if requiredSize < 1<<8 { + requiredSize += 2 + } else if requiredSize < 1<<16 { + requiredSize += 3 + } else { + requiredSize += 4 + } + if size+requiredSize > recommendedMaxKVSize { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + clearSegmentRefs(&reflist, recommendedMaxKVSize) + segment, size = segment+1, len(segmentIndexKey(prefix, 0))+3 // new segment: restart the size estimate + } + + reflist.Refs = append(reflist.Refs, ref) + size += requiredSize + } + + if len(reflist.Refs) > 0 { + if err := flushSegmentRefs(uctx, prefix, segment, &reflist); err != nil { + return err + } + } + + uctx.rb.alreadyWritten.Add(digest) + if uctx.stats != nil { + uctx.stats.Buckets++ + } + } + } + + var buf bytes.Buffer + encoder := codec.NewEncoder(&buf, &codec.MsgpackHandle{}) + encoder.Encode(sl) + err := uctx.rb.batch.Put(key, buf.Bytes()) + if err == nil && uctx.stats != nil { + uctx.stats.Reflists++ + } + return err +} + +// UpdateInBatch will save or update the SplitRefList at key, as well as save the buckets inside, +// as part of the given batch +func (collection *RefListCollection) UpdateInBatch(sl *SplitRefList, key []byte, batch *RefListBatch) error { + return collection.updateWithContext(sl, key, &reflistUpdateContext{rb: batch}) +} + +// Update will save or update the SplitRefList at key, as well as save the buckets inside +func (collection *RefListCollection) Update(sl *SplitRefList, key []byte) error { + rb := collection.NewBatch(collection.db.CreateBatch()) + err := collection.UpdateInBatch(sl, key, rb) + if err == nil { + err = rb.batch.Write() + } + return err +} + +// RefListMigrationStats counts
a number of reflists, buckets, and segments +type RefListMigrationStats struct { + Reflists, Buckets, Segments int +} + +// RefListMigration wraps a RefListBatch for the purpose of migrating inline format +// reflists to split reflists +// +// Once the batch gets too large, it will automatically be flushed to the database, +// and a new batch will be created in its place. +type RefListMigration struct { + rb *RefListBatch + + dryRun bool + + // current number of reflists/buckets/segments queued in the current, unwritten batch + batchStats RefListMigrationStats + flushStats RefListMigrationStats +} + +// NewMigration creates an empty RefListMigration +func (collection *RefListCollection) NewMigration() *RefListMigration { + return &RefListMigration{} +} + +// NewMigrationDryRun creates an empty RefListMigration that will track the +// changes to make as usual but avoid actually writing to the db +func (collection *RefListCollection) NewMigrationDryRun() *RefListMigration { + return &RefListMigration{dryRun: true} +} + +// Stats returns statistics on the written values in the current migration +func (migration *RefListMigration) Stats() RefListMigrationStats { + return migration.flushStats +} + +// Flush will flush the current batch in the migration to the database +func (migration *RefListMigration) Flush() error { + if migration.batchStats != (RefListMigrationStats{}) { // also flush batches that queued only reflist records + if !migration.dryRun { + if err := migration.rb.batch.Write(); err != nil { + return err + } + + // It's important that we don't clear the batch on dry runs, because + // the batch is what contains the list of already-written buckets. + // If we're not writing to the database, and we clear that list, + // duplicate "writes" will occur.
+ migration.rb = nil + } + + migration.flushStats.Reflists += migration.batchStats.Reflists + migration.flushStats.Buckets += migration.batchStats.Buckets + migration.flushStats.Segments += migration.batchStats.Segments + migration.batchStats = RefListMigrationStats{} + } + + return nil +} + +// LoadCompleteAndMigrate will load the reflist and its buckets as RefListCollection.LoadComplete, +// migrating any inline reflists to split ones along the way +func (collection *RefListCollection) LoadCompleteAndMigrate(sl *SplitRefList, key []byte, migration *RefListMigration) error { + // Given enough reflists, the memory used by a batch starts to become massive, so + // make sure to flush the written segments periodically. Note that this is only + // checked *after* a migration of a full bucket (and all the segments inside) + // takes place, as splitting a single bucket write into multiple batches would + // be unsafe if an interruption occurs midway. + const maxMigratorBatch = 50000 + + format, err := collection.load(sl, key) + if err != nil { + return err + } + + switch format { + case reflistStorageFormatInline: + if migration.rb == nil { + migration.rb = collection.NewBatch(collection.db.CreateBatch()) + } + + collection.updateWithContext(sl, key, &reflistUpdateContext{ + rb: migration.rb, + stats: &migration.batchStats, + }) + + if migration.batchStats.Segments > maxMigratorBatch { + if err := migration.Flush(); err != nil { + return err + } + } + + return nil + case reflistStorageFormatSplit: + return collection.loadBuckets(sl) + default: + panic(fmt.Sprintf("unexpected format %v", format)) + } +} + +// AnyRefList is implemented by both PackageRefList and SplitRefList +type AnyRefList interface { + Has(p *Package) bool + Len() int + ForEach(handler func([]byte) error) error + FilterLatestRefs() +} + +// Check interface +var ( + _ AnyRefList = (*PackageRefList)(nil) + _ AnyRefList = (*SplitRefList)(nil) +) diff --git a/deb/reflist_bench_test.go 
b/deb/reflist_bench_test.go index b377574c..f81a84d0 100644 --- a/deb/reflist_bench_test.go +++ b/deb/reflist_bench_test.go @@ -45,3 +45,41 @@ func BenchmarkReflistDecode(b *testing.B) { (&PackageRefList{}).Decode(data) } } + +func BenchmarkSplitRefListCreationSmall(b *testing.B) { + const count = 400 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} + +func BenchmarkSplitRefListCreationLarge(b *testing.B) { + const count = 4096 + + l := NewPackageRefList() + + for i := 0; i < count; i++ { + l.Refs = append(l.Refs, []byte(fmt.Sprintf("Pamd64 %x %d", i, i))) + } + + sort.Sort(l) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := 0; j < 8; j++ { + NewSplitRefListFromRefList(l) + } + } +} diff --git a/deb/reflist_test.go b/deb/reflist_test.go index ec7ed09f..bfc64113 100644 --- a/deb/reflist_test.go +++ b/deb/reflist_test.go @@ -1,7 +1,10 @@ package deb import ( + "bytes" + "encoding/hex" "errors" + "fmt" "github.com/aptly-dev/aptly/database/goleveldb" @@ -9,24 +12,83 @@ import ( ) type PackageRefListSuite struct { - // Simple list with "real" packages from stanzas - list *PackageList p1, p2, p3, p4, p5, p6 *Package } var _ = Suite(&PackageRefListSuite{}) -func toStrSlice(reflist *PackageRefList) (result []string) { +func verifyRefListIntegrity(c *C, rl AnyRefList) AnyRefList { + if rl, ok := rl.(*SplitRefList); ok { + for idx, bucket := range rl.bucketRefs { + if bucket == nil { + bucket = NewPackageRefList() + } + c.Check(rl.Buckets[idx], DeepEquals, reflistDigest(bucket)) + } + } + + return rl +} + +func getRefs(rl AnyRefList) (refs [][]byte) { + switch rl := rl.(type) { + case *PackageRefList: + refs = rl.Refs + case *SplitRefList: + refs = rl.Flatten().Refs + default: + panic(fmt.Sprintf("unexpected reflist type %T", rl)) + } + + // Hack
so that passing getRefs-returned slices to DeepEquals won't fail given a nil + // slice and an empty slice. + if len(refs) == 0 { + refs = nil + } + return +} + +func toStrSlice(reflist AnyRefList) (result []string) { result = make([]string, reflist.Len()) - for i, r := range reflist.Refs { + for i, r := range getRefs(reflist) { result[i] = string(r) } return } -func (s *PackageRefListSuite) SetUpTest(c *C) { - s.list = NewPackageList() +type reflistFactory struct { + new func() AnyRefList + newFromRefs func(refs ...[]byte) AnyRefList + newFromPackageList func(list *PackageList) AnyRefList +} + +func forEachRefList(test func(f reflistFactory)) { + test(reflistFactory{ + new: func() AnyRefList { + return NewPackageRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return &PackageRefList{Refs: refs} + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewPackageRefListFromPackageList(list) + }, + }) + test(reflistFactory{ + new: func() AnyRefList { + return NewSplitRefList() + }, + newFromRefs: func(refs ...[]byte) AnyRefList { + return NewSplitRefListFromRefList(&PackageRefList{Refs: refs}) + }, + newFromPackageList: func(list *PackageList) AnyRefList { + return NewSplitRefListFromPackageList(list) + }, + }) +} + +func (s *PackageRefListSuite) SetUpTest(c *C) { s.p1 = NewPackageFromControlFile(packageStanza.Copy()) s.p2 = NewPackageFromControlFile(packageStanza.Copy()) stanza := packageStanza.Copy() @@ -44,346 +106,600 @@ func (s *PackageRefListSuite) SetUpTest(c *C) { } func (s *PackageRefListSuite) TestNewPackageListFromRefList(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - coll.Update(s.p1) - coll.Update(s.p3) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + coll.Update(s.p1) + coll.Update(s.p3) - reflist := 
NewPackageRefListFromPackageList(s.list) + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - _, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, ErrorMatches, "unable to load package with key.*") + reflist := f.newFromPackageList(list) - coll.Update(s.p5) - coll.Update(s.p6) + _, err := NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, ErrorMatches, "unable to load package with key.*") - list, err := NewPackageListFromRefList(reflist, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 4) - c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + coll.Update(s.p5) + coll.Update(s.p6) - list, err = NewPackageListFromRefList(nil, coll, nil) - c.Assert(err, IsNil) - c.Check(list.Len(), Equals, 0) + list, err = NewPackageListFromRefList(reflist, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 4) + c.Check(list.Add(s.p4), ErrorMatches, "conflict in package.*") + + list, err = NewPackageListFromRefList(nil, coll, nil) + c.Assert(err, IsNil) + c.Check(list.Len(), Equals, 0) + }) } func (s *PackageRefListSuite) TestNewPackageRefList(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) - - reflist := NewPackageRefListFromPackageList(s.list) - c.Assert(reflist.Len(), Equals, 4) - c.Check(reflist.Refs[0], DeepEquals, []byte(s.p1.Key(""))) - c.Check(reflist.Refs[1], DeepEquals, []byte(s.p6.Key(""))) - c.Check(reflist.Refs[2], DeepEquals, []byte(s.p5.Key(""))) - c.Check(reflist.Refs[3], DeepEquals, []byte(s.p3.Key(""))) - - reflist = NewPackageRefList() - c.Check(reflist.Len(), Equals, 0) + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) + + reflist := f.newFromPackageList(list) + verifyRefListIntegrity(c, reflist) + c.Assert(reflist.Len(), Equals, 4) + refs := getRefs(reflist) + c.Check(refs[0], DeepEquals, []byte(s.p1.Key(""))) + c.Check(refs[1], DeepEquals, 
[]byte(s.p6.Key(""))) + c.Check(refs[2], DeepEquals, []byte(s.p5.Key(""))) + c.Check(refs[3], DeepEquals, []byte(s.p3.Key(""))) + + reflist = f.new() + c.Check(reflist.Len(), Equals, 0) + }) } -func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) +func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - reflist := NewPackageRefListFromPackageList(s.list) + reflist := f.newFromPackageList(list) - reflist2 := &PackageRefList{} - err := reflist2.Decode(reflist.Encode()) - c.Assert(err, IsNil) - c.Check(reflist2.Len(), Equals, reflist.Len()) - c.Check(reflist2.Refs, DeepEquals, reflist.Refs) -} + Len := 0 + err := reflist.ForEach(func([]byte) error { + Len++ + return nil + }) -func (s *PackageRefListSuite) TestPackageRefListForeach(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - s.list.Add(s.p6) + c.Check(Len, Equals, 4) + c.Check(err, IsNil) - reflist := NewPackageRefListFromPackageList(s.list) + e := errors.New("b") - Len := 0 - err := reflist.ForEach(func([]byte) error { - Len++ - return nil - }) - - c.Check(Len, Equals, 4) - c.Check(err, IsNil) + err = reflist.ForEach(func([]byte) error { + return e + }) - e := errors.New("b") + c.Check(err, Equals, e) + }) +} - err = reflist.ForEach(func([]byte) error { - return e +func (s *PackageRefListSuite) TestHas(c *C) { + forEachRefList(func(f reflistFactory) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + reflist := f.newFromPackageList(list) + + c.Check(reflist.Has(s.p1), Equals, true) + c.Check(reflist.Has(s.p3), Equals, true) + c.Check(reflist.Has(s.p5), Equals, true) + c.Check(reflist.Has(s.p2), Equals, true) + c.Check(reflist.Has(s.p6), Equals, false) }) +} - c.Check(err, Equals, e) +func subtractRefLists(l, r AnyRefList) AnyRefList 
{ + switch l := l.(type) { + case *PackageRefList: + return l.Subtract(r.(*PackageRefList)) + case *SplitRefList: + return l.Subtract(r.(*SplitRefList)) + default: + panic(fmt.Sprintf("unexpected reflist type %T", l)) + } } -func (s *PackageRefListSuite) TestHas(c *C) { - s.list.Add(s.p1) - s.list.Add(s.p3) - s.list.Add(s.p5) - reflist := NewPackageRefListFromPackageList(s.list) - - c.Check(reflist.Has(s.p1), Equals, true) - c.Check(reflist.Has(s.p3), Equals, true) - c.Check(reflist.Has(s.p5), Equals, true) - c.Check(reflist.Has(s.p2), Equals, true) - c.Check(reflist.Has(s.p6), Equals, false) +func (s *PackageRefListSuite) TestSubtract(c *C) { + forEachRefList(func(f reflistFactory) { + r1 := []byte("Pall r1") + r2 := []byte("Pall r2") + r3 := []byte("Pall r3") + r4 := []byte("Pall r4") + r5 := []byte("Pall r5") + + empty := f.newFromRefs() + l1 := f.newFromRefs(r1, r2, r3, r4) + l2 := f.newFromRefs(r1, r3) + l3 := f.newFromRefs(r2, r4) + l4 := f.newFromRefs(r4, r5) + l5 := f.newFromRefs(r1, r2, r3) + + c.Check(getRefs(subtractRefLists(l1, empty)), DeepEquals, getRefs(l1)) + c.Check(getRefs(subtractRefLists(l1, l2)), DeepEquals, getRefs(l3)) + c.Check(getRefs(subtractRefLists(l1, l3)), DeepEquals, getRefs(l2)) + c.Check(getRefs(subtractRefLists(l1, l4)), DeepEquals, getRefs(l5)) + c.Check(getRefs(subtractRefLists(empty, l1)), DeepEquals, getRefs(empty)) + c.Check(getRefs(subtractRefLists(l2, l3)), DeepEquals, getRefs(l2)) + }) } -func (s *PackageRefListSuite) TestSubstract(c *C) { - r1 := []byte("r1") - r2 := []byte("r2") - r3 := []byte("r3") - r4 := []byte("r4") - r5 := []byte("r5") - - empty := &PackageRefList{Refs: [][]byte{}} - l1 := &PackageRefList{Refs: [][]byte{r1, r2, r3, r4}} - l2 := &PackageRefList{Refs: [][]byte{r1, r3}} - l3 := &PackageRefList{Refs: [][]byte{r2, r4}} - l4 := &PackageRefList{Refs: [][]byte{r4, r5}} - l5 := &PackageRefList{Refs: [][]byte{r1, r2, r3}} - - c.Check(l1.Subtract(empty), DeepEquals, l1) - c.Check(l1.Subtract(l2), DeepEquals,
l3) - c.Check(l1.Subtract(l3), DeepEquals, l2) - c.Check(l1.Subtract(l4), DeepEquals, l5) - c.Check(empty.Subtract(l1), DeepEquals, empty) - c.Check(l2.Subtract(l3), DeepEquals, l2) +func diffRefLists(l, r AnyRefList, packageCollection *PackageCollection) (PackageDiffs, error) { + switch l := l.(type) { + case *PackageRefList: + return l.Diff(r.(*PackageRefList), packageCollection, nil) + case *SplitRefList: + return l.Diff(r.(*SplitRefList), packageCollection, nil) + default: + panic(fmt.Sprintf("unexpected reflist type %T", l)) + } } func (s *PackageRefListSuite) TestDiff(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 - } + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "xyz", Version: "3.0", Architecture: "sparc"}, //6 + } + + for _, p := range packages { + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[6]) + + listB := NewPackageList() + listB.Add(packages[0]) +
listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + + diffAA, err := diffRefLists(reflistA, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffAA, HasLen, 0) + + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 4) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") + c.Check(diffAB[2].Right, IsNil) + + c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") + c.Check(diffAB[3].Right, IsNil) + + diffBA, err := diffRefLists(reflistB, reflistA, coll) + c.Check(err, IsNil) + c.Check(diffBA, HasLen, 4) + + c.Check(diffBA[0].Right, IsNil) + c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + + c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") + c.Check(diffBA[2].Left, IsNil) + + c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") + c.Check(diffBA[3].Left, IsNil) + }) +} - for _, p := range packages { - coll.Update(p) - } +func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[6]) + packages := []*Package{ + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 + } - listB := NewPackageList() - listB.Add(packages[0]) - 
listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) + for _, p := range packages { + coll.Update(p) + } - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + listA := NewPackageList() + listA.Add(packages[0]) - diffAA, err := reflistA.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffAA, HasLen, 0) + listB := NewPackageList() + listB.Add(packages[1]) + listB.Add(packages[2]) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 4) + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + diffAB, err := diffRefLists(reflistA, reflistB, coll) + c.Check(err, IsNil) + c.Check(diffAB, HasLen, 2) + + c.Check(diffAB[0].Left, IsNil) + c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + + c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") + c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + }) +} - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") +func mergeRefLists(l, r AnyRefList, overrideMatching, ignoreConflicting bool) AnyRefList { + switch l := l.(type) { + case *PackageRefList: + return l.Merge(r.(*PackageRefList), overrideMatching, ignoreConflicting) + case *SplitRefList: + return l.Merge(r.(*SplitRefList), overrideMatching, ignoreConflicting) + default: + panic(fmt.Sprintf("unexpected reflist type %T", l)) + } +} - c.Check(diffAB[2].Left.String(), Equals, "dpkg_1.7_i386") - c.Check(diffAB[2].Right, IsNil) +func (s *PackageRefListSuite) TestMerge(c *C) { + forEachRefList(func(f reflistFactory) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + coll := NewPackageCollection(db) + + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 + {Name: "dpkg", Version: "1.7", Architecture:
"i386"}, //1 + {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 + {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 + {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 + {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 + {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 + {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 + {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 + {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 + } + + for _, p := range packages { + p.V06Plus = true + coll.Update(p) + } + + listA := NewPackageList() + listA.Add(packages[0]) + listA.Add(packages[1]) + listA.Add(packages[2]) + listA.Add(packages[3]) + listA.Add(packages[7]) + + listB := NewPackageList() + listB.Add(packages[0]) + listB.Add(packages[2]) + listB.Add(packages[4]) + listB.Add(packages[5]) + listB.Add(packages[6]) + + listC := NewPackageList() + listC.Add(packages[0]) + listC.Add(packages[8]) + listC.Add(packages[9]) + + reflistA := f.newFromPackageList(listA) + reflistB := f.newFromPackageList(listB) + reflistC := f.newFromPackageList(listC) + + mergeAB := mergeRefLists(reflistA, reflistB, true, false) + mergeBA := mergeRefLists(reflistB, reflistA, true, false) + mergeAC := mergeRefLists(reflistA, reflistC, true, false) + mergeBC := mergeRefLists(reflistB, reflistC, true, false) + mergeCB := mergeRefLists(reflistC, reflistB, true, false) + + verifyRefListIntegrity(c, mergeAB) + verifyRefListIntegrity(c, mergeBA) + verifyRefListIntegrity(c, mergeAC) + verifyRefListIntegrity(c, mergeBC) + verifyRefListIntegrity(c, mergeCB) + + c.Check(toStrSlice(mergeAB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBA), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", 
"Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeAC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBC), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + c.Check(toStrSlice(mergeCB), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) + + mergeABall := mergeRefLists(reflistA, reflistB, false, false) + mergeBAall := mergeRefLists(reflistB, reflistA, false, false) + mergeACall := mergeRefLists(reflistA, reflistC, false, false) + mergeBCall := mergeRefLists(reflistB, reflistC, false, false) + mergeCBall := mergeRefLists(reflistC, reflistB, false, false) + + verifyRefListIntegrity(c, mergeABall) + verifyRefListIntegrity(c, mergeBAall) + verifyRefListIntegrity(c, mergeACall) + verifyRefListIntegrity(c, mergeBCall) + verifyRefListIntegrity(c, mergeCBall) + + c.Check(mergeABall, DeepEquals, mergeBAall) + c.Check(toStrSlice(mergeBAall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + + c.Check(mergeBCall, Not(DeepEquals), mergeCBall) + c.Check(toStrSlice(mergeACall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) + c.Check(toStrSlice(mergeBCall), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 
app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", + "Pi386 lib 1.0 00000000"}) + + mergeBCwithConflicts := mergeRefLists(reflistB, reflistC, false, true) + c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, + []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", + "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + }) +} - c.Check(diffAB[3].Left.String(), Equals, "xyz_3.0_sparc") - c.Check(diffAB[3].Right, IsNil) +func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { + forEachRefList(func(f reflistFactory) { + packages := []*Package{ + {Name: "lib", Version: "1.0", Architecture: "i386"}, + {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, + {Name: "lib", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3", Architecture: "i386"}, + {Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, + {Name: "dpkg", Version: "1.5", Architecture: "i386"}, + {Name: "dpkg", Version: "1.6", Architecture: "i386"}, + } + + rl := NewPackageList() + rl.Add(packages[0]) + rl.Add(packages[1]) + rl.Add(packages[2]) + rl.Add(packages[3]) + rl.Add(packages[4]) + rl.Add(packages[5]) + rl.Add(packages[6]) + rl.Add(packages[7]) + + result := f.newFromPackageList(rl) + result.FilterLatestRefs() + + verifyRefListIntegrity(c, result) + c.Check(toStrSlice(result), DeepEquals, + []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + }) +} - diffBA, err := reflistB.Diff(reflistA, coll) - c.Check(err, IsNil) - c.Check(diffBA, HasLen, 4) +func (s *PackageRefListSuite) TestPackageRefListEncodeDecode(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p5) + list.Add(s.p6) - c.Check(diffBA[0].Right, IsNil) - c.Check(diffBA[0].Left.String(), Equals, "app_1.1~bp2_amd64") + reflist := NewPackageRefListFromPackageList(list) - c.Check(diffBA[1].Right.String(), Equals, "app_1.1~bp1_i386") - 
c.Check(diffBA[1].Left.String(), Equals, "app_1.1~bp2_i386") + reflist2 := &PackageRefList{} + err := reflist2.Decode(reflist.Encode()) + c.Assert(err, IsNil) + c.Check(reflist2.Len(), Equals, reflist.Len()) + c.Check(reflist2.Refs, DeepEquals, reflist.Refs) +} - c.Check(diffBA[2].Right.String(), Equals, "dpkg_1.7_i386") - c.Check(diffBA[2].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketPrefix(c *C) { + c.Check(bucketRefPrefix([]byte("Pall abcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pall libabcd 1.0")), DeepEquals, []byte("abc")) + c.Check(bucketRefPrefix([]byte("Pamd64 xy 1.0")), DeepEquals, []byte("xy")) +} - c.Check(diffBA[3].Right.String(), Equals, "xyz_3.0_sparc") - c.Check(diffBA[3].Left, IsNil) +func (s *PackageRefListSuite) TestRefListBucketIdx(c *C) { + c.Check(bucketIdxForRef(s.p1.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p2.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p3.Key("")), Equals, 26) + c.Check(bucketIdxForRef(s.p4.Key("")), Equals, 46) + c.Check(bucketIdxForRef(s.p5.Key("")), Equals, 4) + c.Check(bucketIdxForRef(s.p6.Key("")), Equals, 46) +} +func (s *PackageRefListSuite) TestSplitRefListBuckets(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) + + sl := NewSplitRefListFromPackageList(list) + verifyRefListIntegrity(c, sl) + + c.Check(hex.EncodeToString(sl.Buckets[4]), Equals, "7287aed32daad5d1aab4e89533bde135381d932e60548cfc00b882ca8858ae07") + c.Check(toStrSlice(sl.bucketRefs[4]), DeepEquals, []string{string(s.p5.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[26]), Equals, "f31fc28e82368b63c8be47eefc64b8e217e2e5349c7e3827b98f80536b956f6e") + c.Check(toStrSlice(sl.bucketRefs[26]), DeepEquals, []string{string(s.p3.Key(""))}) + c.Check(hex.EncodeToString(sl.Buckets[46]), Equals, "55e70286393afc5da5046d68c632d35f98bec24781ae433bd1a1069b52853367") + c.Check(toStrSlice(sl.bucketRefs[46]), DeepEquals, 
[]string{string(s.p1.Key("")), string(s.p6.Key(""))}) } -func (s *PackageRefListSuite) TestDiffCompactsAtEnd(c *C) { - db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) +func (s *PackageRefListSuite) TestRefListDigestSet(c *C) { + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + list.Add(s.p6) - packages := []*Package{ - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //0 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //1 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //2 - } + sl := NewSplitRefListFromPackageList(list) - for _, p := range packages { - coll.Update(p) - } + set := NewRefListDigestSet() + c.Check(set.Len(), Equals, 0) - listA := NewPackageList() - listA.Add(packages[0]) + err := sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, false) + return nil + }) + c.Assert(err, IsNil) - listB := NewPackageList() - listB.Add(packages[1]) - listB.Add(packages[2]) + set.AddAllInRefList(sl) + c.Check(set.Len(), Equals, 3) - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, true) + return nil + }) + c.Assert(err, IsNil) - diffAB, err := reflistA.Diff(reflistB, coll) - c.Check(err, IsNil) - c.Check(diffAB, HasLen, 2) + firstDigest := sl.Buckets[bucketIdxForRef(s.p1.Key(""))] + set.Remove(firstDigest) + c.Check(set.Len(), Equals, 2) - c.Check(diffAB[0].Left, IsNil) - c.Check(diffAB[0].Right.String(), Equals, "app_1.1~bp2_amd64") + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set.Has(digest), Equals, !bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) - c.Check(diffAB[1].Left.String(), Equals, "app_1.1~bp1_i386") - c.Check(diffAB[1].Right.String(), Equals, "app_1.1~bp2_i386") + set2 
:= NewRefListDigestSet() + set2.AddAllInRefList(sl) + set2.RemoveAll(set) + + err = sl.ForEachBucket(func(digest []byte, bucket *PackageRefList) error { + c.Check(set2.Has(digest), Equals, bytes.Equal(digest, firstDigest)) + return nil + }) + c.Assert(err, IsNil) } -func (s *PackageRefListSuite) TestMerge(c *C) { +func (s *PackageRefListSuite) TestRefListCollectionLoadSave(c *C) { db, _ := goleveldb.NewOpenDB(c.MkDir()) - coll := NewPackageCollection(db) - - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, //0 - {Name: "dpkg", Version: "1.7", Architecture: "i386"}, //1 - {Name: "data", Version: "1.1~bp1", Architecture: "all"}, //2 - {Name: "app", Version: "1.1~bp1", Architecture: "i386"}, //3 - {Name: "app", Version: "1.1~bp2", Architecture: "i386"}, //4 - {Name: "app", Version: "1.1~bp2", Architecture: "amd64"}, //5 - {Name: "dpkg", Version: "1.0", Architecture: "i386"}, //6 - {Name: "xyz", Version: "1.0", Architecture: "sparc"}, //7 - {Name: "dpkg", Version: "1.0", Architecture: "i386", FilesHash: 0x34445}, //8 - {Name: "app", Version: "1.1~bp2", Architecture: "i386", FilesHash: 0x44}, //9 - } + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + err := reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(reflist)) - for _, p := range packages { - p.V06Plus = true - coll.Update(p) - } + list.Add(s.p6) + sl = NewSplitRefListFromPackageList(list) + err = 
reflistCollection.Update(sl, key) + c.Assert(err, IsNil) - listA := NewPackageList() - listA.Add(packages[0]) - listA.Add(packages[1]) - listA.Add(packages[2]) - listA.Add(packages[3]) - listA.Add(packages[7]) - - listB := NewPackageList() - listB.Add(packages[0]) - listB.Add(packages[2]) - listB.Add(packages[4]) - listB.Add(packages[5]) - listB.Add(packages[6]) - - listC := NewPackageList() - listC.Add(packages[0]) - listC.Add(packages[8]) - listC.Add(packages[9]) - - reflistA := NewPackageRefListFromPackageList(listA) - reflistB := NewPackageRefListFromPackageList(listB) - reflistC := NewPackageRefListFromPackageList(listC) - - mergeAB := reflistA.Merge(reflistB, true, false) - mergeBA := reflistB.Merge(reflistA, true, false) - mergeAC := reflistA.Merge(reflistC, true, false) - mergeBC := reflistB.Merge(reflistC, true, false) - mergeCB := reflistC.Merge(reflistB, true, false) - - c.Check(toStrSlice(mergeAB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBA), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeAC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBC), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) - c.Check(toStrSlice(mergeCB), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 dpkg 1.0 00000000", "Pi386 lib 1.0 00000000"}) - - mergeABall := reflistA.Merge(reflistB, false, false) - 
mergeBAall := reflistB.Merge(reflistA, false, false) - mergeACall := reflistA.Merge(reflistC, false, false) - mergeBCall := reflistB.Merge(reflistC, false, false) - mergeCBall := reflistC.Merge(reflistB, false, false) - - c.Check(mergeABall, DeepEquals, mergeBAall) - c.Check(toStrSlice(mergeBAall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000000", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - - c.Check(mergeBCall, Not(DeepEquals), mergeCBall) - c.Check(toStrSlice(mergeACall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pi386 app 1.1~bp1 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 dpkg 1.7 00000000", "Pi386 lib 1.0 00000000", "Psparc xyz 1.0 00000000"}) - c.Check(toStrSlice(mergeBCall), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", "Pi386 dpkg 1.0 00034445", - "Pi386 lib 1.0 00000000"}) - - mergeBCwithConflicts := reflistB.Merge(reflistC, false, true) - c.Check(toStrSlice(mergeBCwithConflicts), DeepEquals, - []string{"Pall data 1.1~bp1 00000000", "Pamd64 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000000", "Pi386 app 1.1~bp2 00000044", - "Pi386 dpkg 1.0 00000000", "Pi386 dpkg 1.0 00034445", "Pi386 lib 1.0 00000000"}) + sl = NewSplitRefList() + err = reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) } -func (s *PackageRefListSuite) TestFilterLatestRefs(c *C) { - packages := []*Package{ - {Name: "lib", Version: "1.0", Architecture: "i386"}, - {Name: "lib", Version: "1.2~bp1", Architecture: "i386"}, - {Name: "lib", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.3", Architecture: "i386"}, - 
{Name: "dpkg", Version: "1.3~bp2", Architecture: "i386"}, - {Name: "dpkg", Version: "1.5", Architecture: "i386"}, - {Name: "dpkg", Version: "1.6", Architecture: "i386"}, - } +func (s *PackageRefListSuite) TestRefListCollectionMigrate(c *C) { + db, _ := goleveldb.NewOpenDB(c.MkDir()) + reflistCollection := NewRefListCollection(db) + packageCollection := NewPackageCollection(db) + + packageCollection.Update(s.p1) + packageCollection.Update(s.p2) + packageCollection.Update(s.p3) + packageCollection.Update(s.p4) + packageCollection.Update(s.p5) + packageCollection.Update(s.p6) + + list := NewPackageList() + list.Add(s.p1) + list.Add(s.p2) + list.Add(s.p3) + list.Add(s.p4) + list.Add(s.p5) + + key := []byte("test") + + reflist := NewPackageRefListFromPackageList(list) + db.Put(key, reflist.Encode()) + + sl := NewSplitRefList() + format, err := reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatInline) + + migrator := reflistCollection.NewMigration() + err = reflistCollection.LoadCompleteAndMigrate(sl, key, migrator) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) - rl := NewPackageList() - rl.Add(packages[0]) - rl.Add(packages[1]) - rl.Add(packages[2]) - rl.Add(packages[3]) - rl.Add(packages[4]) - rl.Add(packages[5]) - rl.Add(packages[6]) - rl.Add(packages[7]) - - result := NewPackageRefListFromPackageList(rl) - result.FilterLatestRefs() - - c.Check(toStrSlice(result), DeepEquals, - []string{"Pi386 dpkg 1.6", "Pi386 lib 1.2"}) + stats := migrator.Stats() + c.Check(stats.Reflists, Equals, 0) + c.Check(stats.Buckets, Equals, 0) + c.Check(stats.Segments, Equals, 0) + + err = migrator.Flush() + c.Assert(err, IsNil) + stats = migrator.Stats() + c.Check(stats.Reflists, Equals, 1) + c.Check(stats.Buckets, Not(Equals), 0) + c.Check(stats.Segments, Not(Equals), 0) + + sl = NewSplitRefList() + err = 
reflistCollection.LoadComplete(sl, key) + c.Assert(err, IsNil) + verifyRefListIntegrity(c, sl) + c.Check(toStrSlice(sl), DeepEquals, toStrSlice(NewPackageRefListFromPackageList(list))) + + format, err = reflistCollection.load(sl, key) + c.Assert(err, IsNil) + c.Check(format, Equals, reflistStorageFormatSplit) } diff --git a/deb/remote.go b/deb/remote.go index 72deb7af..ce54ea63 100644 --- a/deb/remote.go +++ b/deb/remote.go @@ -73,7 +73,7 @@ type RemoteRepo struct { // Packages for json output Packages []string `codec:"-" json:",omitempty"` // "Snapshot" of current list of packages - packageRefs *PackageRefList + packageRefs *SplitRefList // Parsed archived root archiveRootURL *url.URL // Current list of packages (filled while updating mirror) @@ -171,7 +171,7 @@ func (repo *RemoteRepo) NumPackages() int { } // RefList returns package list for repo -func (repo *RemoteRepo) RefList() *PackageRefList { +func (repo *RemoteRepo) RefList() *SplitRefList { return repo.packageRefs } @@ -659,7 +659,7 @@ func (repo *RemoteRepo) FinalizeDownload(collectionFactory *CollectionFactory, p }) if err == nil { - repo.packageRefs = NewPackageRefListFromPackageList(repo.packageList) + repo.packageRefs = NewSplitRefListFromPackageList(repo.packageList) repo.packageList = nil } @@ -801,14 +801,14 @@ func (collection *RemoteRepoCollection) search(filter func(*RemoteRepo) bool, un } // Add appends new repo to collection and saves it -func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Add(repo *RemoteRepo, reflistCollection *RefListCollection) error { _, err := collection.ByName(repo.Name) if err == nil { return fmt.Errorf("mirror with name %s already exists", repo.Name) } - err = collection.Update(repo) + err = collection.Update(repo, reflistCollection) if err != nil { return err } @@ -818,28 +818,26 @@ func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error { } // Update stores updated information about repo in DB 
-func (collection *RemoteRepoCollection) Update(repo *RemoteRepo) error { +func (collection *RemoteRepoCollection) Update(repo *RemoteRepo, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(repo.Key(), repo.Encode()) if repo.packageRefs != nil { - batch.Put(repo.RefKey(), repo.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(repo.packageRefs, repo.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information for remote repo -func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error { - encoded, err := collection.db.Get(repo.RefKey()) +func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo, reflistCollection *RefListCollection) error { + repo.packageRefs = NewSplitRefList() + err := reflistCollection.LoadComplete(repo.packageRefs, repo.RefKey()) if err == database.ErrNotFound { return nil } - if err != nil { - return err - } - repo.packageRefs = &PackageRefList{} - return repo.packageRefs.Decode(encoded) + return err } // ByName looks up repository by name diff --git a/deb/remote_test.go b/deb/remote_test.go index c331579f..3e05ef7e 100644 --- a/deb/remote_test.go +++ b/deb/remote_test.go @@ -52,7 +52,7 @@ func (n *NullVerifier) IsClearSigned(clearsign io.Reader) (bool, error) { type PackageListMixinSuite struct { p1, p2, p3 *Package list *PackageList - reflist *PackageRefList + reflist *SplitRefList } func (s *PackageListMixinSuite) SetUpPackages() { @@ -72,7 +72,7 @@ func (s *PackageListMixinSuite) SetUpPackages() { s.list.Add(s.p2) s.list.Add(s.p3) - s.reflist = NewPackageRefListFromPackageList(s.list) + s.reflist = NewSplitRefListFromPackageList(s.list) } type RemoteRepoSuite struct { @@ -291,7 +291,7 @@ func (s *RemoteRepoSuite) TestDownload(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := 
s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -373,12 +373,12 @@ func (s *RemoteRepoSuite) TestDownloadWithInstaller(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "installer") } @@ -419,12 +419,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSources(c *C) { s.repo.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.repo.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.repo.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -503,7 +503,7 @@ func (s *RemoteRepoSuite) TestDownloadFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") @@ -593,12 
+593,12 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { s.flat.FinalizeDownload(s.collectionFactory, nil) c.Assert(s.flat.packageRefs, NotNil) - pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[0]) + pkg, err := s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[0]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "amanda-client") - pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Refs[1]) + pkg, err = s.collectionFactory.PackageCollection().ByKey(s.flat.packageRefs.Flatten().Refs[1]) c.Assert(err, IsNil) c.Check(pkg.Name, Equals, "access-modifier-checker") @@ -658,8 +658,9 @@ func (s *RemoteRepoSuite) TestDownloadWithSourcesFlat(c *C) { type RemoteRepoCollectionSuite struct { PackageListMixinSuite - db database.Storage - collection *RemoteRepoCollection + db database.Storage + collection *RemoteRepoCollection + refListCollection *RefListCollection } var _ = Suite(&RemoteRepoCollectionSuite{}) @@ -667,6 +668,7 @@ var _ = Suite(&RemoteRepoCollectionSuite{}) func (s *RemoteRepoCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewRemoteRepoCollection(s.db) + s.refListCollection = NewRefListCollection(s.db) s.SetUpPackages() } @@ -679,8 +681,8 @@ func (s *RemoteRepoCollectionSuite) TestAddByName(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) - c.Assert(s.collection.Add(repo), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), ErrorMatches, ".*already exists") r, err := s.collection.ByName("yandex") c.Assert(err, IsNil) @@ -697,7 +699,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { c.Assert(err, ErrorMatches, "*.not found") repo, _ := 
NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Add(repo), IsNil) + c.Assert(s.collection.Add(repo, s.refListCollection), IsNil) r, err := s.collection.ByUUID(repo.UUID) c.Assert(err, IsNil) @@ -711,7 +713,7 @@ func (s *RemoteRepoCollectionSuite) TestByUUID(c *C) { func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection := NewRemoteRepoCollection(s.db) r, err := collection.ByName("yandex") @@ -719,20 +721,20 @@ func (s *RemoteRepoCollectionSuite) TestUpdateLoadComplete(c *C) { c.Assert(r.packageRefs, IsNil) repo.packageRefs = s.reflist - c.Assert(s.collection.Update(repo), IsNil) + c.Assert(s.collection.Update(repo, s.refListCollection), IsNil) collection = NewRemoteRepoCollection(s.db) r, err = collection.ByName("yandex") c.Assert(err, IsNil) c.Assert(r.packageRefs, IsNil) c.Assert(r.NumPackages(), Equals, 0) - c.Assert(s.collection.LoadComplete(r), IsNil) + c.Assert(s.collection.LoadComplete(r, s.refListCollection), IsNil) c.Assert(r.NumPackages(), Equals, 3) } func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { repo, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo) + s.collection.Add(repo, s.refListCollection) count := 0 err := s.collection.ForEach(func(*RemoteRepo) error { @@ -754,10 +756,10 @@ func (s *RemoteRepoCollectionSuite) TestForEachAndLen(c *C) { func (s *RemoteRepoCollectionSuite) TestDrop(c *C) { repo1, _ := NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo1) + s.collection.Add(repo1, 
s.refListCollection) repo2, _ := NewRemoteRepo("tyndex", "http://mirror.yandex.ru/debian/", "wheezy", []string{"main"}, []string{}, false, false, false) - s.collection.Add(repo2) + s.collection.Add(repo2, s.refListCollection) r1, _ := s.collection.ByUUID(repo1.UUID) c.Check(r1, Equals, repo1) diff --git a/deb/snapshot.go b/deb/snapshot.go index f351b87f..a9a8655c 100644 --- a/deb/snapshot.go +++ b/deb/snapshot.go @@ -40,7 +40,7 @@ type Snapshot struct { NotAutomatic string ButAutomaticUpgrades string - packageRefs *PackageRefList + packageRefs *SplitRefList } // NewSnapshotFromRepository creates snapshot from current state of repository @@ -76,7 +76,7 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { } if snap.packageRefs == nil { - snap.packageRefs = NewPackageRefList() + snap.packageRefs = NewSplitRefList() } return snap, nil @@ -84,11 +84,13 @@ func NewSnapshotFromLocalRepo(name string, repo *LocalRepo) (*Snapshot, error) { // NewSnapshotFromPackageList creates snapshot from PackageList func NewSnapshotFromPackageList(name string, sources []*Snapshot, list *PackageList, description string) *Snapshot { - return NewSnapshotFromRefList(name, sources, NewPackageRefListFromPackageList(list), description) + sl := NewSplitRefList() + sl.Replace(NewPackageRefListFromPackageList(list)) + return NewSnapshotFromRefList(name, sources, sl, description) } -// NewSnapshotFromRefList creates snapshot from PackageRefList -func NewSnapshotFromRefList(name string, sources []*Snapshot, list *PackageRefList, description string) *Snapshot { +// NewSnapshotFromRefList creates snapshot from SplitRefList +func NewSnapshotFromRefList(name string, sources []*Snapshot, list *SplitRefList, description string) *Snapshot { sourceUUIDs := make([]string, len(sources)) for i := range sources { sourceUUIDs[i] = sources[i].UUID @@ -116,7 +118,7 @@ func (s *Snapshot) NumPackages() int { } // RefList returns list of package refs in snapshot -func (s *Snapshot) 
RefList() *PackageRefList { +func (s *Snapshot) RefList() *SplitRefList { return s.packageRefs } @@ -209,13 +211,13 @@ func NewSnapshotCollection(db database.Storage) *SnapshotCollection { } // Add appends new repo to collection and saves it -func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Add(snapshot *Snapshot, reflistCollection *RefListCollection) error { _, err := collection.ByName(snapshot.Name) if err == nil { return fmt.Errorf("snapshot with name %s already exists", snapshot.Name) } - err = collection.Update(snapshot) + err = collection.Update(snapshot, reflistCollection) if err != nil { return err } @@ -225,26 +227,22 @@ func (collection *SnapshotCollection) Add(snapshot *Snapshot) error { } // Update stores updated information about snapshot in DB -func (collection *SnapshotCollection) Update(snapshot *Snapshot) error { +func (collection *SnapshotCollection) Update(snapshot *Snapshot, reflistCollection *RefListCollection) error { batch := collection.db.CreateBatch() batch.Put(snapshot.Key(), snapshot.Encode()) if snapshot.packageRefs != nil { - batch.Put(snapshot.RefKey(), snapshot.packageRefs.Encode()) + rb := reflistCollection.NewBatch(batch) + reflistCollection.UpdateInBatch(snapshot.packageRefs, snapshot.RefKey(), rb) } return batch.Write() } // LoadComplete loads additional information about snapshot -func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot) error { - encoded, err := collection.db.Get(snapshot.RefKey()) - if err != nil { - return err - } - - snapshot.packageRefs = &PackageRefList{} - return snapshot.packageRefs.Decode(encoded) +func (collection *SnapshotCollection) LoadComplete(snapshot *Snapshot, reflistCollection *RefListCollection) error { + snapshot.packageRefs = NewSplitRefList() + return reflistCollection.LoadComplete(snapshot.packageRefs, snapshot.RefKey()) } func (collection *SnapshotCollection) search(filter func(*Snapshot) bool, unique bool) 
[]*Snapshot { diff --git a/deb/snapshot_bench_test.go b/deb/snapshot_bench_test.go index c6bb94a2..4475ca57 100644 --- a/deb/snapshot_bench_test.go +++ b/deb/snapshot_bench_test.go @@ -18,10 +18,11 @@ func BenchmarkSnapshotCollectionForEach(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } @@ -47,11 +48,12 @@ func BenchmarkSnapshotCollectionByUUID(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) uuids := []string{} for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } uuids = append(uuids, snapshot.UUID) @@ -78,10 +80,11 @@ func BenchmarkSnapshotCollectionByName(b *testing.B) { defer db.Close() collection := NewSnapshotCollection(db) + reflistCollection := NewRefListCollection(db) for i := 0; i < count; i++ { - snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewPackageRefList(), fmt.Sprintf("Snapshot number %d", i)) - if collection.Add(snapshot) != nil { + snapshot := NewSnapshotFromRefList(fmt.Sprintf("snapshot%d", i), nil, NewSplitRefList(), fmt.Sprintf("Snapshot number %d", i)) + if collection.Add(snapshot, reflistCollection) != nil { b.FailNow() } } diff --git 
a/deb/snapshot_test.go b/deb/snapshot_test.go index d27c4226..805ccc8e 100644 --- a/deb/snapshot_test.go +++ b/deb/snapshot_test.go @@ -109,6 +109,7 @@ type SnapshotCollectionSuite struct { snapshot1, snapshot2 *Snapshot snapshot3, snapshot4 *Snapshot collection *SnapshotCollection + reflistCollection *RefListCollection } var _ = Suite(&SnapshotCollectionSuite{}) @@ -116,6 +117,7 @@ var _ = Suite(&SnapshotCollectionSuite{}) func (s *SnapshotCollectionSuite) SetUpTest(c *C) { s.db, _ = goleveldb.NewOpenDB(c.MkDir()) s.collection = NewSnapshotCollection(s.db) + s.reflistCollection = NewRefListCollection(s.db) s.SetUpPackages() s.repo1, _ = NewRemoteRepo("yandex", "http://mirror.yandex.ru/debian/", "squeeze", []string{"main"}, []string{}, false, false, false) @@ -143,10 +145,10 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { _, err := s.collection.ByName("snap1") c.Assert(err, ErrorMatches, "*.not found") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot1), ErrorMatches, ".*already exists") + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), ErrorMatches, ".*already exists") - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) snapshot, err := s.collection.ByName("snap1") c.Assert(err, IsNil) @@ -167,20 +169,20 @@ func (s *SnapshotCollectionSuite) TestAddByNameByUUID(c *C) { } func (s *SnapshotCollectionSuite) TestUpdateLoadComplete(c *C) { - c.Assert(s.collection.Update(s.snapshot1), IsNil) + c.Assert(s.collection.Update(s.snapshot1, s.reflistCollection), IsNil) collection := NewSnapshotCollection(s.db) snapshot, err := collection.ByName("snap1") c.Assert(err, IsNil) c.Assert(snapshot.packageRefs, IsNil) - c.Assert(s.collection.LoadComplete(snapshot), IsNil) + c.Assert(s.collection.LoadComplete(snapshot, s.reflistCollection), IsNil) c.Assert(snapshot.NumPackages(), 
Equals, 3) } func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) count := 0 err := s.collection.ForEach(func(*Snapshot) error { @@ -200,10 +202,10 @@ func (s *SnapshotCollectionSuite) TestForEachAndLen(c *C) { } func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { - s.collection.Add(s.snapshot2) - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot4) - s.collection.Add(s.snapshot3) + s.collection.Add(s.snapshot2, s.reflistCollection) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot4, s.reflistCollection) + s.collection.Add(s.snapshot3, s.reflistCollection) names := []string{} @@ -217,8 +219,8 @@ func (s *SnapshotCollectionSuite) TestForEachSorted(c *C) { } func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) c.Check(s.collection.ByRemoteRepoSource(s.repo1), DeepEquals, []*Snapshot{s.snapshot1}) c.Check(s.collection.ByRemoteRepoSource(s.repo2), DeepEquals, []*Snapshot{s.snapshot2}) @@ -229,10 +231,10 @@ func (s *SnapshotCollectionSuite) TestFindByRemoteRepoSource(c *C) { } func (s *SnapshotCollectionSuite) TestFindByLocalRepoSource(c *C) { - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(s.snapshot3), IsNil) - c.Assert(s.collection.Add(s.snapshot4), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot4, 
s.reflistCollection), IsNil) c.Check(s.collection.ByLocalRepoSource(s.lrepo1), DeepEquals, []*Snapshot{s.snapshot3}) c.Check(s.collection.ByLocalRepoSource(s.lrepo2), DeepEquals, []*Snapshot{s.snapshot4}) @@ -247,11 +249,11 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { snapshot4 := NewSnapshotFromRefList("snap4", []*Snapshot{s.snapshot1}, s.reflist, "desc2") snapshot5 := NewSnapshotFromRefList("snap5", []*Snapshot{snapshot3}, s.reflist, "desc3") - c.Assert(s.collection.Add(s.snapshot1), IsNil) - c.Assert(s.collection.Add(s.snapshot2), IsNil) - c.Assert(s.collection.Add(snapshot3), IsNil) - c.Assert(s.collection.Add(snapshot4), IsNil) - c.Assert(s.collection.Add(snapshot5), IsNil) + c.Assert(s.collection.Add(s.snapshot1, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(s.snapshot2, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot3, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot4, s.reflistCollection), IsNil) + c.Assert(s.collection.Add(snapshot5, s.reflistCollection), IsNil) list := s.collection.BySnapshotSource(s.snapshot1) sorter, _ := newSnapshotSorter("name", list) @@ -263,8 +265,8 @@ func (s *SnapshotCollectionSuite) TestFindSnapshotSource(c *C) { } func (s *SnapshotCollectionSuite) TestDrop(c *C) { - s.collection.Add(s.snapshot1) - s.collection.Add(s.snapshot2) + s.collection.Add(s.snapshot1, s.reflistCollection) + s.collection.Add(s.snapshot2, s.reflistCollection) snap, _ := s.collection.ByUUID(s.snapshot1.UUID) c.Check(snap, Equals, s.snapshot1) diff --git a/go.mod b/go.mod index e1ce930c..8b9194b7 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/DisposaBoy/JsonConfigReader v0.0.0-20201129172854-99cf318d67e7 github.com/awalterschulze/gographviz v2.0.3+incompatible github.com/cavaliergopher/grab/v3 v3.0.1 + github.com/cespare/xxhash/v2 v2.2.0 github.com/cheggaaa/pb v1.0.29 github.com/gin-gonic/gin v1.9.1 github.com/go-playground/validator/v10 v10.15.4 // indirect @@ -59,7 
+60,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/sts v1.23.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bytedance/sonic v1.10.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect github.com/chenzhuoyu/iasm v0.9.0 // indirect github.com/cloudflare/circl v1.3.3 // indirect diff --git a/system/t08_db/CleanupDB10Test_gold b/system/t08_db/CleanupDB10Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB10Test_gold +++ b/system/t08_db/CleanupDB10Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB11Test_gold b/system/t08_db/CleanupDB11Test_gold index be3767e9..33be8b2a 100644 --- a/system/t08_db/CleanupDB11Test_gold +++ b/system/t08_db/CleanupDB11Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Split 11 reflist(s) into 510 bucket(s) (123181 segment(s)) Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -24,6 +25,7 @@ List of package keys to delete: - Pi386 gnuplot-nox 4.6.1-1~maverick2 17785995cf0f815 - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... 
diff --git a/system/t08_db/CleanupDB12Test_gold b/system/t08_db/CleanupDB12Test_gold index 31da9b23..4e88abc2 100644 --- a/system/t08_db/CleanupDB12Test_gold +++ b/system/t08_db/CleanupDB12Test_gold @@ -14,6 +14,7 @@ Loading mirrors: Loading local repos: Loading snapshots: Loading published repositories: +Skipped splitting 11 reflist(s) into 510 bucket(s) (123181 segment(s)), as -dry-run has been requested. Loading list of all packages... Deleting unreferenced packages (7)... List of package keys to delete: @@ -25,6 +26,7 @@ List of package keys to delete: - Pi386 gnuplot-x11 4.6.1-1~maverick2 d42e1d0d2f23740 - Psource gnuplot 4.6.1-1~maverick2 b8cd36358f5db41f Skipped deletion, as -dry-run has been requested. +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB1Test_gold b/system/t08_db/CleanupDB1Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB1Test_gold +++ b/system/t08_db/CleanupDB1Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB2Test_gold b/system/t08_db/CleanupDB2Test_gold index 1f289e67..4e84de6a 100644 --- a/system/t08_db/CleanupDB2Test_gold +++ b/system/t08_db/CleanupDB2Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (73270)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... 
diff --git a/system/t08_db/CleanupDB3Test_gold b/system/t08_db/CleanupDB3Test_gold index 73279e14..73c82f87 100644 --- a/system/t08_db/CleanupDB3Test_gold +++ b/system/t08_db/CleanupDB3Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB4Test_gold b/system/t08_db/CleanupDB4Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB4Test_gold +++ b/system/t08_db/CleanupDB4Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB5Test_gold b/system/t08_db/CleanupDB5Test_gold index 73279e14..47bba4e2 100644 --- a/system/t08_db/CleanupDB5Test_gold +++ b/system/t08_db/CleanupDB5Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (7)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (9)... diff --git a/system/t08_db/CleanupDB6Test_gold b/system/t08_db/CleanupDB6Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB6Test_gold +++ b/system/t08_db/CleanupDB6Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... 
Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB7Test_gold b/system/t08_db/CleanupDB7Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB7Test_gold +++ b/system/t08_db/CleanupDB7Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... diff --git a/system/t08_db/CleanupDB8Test_gold b/system/t08_db/CleanupDB8Test_gold index f769f203..43ebe9aa 100644 --- a/system/t08_db/CleanupDB8Test_gold +++ b/system/t08_db/CleanupDB8Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (4)... +Deleting unreferenced reflist buckets (1)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (6)... diff --git a/system/t08_db/CleanupDB9Test_gold b/system/t08_db/CleanupDB9Test_gold index 138adc29..faa25944 100644 --- a/system/t08_db/CleanupDB9Test_gold +++ b/system/t08_db/CleanupDB9Test_gold @@ -1,6 +1,7 @@ Loading mirrors, local repos, snapshots and published repos... Loading list of all packages... Deleting unreferenced packages (0)... +Deleting unreferenced reflist buckets (0)... Building list of files referenced by packages... Building list of files in package pool... Deleting unreferenced files (0)... -- GitLab