mirror of https://github.com/restic/restic.git
prune: make small pack size configurable for `prune`, all changes together

cmd_prune.go: added option `--repack-smaller-than`
prune.go: added field `SmallPackBytes` to `PruneOptions`, including checking and processing
prune_test.go: added test `TestPruneSmall`
doc/060_forget.rst: added description of the enhancement
changelog/unreleased/issue-5109: description of the enhancement
This commit is contained in:
parent 5ddda7f5e9
commit a2a1309fd9
changelog/unreleased/issue-5109
@@ -0,0 +1,8 @@
Enhancement: Make small pack size configurable for `prune`

The `prune` command now supports the `--repack-smaller-than` option, which
repacks pack files smaller than the given size. It has to be used in
conjunction with the `--repack-small` option.

https://github.com/restic/restic/issues/5109
https://github.com/restic/restic/pull/5183
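Not part of the commit: a minimal sketch of how the two new CLI flags are intended to map onto the repository-level `PruneOptions` fields added further down in this diff. The standalone program and the 4 MiB value are illustrative assumptions; only the field names come from the hunks below.

```go
package main

import (
	"fmt"

	// assumes this sketch lives inside the restic module, since the
	// repository package is internal
	"github.com/restic/restic/internal/repository"
)

func main() {
	// --repack-small           -> RepackSmall
	// --repack-smaller-than 4M -> SmallPackBytes (parsed from the flag string)
	opts := repository.PruneOptions{
		RepackSmall:    true,
		SmallPackBytes: 4 * 1024 * 1024, // illustrative value
	}
	fmt.Printf("repacking pack files smaller than %d bytes\n", opts.SmallPackBytes)
}
```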
cmd_prune.go
@@ -67,6 +67,9 @@ type PruneOptions struct {
	RepackCacheableOnly bool
	RepackSmall         bool
	RepackUncompressed  bool

	SmallPackSize  string
	SmallPackBytes uint64
}

func (opts *PruneOptions) AddFlags(f *pflag.FlagSet) {
@@ -81,6 +84,7 @@ func (opts *PruneOptions) AddLimitedFlags(f *pflag.FlagSet) {
	f.BoolVar(&opts.RepackCacheableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
	f.BoolVar(&opts.RepackSmall, "repack-small", false, "repack pack files below 80% of target pack size")
	f.BoolVar(&opts.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data")
	f.StringVar(&opts.SmallPackSize, "repack-smaller-than", "", "repack packfiles smaller than `below-limit` (allowed suffixes: k/K, m/M)")
}

func verifyPruneOptions(opts *PruneOptions) error {
@@ -139,6 +143,20 @@ func verifyPruneOptions(opts *PruneOptions) error {
		}
	}

	if !opts.RepackSmall && opts.SmallPackSize != "" {
		return errors.Fatal("options --repack-small and --repack-smaller-than must be specified together")
	}

	opts.SmallPackBytes = uint64(0)
	if opts.SmallPackSize != "" {
		size, err := ui.ParseBytes(opts.SmallPackSize)
		if err != nil {
			return errors.Fatalf("invalid number of bytes %q for --repack-smaller-than: %v", opts.SmallPackSize, err)
		}
		opts.SmallPackBytes = uint64(size)
		opts.RepackSmall = true
	}

	return nil
}
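As a reading aid (not part of the commit), here is a small sketch that exercises `ui.ParseBytes`, the same helper that `verifyPruneOptions` calls above. Which input forms parse successfully is an assumption based on the flag's help text (suffixes k/K and m/M); the program simply prints whatever the parser returns.

```go
package main

import (
	"fmt"

	// internal package: this sketch assumes it is compiled inside the restic module
	"github.com/restic/restic/internal/ui"
)

func main() {
	// suffixes named in the --repack-smaller-than help text
	for _, s := range []string{"500k", "500K", "4m", "4M"} {
		n, err := ui.ParseBytes(s)
		fmt.Printf("%-4s -> %d bytes (err=%v)\n", s, n, err)
	}
}
```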
doc/060_forget.rst
@@ -88,14 +88,14 @@ command must be run:
    searching used packs...
    collecting packs for deletion and repacking
    [0:00] 100.00%  5 / 5 packs processed

    to repack:            69 blobs / 1.078 MiB
    this removes:         67 blobs / 1.047 MiB
    to delete:             7 blobs / 25.726 KiB
    total prune:          74 blobs / 1.072 MiB
    remaining:            16 blobs / 38.003 KiB
    unused size after prune: 0 B (0.00% of remaining size)

    repacking packs
    [0:00] 100.00%  2 / 2 packs repacked
    rebuilding index
@@ -134,14 +134,14 @@ to ``forget``:
    searching used packs...
    collecting packs for deletion and repacking
    [0:00] 100.00%  5 / 5 packs processed

    to repack:            69 blobs / 1.078 MiB
    this removes          67 blobs / 1.047 MiB
    to delete:             7 blobs / 25.726 KiB
    total prune:          74 blobs / 1.072 MiB
    remaining:            16 blobs / 38.003 KiB
    unused size after prune: 0 B (0.00% of remaining size)

    repacking packs
    [0:00] 100.00%  2 / 2 packs repacked
    rebuilding index
@@ -214,7 +214,7 @@ The ``forget`` command accepts the following policy options:
   run) and these snapshots will hence not be removed.

.. note:: If there are not enough snapshots to keep one for each duration related
   ``--keep-{within-,}*`` option, the oldest snapshot is kept additionally and
   marked as ``oldest`` in the output (e.g. ``oldest hourly snapshot``).

.. note:: Specifying ``--keep-tag ''`` will match untagged snapshots only.
@@ -331,7 +331,7 @@ kept, depending on whether one of them ends up being the same as an already kept
snapshot. All other snapshots are removed.

You might want to maintain the same policy as in the example above, but have
irregular backups. For example, the 7 snapshots specified with ``--keep-daily 7``
might be spread over a longer period. If what you want is to keep daily
snapshots for the last week, weekly for the last month, monthly for the last
year and yearly for the last 75 years, you can instead specify ``forget
@@ -448,13 +448,13 @@ The ``prune`` command accepts the following options:
  you want to minimize the time and bandwidth used by the ``prune``
  operation. Note that metadata will still be repacked.

  Restic tries to repack as little data as possible while still ensuring this
  limit for unused data. The default value is 5%.

- ``--max-repack-size size`` if set limits the total size of files to repack.
  As ``prune`` first stores all repacked files and deletes the obsolete files at the end,
  this option might be handy if you expect many files to be repacked and fear to run low
  on storage.

- ``--repack-cacheable-only`` if set to true only files which contain
  metadata and would be stored in the cache are repacked. Other pack files are
@@ -463,6 +463,14 @@ The ``prune`` command accepts the following options:
  your repository exceeds the value given by ``--max-unused``.
  The default value is false.

- ``--repack-small`` if set will repack pack files below 80% of the target pack size.
  The default value is false.

- ``--repack-smaller-than`` in conjunction with ``--repack-small`` repacks all
  pack files below the given size. This allows pack files that originally came
  from a repository with a smaller ``--pack-size`` to be compacted into larger
  pack files.

- ``--dry-run`` only show what ``prune`` would do.

- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
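To make the documented options above concrete, here is a short sketch (not part of the commit) that mirrors the target-pack-size selection this commit adds to `decidePackAction` in `internal/repository/prune.go`, shown further down in this diff. The 16 MiB default pack size is taken from the test comment below; the 4 MiB value is purely illustrative.

```go
package main

import "fmt"

// repackThreshold mirrors the threshold selection in decidePackAction:
// pack files smaller than the returned value are candidates for repacking.
func repackThreshold(packSize uint, repackSmall bool, smallPackBytes uint) uint {
	// only repack very small files by default
	threshold := packSize / 25
	if smallPackBytes > 0 {
		// --repack-smaller-than: explicit limit
		threshold = smallPackBytes
	} else if repackSmall {
		// --repack-small: consider files with at least 80% of the target size as large enough
		threshold = packSize / 5 * 4
	}
	return threshold
}

func main() {
	const packSize = 16 * 1024 * 1024 // assumed default pack size (16 MiB)
	fmt.Println(repackThreshold(packSize, false, 0))          // 671088 bytes (default, 4% of the pack size)
	fmt.Println(repackThreshold(packSize, true, 0))           // 13421772 bytes (80% of the pack size)
	fmt.Println(repackThreshold(packSize, true, 4*1024*1024)) // 4194304 bytes (--repack-smaller-than 4M)
}
```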
internal/repository/prune.go
@@ -24,6 +24,7 @@ type PruneOptions struct {

	MaxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
	MaxRepackBytes uint64
	SmallPackBytes uint64

	RepackCacheableOnly bool
	RepackSmall         bool
@@ -104,6 +105,9 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed
	if repo.Config().Version < 2 && opts.RepackUncompressed {
		return nil, fmt.Errorf("compression requires at least repository format version 2")
	}
	if opts.SmallPackBytes > uint64(repo.packSize()) {
		return nil, fmt.Errorf("--repack-smaller-than exceeds the repository pack size")
	}

	usedBlobs := index.NewAssociatedSet[uint8](repo.idx)
	err := getUsedBlobs(ctx, repo, usedBlobs)
@@ -326,7 +330,9 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
	repoVersion := repo.Config().Version
	// only repack very small files by default
	targetPackSize := repo.packSize() / 25
	if opts.SmallPackBytes > 0 {
		targetPackSize = uint(opts.SmallPackBytes)
	} else if opts.RepackSmall {
		// consider files with at least 80% of the target size as large enough
		targetPackSize = repo.packSize() / 5 * 4
	}
@@ -402,6 +408,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository,
		bar.Add(1)
		return nil
	})

	bar.Done()
	if err != nil {
		return PrunePlan{}, err
internal/repository/prune_test.go
@@ -2,6 +2,7 @@ package repository_test

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"testing"

@@ -9,6 +10,7 @@ import (

	"github.com/restic/restic/internal/checker"
	"github.com/restic/restic/internal/repository"
	"github.com/restic/restic/internal/repository/pack"
	"github.com/restic/restic/internal/restic"
	rtest "github.com/restic/restic/internal/test"
	"github.com/restic/restic/internal/ui/progress"
@@ -191,3 +193,85 @@ func TestPruneMaxUnusedDuplicate(t *testing.T) {
	rtest.Equals(t, rsize.Unref, uint64(0))
	rtest.Equals(t, rsize.Uncompressed, uint64(0))
}

/*
	1.) create a repository with the minimum pack size (repository.MinPackSize).
	2.) create enough data for at least 11 packfiles.
	3.) run repository.PlanPrune(...) with a packsize of 16M (current default).
	4.) run plan.Execute(...), extract plan.Stats() and check them.
	5.) check that all blobs are contained in the new packfiles.
	6.) the result should be fewer packfiles than before.
*/
func TestPruneSmall(t *testing.T) {
	seed := time.Now().UnixNano()
	random := rand.New(rand.NewSource(seed))
	t.Logf("rand initialized with seed %d", seed)

	be := repository.TestBackend(t)
	repo, _ := repository.TestRepositoryWithBackend(t, be, 0, repository.Options{PackSize: repository.MinPackSize})

	const blobSize = 1000 * 1000
	const numBlobsCreated = 55

	var wg errgroup.Group
	repo.StartPackUploader(context.TODO(), &wg)
	keep := restic.NewBlobSet()
	// we need a minimum of 11 packfiles; each packfile will be about 5 MB long
	for i := 0; i < numBlobsCreated; i++ {
		buf := make([]byte, blobSize)
		random.Read(buf)

		id, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, restic.ID{}, false)
		rtest.OK(t, err)
		keep.Insert(restic.BlobHandle{Type: restic.DataBlob, ID: id})
	}

	rtest.OK(t, repo.Flush(context.Background()))

	// gather the number of packfiles
	repoPacks, err := pack.Size(context.TODO(), repo, false)
	rtest.OK(t, err)
	lenPackfilesBefore := len(repoPacks)
	rtest.OK(t, repo.Close())

	// and reopen the repository with the default packsize
	repo = repository.TestOpenBackend(t, be)
	rtest.OK(t, repo.LoadIndex(context.TODO(), nil))

	opts := repository.PruneOptions{
		MaxRepackBytes: math.MaxUint64,
		MaxUnusedBytes: func(used uint64) (unused uint64) { return blobSize / 4 },
		SmallPackBytes: 5 * 1024 * 1024,
		RepackSmall:    true,
	}
	plan, err := repository.PlanPrune(context.TODO(), opts, repo, func(ctx context.Context, repo restic.Repository, usedBlobs restic.FindBlobSet) error {
		for blob := range keep {
			usedBlobs.Insert(blob)
		}
		return nil
	}, &progress.NoopPrinter{})
	rtest.OK(t, err)
	rtest.OK(t, plan.Execute(context.TODO(), &progress.NoopPrinter{}))

	stats := plan.Stats()
	rtest.Equals(t, stats.Size.Used/blobSize, uint64(numBlobsCreated), fmt.Sprintf("total size of blobs should be %d but is %d",
		numBlobsCreated, stats.Size.Used/blobSize))
	rtest.Equals(t, stats.Blobs.Used, stats.Blobs.Repack, "the number of blobs should be identical after a repack")

	// reopen the repository
	repo = repository.TestOpenBackend(t, be)
	checker.TestCheckRepo(t, repo, true)

	// load all blobs
	for blob := range keep {
		_, err := repo.LoadBlob(context.TODO(), blob.Type, blob.ID, nil)
		rtest.OK(t, err)
	}

	repoPacks, err = pack.Size(context.TODO(), repo, false)
	rtest.OK(t, err)
	lenPackfilesAfter := len(repoPacks)

	rtest.Equals(t, lenPackfilesBefore > lenPackfilesAfter, true,
		fmt.Sprintf("the number of packfiles before the repack (%d) should be larger than after (%d)", lenPackfilesBefore, lenPackfilesAfter))
}