rewrite: restructured code to use func walker.NodeKeepEmptyDirectoryFunc

NodeKeepEmptyDirectoryFunc is called when a subdirectory is found to be empty,
to decide how to proceed.
NodeKeepEmptyDirectoryFunc calls the --include pattern filter functions and
returns true if the empty subdirectory is to be kept, and false if it is to be skipped.
If the subdirectory is to be discarded, RewriteTree returns an empty (null) node ID
with no error set. The caller picks this up and skips the subdirectory
accordingly.

This function is fed into NewSnapshotSizeRewriter.
This commit is contained in:
Winfried Plappert 2025-02-20 13:11:10 +00:00
parent fea4cf949c
commit ec34ffe31f
3 changed files with 91 additions and 54 deletions

View File

@ -149,7 +149,7 @@ func runRepairSnapshots(ctx context.Context, gopts GlobalOptions, opts RepairOpt
func(ctx context.Context, sn *restic.Snapshot) (restic.ID, *restic.SnapshotSummary, error) {
id, err := rewriter.RewriteTree(ctx, repo, "/", *sn.Tree)
return id, nil, err
}, opts.DryRun, opts.Forget, nil, "repaired")
}, opts.DryRun, opts.Forget, nil, "repaired", false)
if err != nil {
return errors.Fatalf("unable to rewrite snapshot ID %q: %v", sn.ID().Str(), err)
}

View File

@ -29,13 +29,10 @@ you specify to exclude. All metadata (time, host, tags) will be preserved.
Alternatively you can use one of the --include variants to only include files
in the new snapshot which you want to preserve. All other files not mayching any
of your --include pattern will not be saved in the new snapshot. Empty subdirectories
however will always be preserved. Totally empty subdirectories (apart from genuine ones)
which have been completey evacuated by not including anything useful
will not be stored in the new snapshot.
If you specify an --include pattern which will not include anything useful, you will still
create a new snapshot if the original snapshot contained one or more empty subdirectories.
of your --include patterns will not be saved in the new snapshot. Empty subdirectories
however will be preserved when a filter is defined for them. Totally empty subdirectories
will not be stored in the new snapshot. If you specify an --include pattern
which will not include anything useful, the snapshot will not be modified.
The snapshots to rewrite are specified using the --host, --tag and --path options,
or by providing a list of snapshot IDs. Please note that specifying neither any of
@ -157,10 +154,18 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
}
var filter rewriteFilterFunc
var keepEmptyDirectory walker.NodeKeepEmptyDirectoryFunc
if len(rejectByNameFuncs) > 0 || len(includeByNameFuncs) > 0 || opts.SnapshotSummary {
rewriteNode := gatherFilters(rejectByNameFuncs, includeByNameFuncs)
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode, len(includeByNameFuncs) > 0)
if len(includeByNameFuncs) > 0 {
keepEmptyDirectory = keepEmptyDirectoryFilter(includeByNameFuncs)
} else {
keepEmptyDirectory = func(_ string) bool {
return true
}
}
rewriter, querySize := walker.NewSnapshotSizeRewriter(rewriteNode, keepEmptyDirectory)
filter = func(ctx context.Context, sn *restic.Snapshot) (restic.ID, *restic.SnapshotSummary, error) {
id, err := rewriter.RewriteTree(ctx, repo, "/", *sn.Tree)
@ -184,11 +189,12 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
}
return filterAndReplaceSnapshot(ctx, repo, sn,
filter, opts.DryRun, opts.Forget, metadata, "rewrite")
filter, opts.DryRun, opts.Forget, metadata, "rewrite", len(includeByNameFuncs) > 0)
}
func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *restic.Snapshot,
filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string) (bool, error) {
filter rewriteFilterFunc, dryRun bool, forget bool, newMetadata *snapshotMetadata, addTag string,
includeFilterActive bool) (bool, error) {
wg, wgCtx := errgroup.WithContext(ctx)
repo.StartPackUploader(wgCtx, wg)
@ -210,6 +216,10 @@ func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *r
}
if filteredTree.IsNull() {
if includeFilterActive {
debug.Log("Snapshot %v not modified", sn)
return false, nil
}
if dryRun {
Verbosef("would delete empty snapshot\n")
} else {
@ -409,3 +419,26 @@ func gatherFilters(rejectByNameFuncs []filter.RejectByNameFunc, includeByNameFun
return rewriteNode
}
// keepEmptyDirectoryFilter builds the callback that decides whether an empty
// subdirectory is kept when --include patterns are in use.
//
// The returned function runs the given include functions in order against the
// directory path. As soon as one of them reports a match, the path is reported
// through Verboseff and true is returned. If none matches, it returns false.
// The second return value of each include function is ignored here.
func keepEmptyDirectoryFilter(includeByNameFuncs []filter.IncludeByNameFunc) (keepEmptyDirectory walker.NodeKeepEmptyDirectoryFunc) {
	return func(path string) bool {
		for _, include := range includeByNameFuncs {
			if matched, _ := include(path); matched {
				Verboseff("including %s\n", path)
				return true
			}
		}
		return false
	}
}

View File

@ -10,6 +10,7 @@ import (
)
// NodeRewriteFunc is called for each node together with its path and returns
// the node to store in the rewritten tree. Returning nil removes the node.
type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node
// NodeKeepEmptyDirectoryFunc is called with the path of a directory that ended
// up without any entries after rewriting. It returns true to keep the empty
// directory and false to drop it.
type NodeKeepEmptyDirectoryFunc func(path string) bool
// FailedTreeRewriteFunc decides what to do with a tree that could not be
// loaded. It receives the tree's ID, its path and the load error, and returns
// an ID together with an error.
type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (restic.ID, error)
// QueryRewrittenSizeFunc returns a SnapshotSize value when called.
type QueryRewrittenSizeFunc func() SnapshotSize
@ -20,13 +21,13 @@ type SnapshotSize struct {
// RewriteOpts bundles the callbacks and flags that configure a TreeRewriter.
// NOTE(review): this diff view shows removed and added lines together, so
// RewriteNode appears twice (old and realigned new line) — confirm against
// the real file.
type RewriteOpts struct {
// RewriteNode is called for every node; return nil to remove the node.
RewriteNode NodeRewriteFunc
RewriteNode NodeRewriteFunc
// KeepEmtpyDirectory is called with the path of a directory that has no
// entries left after rewriting; return true to keep it, false to drop it.
// NewTreeRewriter installs a keep-everything default when this is nil.
// NOTE(review): the field name is misspelled ("Emtpy"); renaming it would
// touch every user of this exported field.
KeepEmtpyDirectory NodeKeepEmptyDirectoryFunc
// decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail.
RewriteFailedTree FailedTreeRewriteFunc
AllowUnstableSerialization bool
DisableNodeCache bool
// NOTE(review): presumably the pre-change boolean flag that this commit
// replaces with KeepEmtpyDirectory — confirm it is a removed line.
KeepEmptyDirecoryGlobal bool
}
type idMap map[restic.ID]restic.ID
@ -56,10 +57,15 @@ func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
return restic.ID{}, err
}
}
if rw.opts.KeepEmtpyDirectory == nil {
rw.opts.KeepEmtpyDirectory = func(_ string) bool {
return true
}
}
return rw
}
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryGlobal bool) (*TreeRewriter, QueryRewrittenSizeFunc) {
func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryFilter NodeKeepEmptyDirectoryFunc) (*TreeRewriter, QueryRewrittenSizeFunc) {
var count uint
var size uint64
@ -73,8 +79,9 @@ func NewSnapshotSizeRewriter(rewriteNode NodeRewriteFunc, keepEmptyDirecoryGloba
return node
},
DisableNodeCache: true,
// KeepEmptyDirecoryGlobal = false will force old behaviour for --exclude variants
KeepEmptyDirecoryGlobal: keepEmptyDirecoryGlobal,
KeepEmtpyDirectory: func(path string) bool {
return keepEmptyDirecoryFilter(path)
},
})
ss := func() SnapshotSize {
@ -119,53 +126,50 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node
tb := restic.NewTreeJSONBuilder()
countInserts := 0
// explicitely exclude empty directory - so it will be saved
if len(curTree.Nodes) > 0 {
for _, node := range curTree.Nodes {
if ctx.Err() != nil {
return restic.ID{}, ctx.Err()
}
for _, node := range curTree.Nodes {
if ctx.Err() != nil {
return restic.ID{}, ctx.Err()
}
path := path.Join(nodepath, node.Name)
node = t.opts.RewriteNode(node, path)
if node == nil {
continue
}
path := path.Join(nodepath, node.Name)
node = t.opts.RewriteNode(node, path)
if node == nil {
continue
}
if node.Type != restic.NodeTypeDir {
err = tb.AddNode(node)
if err != nil {
return restic.ID{}, err
}
countInserts++
continue
}
// treat nil as null id
var subtree restic.ID
if node.Subtree != nil {
subtree = *node.Subtree
}
newID, err := t.RewriteTree(ctx, repo, path, subtree)
if err != nil {
return restic.ID{}, err
}
// check for empty subtree condition here
if t.opts.KeepEmptyDirecoryGlobal && err == nil && newID.IsNull() {
continue
}
node.Subtree = &newID
if node.Type != restic.NodeTypeDir {
err = tb.AddNode(node)
if err != nil {
return restic.ID{}, err
}
countInserts++
continue
}
// check for empty node list
if t.opts.KeepEmptyDirecoryGlobal && countInserts == 0 {
// current subdirectory is empty - due to no includes: create condition here
return restic.ID{}, nil
// treat nil as null id
var subtree restic.ID
if node.Subtree != nil {
subtree = *node.Subtree
}
newID, err := t.RewriteTree(ctx, repo, path, subtree)
if err != nil {
return restic.ID{}, err
} else if err == nil && newID.IsNull() {
// skip empty subdirectory
continue
}
node.Subtree = &newID
err = tb.AddNode(node)
if err != nil {
return restic.ID{}, err
}
countInserts++
}
if countInserts == 0 && !t.opts.KeepEmtpyDirectory(nodepath) {
return restic.ID{}, nil
}
tree, err := tb.Finalize()