backup: allow excluding online-only cloud files

This commit is contained in:
Michael Wildman 2024-08-08 22:48:03 +12:00 committed by Michael Eischer
parent de3acd7937
commit da71e77b28
8 changed files with 154 additions and 1 deletions

View File

@ -0,0 +1,12 @@
Enhancement: Allow excluding online-only cloud files (e.g. OneDrive)
Restic treated OneDrive Files On-Demand as though they were regular files
when backing up. This could cause issues with VSS, make the backup
incredibly slow (as OneDrive attempted to download the files), or fill
the source disk (e.g. 1TB of files in OneDrive on a 500GB disk).
Restic now allows the user to exclude these files when backing up with
the `--exclude-cloud-files` switch.
https://github.com/restic/restic/issues/3697
https://github.com/restic/restic/issues/4935
https://github.com/restic/restic/pull/4990

View File

@ -77,6 +77,7 @@ type BackupOptions struct {
ExcludeIfPresent []string
ExcludeCaches bool
ExcludeLargerThan string
ExcludeCloudFiles bool
Stdin bool
StdinFilename string
StdinCommand bool
@ -140,6 +141,7 @@ func init() {
f.BoolVar(&backupOptions.NoScan, "no-scan", false, "do not run scanner to estimate size of backup")
if runtime.GOOS == "windows" {
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
f.BoolVar(&backupOptions.ExcludeCloudFiles, "exclude-cloud-files", false, "excludes online-only cloud files (such as OneDrive Files On-Demand)")
}
f.BoolVar(&backupOptions.SkipIfUnchanged, "skip-if-unchanged", false, "skip snapshot creation if identical to parent snapshot")
@ -347,6 +349,17 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [
funcs = append(funcs, f)
}
if opts.ExcludeCloudFiles && !opts.Stdin && !opts.StdinCommand {
if runtime.GOOS != "windows" {
return nil, errors.Fatalf("exclude-cloud-files is only supported on Windows")
}
f, err := archiver.RejectCloudFiles(Warnf)
if err != nil {
return nil, err
}
funcs = append(funcs, f)
}
if opts.ExcludeCaches {
opts.ExcludeIfPresent = append(opts.ExcludeIfPresent, "CACHEDIR.TAG:Signature: 8a477f597d28d172789f06886806bc55")
}

View File

@ -297,7 +297,8 @@ the exclude options are:
- ``--exclude-file`` Specified one or more times to exclude items listed in a given file
- ``--iexclude-file`` Same as ``exclude-file`` but ignores cases like in ``--iexclude``
- ``--exclude-if-present foo`` Specified one or more times to exclude a folder's content if it contains a file called ``foo`` (optionally having a given header, no wildcards for the file name supported)
- ``--exclude-larger-than size`` Specified once to excludes files larger than the given size
- ``--exclude-larger-than size`` Specified once to exclude files larger than the given size
- ``--exclude-cloud-files`` Specified once to exclude online-only cloud files (such as OneDrive Files On-Demand), currently only supported on Windows
Please see ``restic help backup`` for more specific information about each exclude option.

View File

@ -316,3 +316,21 @@ func RejectBySize(maxSize int64) (RejectFunc, error) {
return false
}, nil
}
// RejectCloudFiles builds a RejectFunc that excludes online-only cloud files,
// i.e. items for which RecallOnDataAccess reports true.
//
// If the online-only status of an item cannot be determined, the problem is
// reported through warnf and the item is kept (not rejected). The returned
// error is always nil.
func RejectCloudFiles(warnf func(msg string, args ...interface{})) (RejectFunc, error) {
	reject := func(item string, fi *fs.ExtendedFileInfo, _ fs.FS) bool {
		onlineOnly, err := fi.RecallOnDataAccess()
		switch {
		case err != nil:
			// Unknown status: warn, but do not drop the item from the backup.
			warnf("item %v: error checking online-only status: %v", item, err)
			return false
		case onlineOnly:
			debug.Log("rejecting online-only cloud file %s", item)
			return true
		default:
			return false
		}
	}
	return reject, nil
}

View File

@ -32,3 +32,8 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
ChangeTime: time.Unix(s.Ctimespec.Unix()),
}
}
// RecallOnDataAccess reports whether a file is a cloud-only placeholder.
// That status is derived from windows-specific attributes; this implementation
// performs no check and always returns false with a nil error.
func (*ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
return false, nil
}

View File

@ -32,3 +32,8 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
ChangeTime: time.Unix(s.Ctim.Unix()),
}
}
// RecallOnDataAccess reports whether a file is a cloud-only placeholder.
// That status is derived from windows-specific attributes; this implementation
// performs no check and always returns false with a nil error.
func (*ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
return false, nil
}

View File

@ -8,6 +8,8 @@ import (
"os"
"syscall"
"time"
"golang.org/x/sys/windows"
)
// extendedStat extracts info into an ExtendedFileInfo for Windows.
@ -36,3 +38,20 @@ func extendedStat(fi os.FileInfo) *ExtendedFileInfo {
return &extFI
}
// RecallOnDataAccess reports whether the file is merely a placeholder whose
// contents must be downloaded from a remote server, as opposed to being
// available locally on the disk. Cloud syncing services (e.g. OneDrive)
// typically use such placeholders to avoid downloading files from cloud
// storage until they are accessed.
//
// An error is returned when the stored system-specific data is not a
// *syscall.Win32FileAttributeData, in which case the attributes cannot be read.
func (fi *ExtendedFileInfo) RecallOnDataAccess() (bool, error) {
	data, isWin32 := fi.sys.(*syscall.Win32FileAttributeData)
	if !isWin32 {
		return false, fmt.Errorf("could not determine file attributes: %s", fi.Name)
	}

	// The file is online-only exactly when the recall-on-data-access bit is set.
	recall := data.FileAttributes&windows.FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS != 0
	return recall, nil
}

View File

@ -0,0 +1,80 @@
package fs_test
import (
iofs "io/fs"
"os"
"path/filepath"
"syscall"
"testing"
"time"
"github.com/restic/restic/internal/fs"
rtest "github.com/restic/restic/internal/test"
"golang.org/x/sys/windows"
)
// TestRecallOnDataAccessRealFile checks that a regular file freshly written
// to a temporary directory is not reported as an online-only cloud file and
// that querying its status succeeds.
func TestRecallOnDataAccessRealFile(t *testing.T) {
	// create a temp file for testing
	tempdir := rtest.TempDir(t)
	filename := filepath.Join(tempdir, "regular-file")
	err := os.WriteFile(filename, []byte("foobar"), 0640)
	rtest.OK(t, err)

	fi, err := os.Stat(filename)
	rtest.OK(t, err)

	xs := fs.ExtendedStat(fi)

	// ensure we can check attrs without error; use the same error helper as
	// the checks above so that a failure is reported consistently
	recall, err := xs.RecallOnDataAccess()
	rtest.OK(t, err)
	rtest.Assert(t, !recall, "RecallOnDataAccess should be false")
}
// mockFileInfo is a stand-in for os.FileInfo used to mock file attributes.
// Apart from the attribute value, everything it reports is a fixed placeholder.
type mockFileInfo struct {
	// FileAttributes is the raw attribute value this mock should expose.
	FileAttributes uint32
}

// IsDir always reports false.
func (mockFileInfo) IsDir() bool { return false }

// ModTime returns the current time on every call.
func (mockFileInfo) ModTime() time.Time { return time.Now() }

// Mode always returns the zero file mode.
func (mockFileInfo) Mode() iofs.FileMode { return iofs.FileMode(0) }

// Name always returns the fixed name "test".
func (mockFileInfo) Name() string { return "test" }

// Size always returns 0.
func (mockFileInfo) Size() int64 { return 0 }
// Sys returns a newly allocated *syscall.Win32FileAttributeData whose
// FileAttributes field is copied from the mock; all other fields are zero.
func (m mockFileInfo) Sys() any {
	attrs := new(syscall.Win32FileAttributeData)
	attrs.FileAttributes = m.FileAttributes
	return attrs
}
// TestRecallOnDataAccessMockCloudFile checks that a file info whose attributes
// carry FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS is reported as online-only.
func TestRecallOnDataAccessMockCloudFile(t *testing.T) {
	fi := mockFileInfo{
		FileAttributes: windows.FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS,
	}
	xs := fs.ExtendedStat(fi)

	recall, err := xs.RecallOnDataAccess()
	// rtest.OK is the helper this file already uses for plain error checks.
	rtest.OK(t, err)
	rtest.Assert(t, recall, "RecallOnDataAccess should be true")
}
// TestRecallOnDataAccessMockRegularFile checks that a file info carrying only
// FILE_ATTRIBUTE_ARCHIVE is not reported as online-only.
func TestRecallOnDataAccessMockRegularFile(t *testing.T) {
	fi := mockFileInfo{
		FileAttributes: windows.FILE_ATTRIBUTE_ARCHIVE,
	}
	xs := fs.ExtendedStat(fi)

	recall, err := xs.RecallOnDataAccess()
	// rtest.OK is the helper this file already uses for plain error checks.
	rtest.OK(t, err)
	rtest.Assert(t, !recall, "RecallOnDataAccess should be false")
}