Add --checksum flag to only discard transfers by MD5SUM - fixes #61

Useful for copying between backends where checksum fetching is fast,
i.e. any of s3, swift, drive, or googlecloudstorage.
This commit is contained in:
Alex Couper 2015-06-03 14:08:27 +00:00 committed by Nick Craig-Wood
parent d9fcc32f70
commit 7af1282375
6 changed files with 85 additions and 17 deletions

View File

@ -123,11 +123,11 @@ Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination. don't match. It doesn't alter the source or destination.
rclone config rclone config
Enter an interactive configuration session. Enter an interactive configuration session.
rclone help rclone help
This help. This help.
@ -136,6 +136,7 @@ General options:
``` ```
--bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G
--checkers=8: Number of checkers to run in parallel. --checkers=8: Number of checkers to run in parallel.
-c, --checksum=false: Skip based on checksum, not mod-time & size
--config="~/.rclone.conf": Config file. --config="~/.rclone.conf": Config file.
--contimeout=1m0s: Connect timeout --contimeout=1m0s: Connect timeout
-n, --dry-run=false: Do a trial run with no permanent changes -n, --dry-run=false: Do a trial run with no permanent changes

View File

@ -106,17 +106,18 @@ Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination. don't match. It doesn't alter the source or destination.
rclone config rclone config
Enter an interactive configuration session. Enter an interactive configuration session.
rclone help rclone help
This help. This help.
``` ```
--bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G --bwlimit=0: Bandwidth limit in kBytes/s, or use suffix k|M|G
--checkers=8: Number of checkers to run in parallel. --checkers=8: Number of checkers to run in parallel.
-c, --checksum=false: Skip based on checksum, not mod-time & size
--config="~/.rclone.conf": Config file. --config="~/.rclone.conf": Config file.
--contimeout=1m0s: Connect timeout --contimeout=1m0s: Connect timeout
-n, --dry-run=false: Do a trial run with no permanent changes -n, --dry-run=false: Do a trial run with no permanent changes

View File

@ -153,6 +153,13 @@ func (s *StatsInfo) DoneChecking(o Object) {
s.checks += 1 s.checks += 1
} }
// GetTransfers returns the current value of the transfers counter.
// The counter is read while holding the read lock.
func (s *StatsInfo) GetTransfers() int64 {
	s.lock.RLock()
	count := s.transfers
	s.lock.RUnlock()
	return count
}
// Transferring adds a transfer into the stats // Transferring adds a transfer into the stats
func (s *StatsInfo) Transferring(o Object) { func (s *StatsInfo) Transferring(o Object) {
s.lock.Lock() s.lock.Lock()

View File

@ -44,6 +44,7 @@ var (
checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.") checkers = pflag.IntP("checkers", "", 8, "Number of checkers to run in parallel.")
transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.") transfers = pflag.IntP("transfers", "", 4, "Number of file transfers to run in parallel.")
configFile = pflag.StringP("config", "", ConfigPath, "Config file.") configFile = pflag.StringP("config", "", ConfigPath, "Config file.")
checkSum = pflag.BoolP("checksum", "c", false, "Skip based on checksum, not mod-time & size")
dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes") dryRun = pflag.BoolP("dry-run", "n", false, "Do a trial run with no permanent changes")
connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout") connectTimeout = pflag.DurationP("contimeout", "", 60*time.Second, "Connect timeout")
timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout") timeout = pflag.DurationP("timeout", "", 5*60*time.Second, "IO idle timeout")
@ -119,6 +120,7 @@ type ConfigInfo struct {
Verbose bool Verbose bool
Quiet bool Quiet bool
DryRun bool DryRun bool
CheckSum bool
ModifyWindow time.Duration ModifyWindow time.Duration
Checkers int Checkers int
Transfers int Transfers int
@ -194,6 +196,7 @@ func LoadConfig() {
Config.DryRun = *dryRun Config.DryRun = *dryRun
Config.Timeout = *timeout Config.Timeout = *timeout
Config.ConnectTimeout = *connectTimeout Config.ConnectTimeout = *connectTimeout
Config.CheckSum = *checkSum
ConfigPath = *configFile ConfigPath = *configFile

View File

@ -8,6 +8,7 @@ import (
"mime" "mime"
"path" "path"
"sync" "sync"
"time"
) )
// Work out modify window for fses passed in - sets Config.ModifyWindow // Work out modify window for fses passed in - sets Config.ModifyWindow
@ -71,16 +72,19 @@ func Equal(src, dst Object) bool {
return false return false
} }
// Size the same so check the mtime var srcModTime time.Time
srcModTime := src.ModTime() if !Config.CheckSum {
dstModTime := dst.ModTime() // Size the same so check the mtime
dt := dstModTime.Sub(srcModTime) srcModTime = src.ModTime()
ModifyWindow := Config.ModifyWindow dstModTime := dst.ModTime()
if dt >= ModifyWindow || dt <= -ModifyWindow { dt := dstModTime.Sub(srcModTime)
Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime) ModifyWindow := Config.ModifyWindow
} else { if dt >= ModifyWindow || dt <= -ModifyWindow {
Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow) Debug(src, "Modification times differ by %s: %v, %v", dt, srcModTime, dstModTime)
return true } else {
Debug(src, "Size and modification time the same (differ by %s, within tolerance %s)", dt, ModifyWindow)
return true
}
} }
// mtime is unreadable or different but size is the same so // mtime is unreadable or different but size is the same so
@ -91,9 +95,11 @@ func Equal(src, dst Object) bool {
return false return false
} }
// Size and MD5 the same but mtime different so update the if !Config.CheckSum {
// mtime of the dst object here // Size and MD5 the same but mtime different so update the
dst.SetModTime(srcModTime) // mtime of the dst object here
dst.SetModTime(srcModTime)
}
Debug(src, "Size and MD5SUM of src and dst objects identical") Debug(src, "Size and MD5SUM of src and dst objects identical")
return true return true

View File

@ -169,6 +169,56 @@ func TestCopyRedownload(t *testing.T) {
cleanTempDir(t) cleanTempDir(t)
} }
// TestSyncBasedOnCheckSum creates a file and syncs it, then changes only the
// file's modification time and syncs again. With fs.Config.CheckSum enabled
// the comparison is by size and checksum, so we expect nothing to be
// transferred on the second sync and the remote to keep its original
// modification time.
func TestSyncBasedOnCheckSum(t *testing.T) {
	cleanTempDir(t)

	// fs.Config is shared by every test in the package, so restore the
	// previous value on exit. Otherwise the checksum-only mode would leak
	// into later tests that rely on mod-time comparison.
	oldCheckSum := fs.Config.CheckSum
	fs.Config.CheckSum = true
	defer func() { fs.Config.CheckSum = oldCheckSum }()

	WriteFile("check sum", "", t1)

	transfersBefore := fs.Stats.GetTransfers()
	err := fs.Sync(fremote, flocal, true)
	if err != nil {
		t.Fatalf("Initial sync failed: %v", err)
	}
	transfersAfter := fs.Stats.GetTransfers()

	// We should have transferred exactly one file.
	if transfersAfter-1 != transfersBefore {
		t.Fatalf("Initial sync didn't do what we wanted.")
	}

	// Change only the modification time of the local file; size and
	// contents (and therefore the checksum) stay the same.
	err = os.Chtimes(localName+"/check sum", t2, t2)
	if err != nil {
		t.Fatalf("Chtimes failed: %v", err)
	}

	transfersBefore = fs.Stats.GetTransfers()
	err = fs.Sync(fremote, flocal, true)
	if err != nil {
		t.Fatalf("Sync failed: %v", err)
	}
	transfersAfter = fs.Stats.GetTransfers()

	// We should have transferred no files.
	if transfersAfter != transfersBefore {
		t.Fatalf("We synced, though we shouldn't have.")
	}

	// The remote keeps the original mod time (t1) because no transfer or
	// mod-time update happened; the local file carries the new one (t2).
	// d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty content.
	remoteItems := []fstest.Item{
		{Path: "check sum", Size: 0, ModTime: t1, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
	}
	localItems := []fstest.Item{
		{Path: "check sum", Size: 0, ModTime: t2, Md5sum: "d41d8cd98f00b204e9800998ecf8427e"},
	}
	fstest.CheckListingWithPrecision(t, flocal, localItems, fs.Config.ModifyWindow)
	fstest.CheckListingWithPrecision(t, fremote, remoteItems, fs.Config.ModifyWindow)

	cleanTempDir(t)
}
func TestSyncAfterChangingModtimeOnly(t *testing.T) { func TestSyncAfterChangingModtimeOnly(t *testing.T) {
WriteFile("empty space", "", t1) WriteFile("empty space", "", t1)