From a81ec00a8c70de53c8664ea060b652ae2da55154 Mon Sep 17 00:00:00 2001 From: Richard Yang Date: Sat, 21 Apr 2018 22:57:08 +0100 Subject: [PATCH] dedupe: Add dedupe largest functionality - fixes #2269 --- cmd/dedupe/dedupe.go | 1 + fs/operations/dedupe.go | 17 +++++++++++++++++ fs/operations/dedupe_test.go | 16 ++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/cmd/dedupe/dedupe.go b/cmd/dedupe/dedupe.go index 1a578da8d..1915f8745 100644 --- a/cmd/dedupe/dedupe.go +++ b/cmd/dedupe/dedupe.go @@ -90,6 +90,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" + * ` + "`" + `--dedupe-mode first` + "`" + ` - removes identical files then keeps the first one. * ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one. * ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one. + * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one. * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different. For example to rename all the identically named photos in your Google Photos directory, do diff --git a/fs/operations/dedupe.go b/fs/operations/dedupe.go index 1f49fbaa6..2f3789d47 100644 --- a/fs/operations/dedupe.go +++ b/fs/operations/dedupe.go @@ -119,6 +119,7 @@ const ( DeduplicateNewest // choose the newest object DeduplicateOldest // choose the oldest object DeduplicateRename // rename the objects + DeduplicateLargest // choose the largest object ) func (x DeduplicateMode) String() string { @@ -135,6 +136,8 @@ func (x DeduplicateMode) String() string { return "oldest" case DeduplicateRename: return "rename" + case DeduplicateLargest: + return "largest" } return "unknown" } @@ -154,6 +157,8 @@ func (x *DeduplicateMode) Set(s string) error { *x = DeduplicateOldest case "rename": *x = DeduplicateRename + case "largest": + *x = DeduplicateLargest default: return errors.Errorf("Unknown mode for dedupe %q.", s) } @@ -260,6 +265,7 @@ func Deduplicate(f fs.Fs, mode DeduplicateMode) error { if err != nil { return err } + for remote, objs := range files { if len(objs) > 1 { fs.Logf(remote, "Found %d duplicates - deleting identical copies", len(objs)) @@ -281,6 +287,17 @@ func Deduplicate(f fs.Fs, mode DeduplicateMode) error { dedupeDeleteAllButOne(0, remote, objs) case DeduplicateRename: dedupeRename(remote, objs) + case DeduplicateLargest: + size, largest, largestIndex := int64(0), int64(-1), -1 + for i, obj := range objs { + size = obj.Size() + if size > largest { + largest, largestIndex = size, i + } + } + if largestIndex > -1 { + dedupeDeleteAllButOne(largestIndex, remote, objs) + } case DeduplicateSkip: // skip default: diff --git a/fs/operations/dedupe_test.go b/fs/operations/dedupe_test.go index a9b5c7ba6..abf4b1426 100644 --- a/fs/operations/dedupe_test.go +++ b/fs/operations/dedupe_test.go @@ -131,6 +131,22 @@ func TestDeduplicateOldest(t *testing.T) { fstest.CheckItems(t, r.Fremote, file1) } +func TestDeduplicateLargest(t *testing.T) { + r := fstest.NewRun(t) + defer r.Finalise() + skipIfCantDedupe(t, r.Fremote) + + file1 := r.WriteUncheckedObject("one", "This is one", t1) + file2 := r.WriteUncheckedObject("one", "This is one too", t2) + file3 := r.WriteUncheckedObject("one", "This is another one", t3) + r.CheckWithDuplicates(t, file1, file2, file3) + + err := operations.Deduplicate(r.Fremote, operations.DeduplicateLargest) + require.NoError(t, err) + + fstest.CheckItems(t, r.Fremote, file3) +} + func TestDeduplicateRename(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise()