diff --git a/cmd/dedupe/dedupe.go b/cmd/dedupe/dedupe.go index fbe38f81d..74e08a76a 100644 --- a/cmd/dedupe/dedupe.go +++ b/cmd/dedupe/dedupe.go @@ -94,6 +94,7 @@ Dedupe can be run non interactively using the ` + "`" + `--dedupe-mode` + "`" + * ` + "`" + `--dedupe-mode newest` + "`" + ` - removes identical files then keeps the newest one. * ` + "`" + `--dedupe-mode oldest` + "`" + ` - removes identical files then keeps the oldest one. * ` + "`" + `--dedupe-mode largest` + "`" + ` - removes identical files then keeps the largest one. + * ` + "`" + `--dedupe-mode smallest` + "`" + ` - removes identical files then keeps the smallest one. * ` + "`" + `--dedupe-mode rename` + "`" + ` - removes identical files then renames the rest to be different. For example to rename all the identically named photos in your Google Photos directory, do diff --git a/fs/operations/dedupe.go b/fs/operations/dedupe.go index 5fc1c0114..082c34517 100644 --- a/fs/operations/dedupe.go +++ b/fs/operations/dedupe.go @@ -125,14 +125,6 @@ func dedupeInteractive(ctx context.Context, f fs.Fs, ht hash.Type, remote string } } -type objectsSortedByModTime []fs.Object - -func (objs objectsSortedByModTime) Len() int { return len(objs) } -func (objs objectsSortedByModTime) Swap(i, j int) { objs[i], objs[j] = objs[j], objs[i] } -func (objs objectsSortedByModTime) Less(i, j int) bool { - return objs[i].ModTime(context.TODO()).Before(objs[j].ModTime(context.TODO())) -} - // DeduplicateMode is how the dedupe command chooses what to do type DeduplicateMode int @@ -145,6 +137,7 @@ const ( DeduplicateOldest // choose the oldest object DeduplicateRename // rename the objects DeduplicateLargest // choose the largest object + DeduplicateSmallest // choose the smallest object ) func (x DeduplicateMode) String() string { @@ -163,6 +156,8 @@ func (x DeduplicateMode) String() string { return "rename" case DeduplicateLargest: return "largest" + case DeduplicateSmallest: + return "smallest" } return "unknown" } @@ -184,6 +179,8 @@ func (x *DeduplicateMode) Set(s string) error { *x = DeduplicateRename case "largest": *x = DeduplicateLargest + case "smallest": + *x = DeduplicateSmallest default: return errors.Errorf("Unknown mode for dedupe %q.", s) } @@ -248,6 +245,20 @@ func dedupeMergeDuplicateDirs(ctx context.Context, f fs.Fs, duplicateDirs [][]fs return nil } +// sort oldest first +func sortOldestFirst(objs []fs.Object) { + sort.Slice(objs, func(i, j int) bool { + return objs[i].ModTime(context.TODO()).Before(objs[j].ModTime(context.TODO())) + }) +} + +// sort smallest first +func sortSmallestFirst(objs []fs.Object) { + sort.Slice(objs, func(i, j int) bool { + return objs[i].Size() < objs[j].Size() + }) +} + // Deduplicate interactively finds duplicate files and offers to // delete all but one or rename them to be different. Only useful with // Google Drive which can have duplicate file names. @@ -296,24 +307,19 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode) error { case DeduplicateFirst: dedupeDeleteAllButOne(ctx, 0, remote, objs) case DeduplicateNewest: - sort.Sort(objectsSortedByModTime(objs)) // sort oldest first + sortOldestFirst(objs) dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs) case DeduplicateOldest: - sort.Sort(objectsSortedByModTime(objs)) // sort oldest first + sortOldestFirst(objs) dedupeDeleteAllButOne(ctx, 0, remote, objs) case DeduplicateRename: dedupeRename(ctx, f, remote, objs) case DeduplicateLargest: - largest, largestIndex := int64(-1), -1 - for i, obj := range objs { - size := obj.Size() - if size > largest { - largest, largestIndex = size, i - } - } - if largestIndex > -1 { - dedupeDeleteAllButOne(ctx, largestIndex, remote, objs) - } + sortSmallestFirst(objs) + dedupeDeleteAllButOne(ctx, len(objs)-1, remote, objs) + case DeduplicateSmallest: + sortSmallestFirst(objs) + dedupeDeleteAllButOne(ctx, 0, remote, objs) case DeduplicateSkip: // skip default: diff --git a/fs/operations/dedupe_test.go b/fs/operations/dedupe_test.go index 38af8f990..7a441fd58 100644 --- a/fs/operations/dedupe_test.go +++ b/fs/operations/dedupe_test.go @@ -152,6 +152,22 @@ func TestDeduplicateLargest(t *testing.T) { fstest.CheckItems(t, r.Fremote, file3) } +func TestDeduplicateSmallest(t *testing.T) { + r := fstest.NewRun(t) + defer r.Finalise() + skipIfCantDedupe(t, r.Fremote) + + file1 := r.WriteUncheckedObject(context.Background(), "one", "This is one", t1) + file2 := r.WriteUncheckedObject(context.Background(), "one", "This is one too", t2) + file3 := r.WriteUncheckedObject(context.Background(), "one", "This is another one", t3) + r.CheckWithDuplicates(t, file1, file2, file3) + + err := operations.Deduplicate(context.Background(), r.Fremote, operations.DeduplicateSmallest) + require.NoError(t, err) + + fstest.CheckItems(t, r.Fremote, file1) +} + func TestDeduplicateRename(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise()