1
mirror of https://github.com/rclone/rclone synced 2025-01-05 06:26:34 +01:00

dedupe command to deduplicate a remote. Useful with google drive - fixes #41

This commit is contained in:
Nick Craig-Wood 2016-01-31 12:58:41 +00:00
parent 1373efaa39
commit 0f73129ab7
4 changed files with 132 additions and 0 deletions

View File

@ -136,6 +136,47 @@ Checks the files in the source and destination match. It
compares sizes and MD5SUMs and prints a report of files which compares sizes and MD5SUMs and prints a report of files which
don't match. It doesn't alter the source or destination. don't match. It doesn't alter the source or destination.
### rclone dedupe remote:path ###
Interactively find files with duplicate names and offer to either
delete all but one of them or rename them so that every name is
different. Only useful with Google Drive, which can have duplicate
file names.
```
$ rclone dedupe drive:dupes
2016/01/31 14:13:11 Google drive root 'dupes': Looking for duplicates
two.txt: Found 3 duplicates
1: 564374 bytes, 2016-01-31 14:07:22.159000000, md5sum 7594e7dc9fc28f727c42ee3e0749de81
2: 1744073 bytes, 2016-01-31 14:07:12.490000000, md5sum 851957f7fb6f0bc4ce76be966d336802
3: 6048320 bytes, 2016-01-31 14:07:02.111000000, md5sum 1eedaa9fe86fd4b8632e2ac549403b36
s) Skip and do nothing
k) Keep just one (choose which in next step)
r) Rename all to be different (by changing file.jpg to file-1.jpg)
s/k/r> r
two-1.txt: renamed from: two.txt
two-2.txt: renamed from: two.txt
two-3.txt: renamed from: two.txt
one.txt: Found 2 duplicates
1: 6579 bytes, 2016-01-31 14:05:01.235000000, md5sum 2b76c776249409d925ae7ccd49aea59b
2: 6579 bytes, 2016-01-31 12:50:30.318000000, md5sum 2b76c776249409d925ae7ccd49aea59b
s) Skip and do nothing
k) Keep just one (choose which in next step)
r) Rename all to be different (by changing file.jpg to file-1.jpg)
s/k/r> k
Enter the number of the file to keep> 2
one.txt: Deleted 1 extra copies
```
The result is:
```
$ rclone lsl drive:dupes
564374 2016-01-31 14:07:22.159000000 two-1.txt
1744073 2016-01-31 14:07:12.490000000 two-2.txt
6048320 2016-01-31 14:07:02.111000000 two-3.txt
6579 2016-01-31 12:50:30.318000000 one.txt
```
### rclone config ### ### rclone config ###
Enter an interactive configuration session. Enter an interactive configuration session.

View File

@ -413,6 +413,25 @@ func Choose(what string, defaults, help []string, newOk bool) string {
} }
} }
// ChooseNumber prompts with what and keeps reading lines until the
// user enters an integer n with min <= n <= max, which it returns.
//
// Input that does not parse as an integer, or that lies outside the
// range, is reported on stdout and the prompt is shown again.
func ChooseNumber(what string, min, max int) int {
	for {
		fmt.Printf("%s> ", what)
		n, parseErr := strconv.Atoi(ReadLine())
		switch {
		case parseErr != nil:
			fmt.Printf("Bad number: %v\n", parseErr)
		case n < min || n > max:
			fmt.Printf("Out of range - %d to %d inclusive\n", min, max)
		default:
			return n
		}
	}
}
// ShowRemote shows the contents of the remote // ShowRemote shows the contents of the remote
func ShowRemote(name string) { func ShowRemote(name string) {
fmt.Printf("--------------------\n") fmt.Printf("--------------------\n")

View File

@ -908,3 +908,62 @@ func Delete(f Fs) error {
close(delete) close(delete)
return err return err
} }
// Deduplicate lists every object in f, groups the objects by remote
// name and, for each name shared by more than one object, prints the
// candidates and asks the user what to do:
//
//	s - leave the group untouched
//	k - keep one object (chosen by number) and remove the others
//	r - move each object to "<base>-<n><ext>" so the names differ
//
// It returns an error only when f does not implement Mover. Failures
// to remove or rename an individual object are reported via ErrorLog
// and processing continues with the next object.
func Deduplicate(f Fs) error {
	mover, canMove := f.(Mover)
	if !canMove {
		return fmt.Errorf("%v can't Move files", f)
	}
	Log(f, "Looking for duplicates")

	// Bucket the listing by remote name.
	byName := map[string][]Object{}
	for obj := range f.List() {
		name := obj.Remote()
		byName[name] = append(byName[name], obj)
	}

	for name, dupes := range byName {
		if len(dupes) <= 1 {
			// Name is unique - nothing to resolve.
			continue
		}

		// Show the user what the candidates look like.
		fmt.Printf("%s: Found %d duplicates\n", name, len(dupes))
		for idx, obj := range dupes {
			sum, hashErr := obj.Hash(HashMD5)
			if hashErr != nil {
				// Print the error text in place of the checksum.
				sum = hashErr.Error()
			}
			stamp := obj.ModTime().Format("2006-01-02 15:04:05.000000000")
			fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", idx+1, obj.Size(), stamp, sum)
		}

		menu := []string{
			"sSkip and do nothing",
			"kKeep just one (choose which in next step)",
			"rRename all to be different (by changing file.jpg to file-1.jpg)",
		}
		switch Command(menu) {
		case 's':
			// Skip: leave every copy in place.
		case 'k':
			keep := ChooseNumber("Enter the number of the file to keep", 1, len(dupes))
			removed := 0
			for idx, obj := range dupes {
				if idx == keep-1 {
					continue
				}
				if rmErr := obj.Remove(); rmErr != nil {
					ErrorLog(obj, "Failed to delete: %v", rmErr)
				} else {
					removed++
				}
			}
			fmt.Printf("%s: Deleted %d extra copies\n", name, removed)
		case 'r':
			// Insert "-<n>" between the base name and its extension.
			ext := path.Ext(name)
			stem := name[:len(name)-len(ext)]
			for idx, obj := range dupes {
				target := fmt.Sprintf("%s-%d%s", stem, idx+1, ext)
				moved, mvErr := mover.Move(obj, target)
				if mvErr != nil {
					ErrorLog(obj, "Failed to rename: %v", mvErr)
					continue
				}
				fmt.Printf("%v: renamed from: %v\n", moved, obj)
			}
		}
	}
	return nil
}

View File

@ -240,6 +240,19 @@ var Commands = []Command{
MinArgs: 2, MinArgs: 2,
MaxArgs: 2, MaxArgs: 2,
}, },
// dedupe command: runs fs.Deduplicate on a single remote.
{
Name: "dedupe",
ArgsHelp: "remote:path",
Help: `
Interactively find duplicate files and offer to delete all
but one or rename them to be different. Only useful with
Google Drive which can have duplicate file names.`,
// Only fdst is passed on; fsrc is ignored here.
// NOTE(review): presumably fsrc is unset for a one-argument command - confirm against the caller.
Run: func(fdst, fsrc fs.Fs) error {
return fs.Deduplicate(fdst)
},
// MinArgs and MaxArgs are both 1, i.e. presumably exactly one remote:path argument is required.
MinArgs: 1,
MaxArgs: 1,
},
{ {
Name: "config", Name: "config",
Help: ` Help: `