hashsum: treat hash values in sum file as case insensitive

Also warn duplicate file paths in sum files.

Fixes https://forum.rclone.org/t/rclone-check-sum/25566/45
This commit is contained in:
Ivan Andreev 2021-10-13 15:02:49 +03:00
parent cf9b82b8db
commit 16fb608bee
6 changed files with 51 additions and 15 deletions

View File

@ -33,6 +33,8 @@ don't match. It doesn't alter the file system.
If you supply the |--download| flag, it will download the data from remote
and calculate the contents hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data.
Note that hash values in the SUM file are treated as case insensitive.
`, "|", "`") + check.FlagsHelp,
RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(3, 3, command, args)

View File

@ -76,7 +76,7 @@ Then
$ rclone hashsum MD5 remote:path
Note that hash names are case insensitive.
Note that hash names are case insensitive and values are output in lower case.
`,
RunE: func(command *cobra.Command, args []string) error {
cmd.CheckArgs(0, 2, command, args)

View File

@ -20,6 +20,8 @@ If you supply the `--download` flag, it will download the data from remote
and calculate the contents hash on the fly. This can be useful for remotes
that don't support hashes or if you really want to check all the data.
Note that hash values in the SUM file are treated as case insensitive.
If you supply the `--one-way` flag, it will only check that files in
the source match the files in the destination, not the other way
around. This means that extra files in the destination that are not in

View File

@ -38,7 +38,7 @@ Then
$ rclone hashsum MD5 remote:path
Note that hash names are case insensitive.
Note that hash names are case insensitive and values are output in lower case.
```

View File

@ -7,6 +7,7 @@ import (
"io"
"os"
"regexp"
"strings"
"sync"
"sync/atomic"
@ -540,7 +541,7 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
}
parser := bufio.NewReader(rd)
const maxWarn = 4
const maxWarn = 3
numWarn := 0
re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`)
@ -558,19 +559,31 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
continue
}
if fields := re.FindStringSubmatch(line); fields != nil {
hashes[fields[2]] = fields[1]
fields := re.FindStringSubmatch(line)
if fields == nil {
numWarn++
if numWarn <= maxWarn {
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
}
continue
}
numWarn++
if numWarn < maxWarn {
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
} else if numWarn == maxWarn {
fs.Logf(sumFile, "more warnings suppressed...")
sum, file := fields[1], fields[2]
if hashes[file] != "" {
numWarn++
if numWarn <= maxWarn {
fs.Logf(sumFile, "duplicate file on checksum line %d", lineNo)
}
continue
}
// We've standardised on lower case checksums in rclone internals.
hashes[file] = strings.ToLower(sum)
}
if numWarn > maxWarn {
fs.Logf(sumFile, "%d warning(s) suppressed...", numWarn-maxWarn)
}
if err = rd.Close(); err != nil {
return nil, err
}

View File

@ -330,10 +330,12 @@ func testCheckSum(t *testing.T, download bool) {
hashType := hash.MD5
const (
testString1 = "Hello, World!"
testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4"
testString2 = "I am the walrus"
testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3"
testString1 = "Hello, World!"
testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4"
testDigest1Upper = "65A8E27D8879283831B664BD8B7F0AD4"
testString2 = "I am the walrus"
testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3"
testDigest2Mixed = "87396e030EF3f5b35BBf85c0a09a4FB3"
)
type wantType map[string]string
@ -428,7 +430,7 @@ func testCheckSum(t *testing.T, download bool) {
}
check := func(runNo, wantChecks, wantErrors int, wantResults wantType) {
runName := fmt.Sprintf("move%d", runNo)
runName := fmt.Sprintf("subtest%d", runNo)
t.Run(runName, func(t *testing.T) {
checkRun(runNo, wantChecks, wantErrors, wantResults)
})
@ -519,6 +521,23 @@ func testCheckSum(t *testing.T, download bool) {
"differ": "potato\n",
"error": "",
})
// test mixed-case checksums
file1 = makeFile("banana", testString1)
file2 = makeFile("potato", testString2)
fcsums = makeSums(operations.HashSums{
"banana": testDigest1Upper,
"potato": testDigest2Mixed,
})
fstest.CheckItems(t, r.Fremote, fcsums, file1, file2)
check(7, 2, 0, wantType{
"combined": "= banana\n= potato\n",
"missingonsrc": "",
"missingondst": "",
"match": "banana\npotato\n",
"differ": "",
"error": "",
})
}
func TestCheckSum(t *testing.T) {