From 16fb608bee2b46a0d971a8b59b50e45bca80aa73 Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Wed, 13 Oct 2021 15:02:49 +0300 Subject: [PATCH] hashsum: treat hash values in sum file as case insensitive Also warn duplicate file paths in sum files. Fixes https://forum.rclone.org/t/rclone-check-sum/25566/45 --- cmd/checksum/checksum.go | 2 ++ cmd/hashsum/hashsum.go | 2 +- docs/content/commands/rclone_checksum.md | 2 ++ docs/content/commands/rclone_hashsum.md | 2 +- fs/operations/check.go | 29 +++++++++++++++++------- fs/operations/check_test.go | 29 ++++++++++++++++++++---- 6 files changed, 51 insertions(+), 15 deletions(-) diff --git a/cmd/checksum/checksum.go b/cmd/checksum/checksum.go index 2c74df1ac..9b9865b18 100644 --- a/cmd/checksum/checksum.go +++ b/cmd/checksum/checksum.go @@ -33,6 +33,8 @@ don't match. It doesn't alter the file system. If you supply the |--download| flag, it will download the data from remote and calculate the contents hash on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. + +Note that hash values in the SUM file are treated as case insensitive. `, "|", "`") + check.FlagsHelp, RunE: func(command *cobra.Command, args []string) error { cmd.CheckArgs(3, 3, command, args) diff --git a/cmd/hashsum/hashsum.go b/cmd/hashsum/hashsum.go index 05da76273..509af664c 100644 --- a/cmd/hashsum/hashsum.go +++ b/cmd/hashsum/hashsum.go @@ -76,7 +76,7 @@ Then $ rclone hashsum MD5 remote:path -Note that hash names are case insensitive. +Note that hash names are case insensitive and values are output in lower case. `, RunE: func(command *cobra.Command, args []string) error { cmd.CheckArgs(0, 2, command, args) diff --git a/docs/content/commands/rclone_checksum.md b/docs/content/commands/rclone_checksum.md index 6fd1c712e..691c56a8b 100644 --- a/docs/content/commands/rclone_checksum.md +++ b/docs/content/commands/rclone_checksum.md @@ -20,6 +20,8 @@ If you supply the `--download` flag, it will download the data from remote and calculate the contents hash on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. +Note that hash values in the SUM file are treated as case insensitive. + If you supply the `--one-way` flag, it will only check that files in the source match the files in the destination, not the other way around. This means that extra files in the destination that are not in diff --git a/docs/content/commands/rclone_hashsum.md b/docs/content/commands/rclone_hashsum.md index 5bd6be88f..e0dca0700 100644 --- a/docs/content/commands/rclone_hashsum.md +++ b/docs/content/commands/rclone_hashsum.md @@ -38,7 +38,7 @@ Then $ rclone hashsum MD5 remote:path -Note that hash names are case insensitive. +Note that hash names are case insensitive and values are output in lower case. ``` diff --git a/fs/operations/check.go b/fs/operations/check.go index 62e5392f9..b7c36eec1 100644 --- a/fs/operations/check.go +++ b/fs/operations/check.go @@ -7,6 +7,7 @@ import ( "io" "os" "regexp" + "strings" "sync" "sync/atomic" @@ -540,7 +541,7 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) { } parser := bufio.NewReader(rd) - const maxWarn = 4 + const maxWarn = 3 numWarn := 0 re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`) @@ -558,19 +559,31 @@ func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) { continue } - if fields := re.FindStringSubmatch(line); fields != nil { - hashes[fields[2]] = fields[1] + fields := re.FindStringSubmatch(line) + if fields == nil { + numWarn++ + if numWarn <= maxWarn { + fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo) + } continue } - numWarn++ - if numWarn < maxWarn { - fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo) - } else if numWarn == maxWarn { - fs.Logf(sumFile, "more warnings suppressed...") + sum, file := fields[1], fields[2] + if hashes[file] != "" { + numWarn++ + if numWarn <= maxWarn { + fs.Logf(sumFile, "duplicate file on checksum line %d", lineNo) + } + continue } + + // We've standardised on lower case checksums in rclone internals. + hashes[file] = strings.ToLower(sum) } + if numWarn > maxWarn { + fs.Logf(sumFile, "%d warning(s) suppressed...", numWarn-maxWarn) + } if err = rd.Close(); err != nil { return nil, err } diff --git a/fs/operations/check_test.go b/fs/operations/check_test.go index a6c643674..4615bbd13 100644 --- a/fs/operations/check_test.go +++ b/fs/operations/check_test.go @@ -330,10 +330,12 @@ func testCheckSum(t *testing.T, download bool) { hashType := hash.MD5 const ( - testString1 = "Hello, World!" - testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4" - testString2 = "I am the walrus" - testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3" + testString1 = "Hello, World!" + testDigest1 = "65a8e27d8879283831b664bd8b7f0ad4" + testDigest1Upper = "65A8E27D8879283831B664BD8B7F0AD4" + testString2 = "I am the walrus" + testDigest2 = "87396e030ef3f5b35bbf85c0a09a4fb3" + testDigest2Mixed = "87396e030EF3f5b35BBf85c0a09a4FB3" ) type wantType map[string]string @@ -428,7 +430,7 @@ func testCheckSum(t *testing.T, download bool) { } check := func(runNo, wantChecks, wantErrors int, wantResults wantType) { - runName := fmt.Sprintf("move%d", runNo) + runName := fmt.Sprintf("subtest%d", runNo) t.Run(runName, func(t *testing.T) { checkRun(runNo, wantChecks, wantErrors, wantResults) }) @@ -519,6 +521,23 @@ func testCheckSum(t *testing.T, download bool) { "differ": "potato\n", "error": "", }) + + // test mixed-case checksums + file1 = makeFile("banana", testString1) + file2 = makeFile("potato", testString2) + fcsums = makeSums(operations.HashSums{ + "banana": testDigest1Upper, + "potato": testDigest2Mixed, + }) + fstest.CheckItems(t, r.Fremote, fcsums, file1, file2) + check(7, 2, 0, wantType{ + "combined": "= banana\n= potato\n", + "missingonsrc": "", + "missingondst": "", + "match": "banana\npotato\n", + "differ": "", + "error": "", + }) } func TestCheckSum(t *testing.T) {