Mirror of https://github.com/rclone/rclone (synced 2024-12-29 22:26:24 +01:00)
bisync: fallback to cryptcheck or --download when can't check hash
Bisync checks file equality before renaming sync conflicts by comparing checksums. Before this change, backends without checksum support (notably Crypt) fell back to --size-only for these checks, which is not a very safe method (differing files can sometimes have the same size, especially when they are small). After this change, Crypt remotes fall back to using cryptcheck so that checksums can still be compared. As a last resort, when neither check nor cryptcheck is available, files are compared using --download so that we can be certain they are identical regardless of checksum support.
This commit is contained in:
parent 7f854acb05
commit 422b037087
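To illustrate the fallback order described in the commit message, here is a minimal, self-contained Go sketch (editorial, not rclone code; the remoteInfo/pickCheck names are hypothetical). It is simplified: the real WhichCheck in the diff below also verifies that the crypt remote's underlying backend actually supports a hash before choosing cryptcheck.

package main

import "fmt"

// remoteInfo captures just the properties the fallback decision depends on.
type remoteInfo struct {
	hasCommonHash bool // src and dst share at least one hash type
	srcIsCrypt    bool
	dstIsCrypt    bool
	sizeOnly      bool // --size-only was requested
	ignoreSum     bool // --ignore-checksum was requested
}

// pickCheck mirrors the priority order: plain check, then cryptcheck, then --download.
func pickCheck(r remoteInfo) string {
	if r.hasCommonHash || r.sizeOnly || r.ignoreSum {
		return "check" // normal hash comparison (or the user opted out)
	}
	if r.srcIsCrypt || r.dstIsCrypt {
		return "cryptcheck" // compare hashes through the crypt wrapper
	}
	return "check --download" // last resort: byte-for-byte comparison
}

func main() {
	fmt.Println(pickCheck(remoteInfo{hasCommonHash: true})) // check
	fmt.Println(pickCheck(remoteInfo{dstIsCrypt: true}))    // cryptcheck
	fmt.Println(pickCheck(remoteInfo{}))                    // check --download
}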
cmd/bisync/checkfn.go (new file, 191 lines)
@@ -0,0 +1,191 @@
package bisync

import (
	"bytes"
	"context"
	"fmt"
	"strings"

	"github.com/rclone/rclone/backend/crypt"
	"github.com/rclone/rclone/cmd/bisync/bilib"
	"github.com/rclone/rclone/cmd/check"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/filter"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
)

var hashType hash.Type
var fsrc, fdst fs.Fs
var fcrypt *crypt.Fs

// WhichCheck determines which CheckFn we should use based on the Fs types.
// It is more robust and accurate than Check because
// it will fall back to CryptCheck or DownloadCheck instead of --size-only!
// It returns the *operations.CheckOpt with the CheckFn set.
func WhichCheck(ctx context.Context, opt *operations.CheckOpt) *operations.CheckOpt {
	ci := fs.GetConfig(ctx)
	common := opt.Fsrc.Hashes().Overlap(opt.Fdst.Hashes())

	// note that ci.IgnoreChecksum doesn't change the behavior of Check -- it's just a way to opt out of cryptcheck/download
	if common.Count() > 0 || ci.SizeOnly || ci.IgnoreChecksum {
		// use normal check
		opt.Check = CheckFn
		return opt
	}

	FsrcCrypt, srcIsCrypt := opt.Fsrc.(*crypt.Fs)
	FdstCrypt, dstIsCrypt := opt.Fdst.(*crypt.Fs)

	if (srcIsCrypt && dstIsCrypt) || (!srcIsCrypt && dstIsCrypt) {
		// if both are crypt or only dst is crypt
		hashType = FdstCrypt.UnWrap().Hashes().GetOne()
		if hashType != hash.None {
			// use cryptcheck
			fsrc = opt.Fsrc
			fdst = opt.Fdst
			fcrypt = FdstCrypt
			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
			opt.Check = CryptCheckFn
			return opt
		}
	} else if srcIsCrypt && !dstIsCrypt {
		// if only src is crypt
		hashType = FsrcCrypt.UnWrap().Hashes().GetOne()
		if hashType != hash.None {
			// use reverse cryptcheck
			fsrc = opt.Fdst
			fdst = opt.Fsrc
			fcrypt = FsrcCrypt
			fs.Infof(fdst, "Crypt detected! Using cryptcheck instead of check. (Use --size-only or --ignore-checksum to disable)")
			opt.Check = ReverseCryptCheckFn
			return opt
		}
	}

	// if we've gotten this far, neither check nor cryptcheck will work, so use --download
	fs.Infof(fdst, "Can't compare hashes, so using check --download for safety. (Use --size-only or --ignore-checksum to disable)")
	opt.Check = DownloadCheckFn
	return opt
}

// CheckFn is a slightly modified version of Check
func CheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	same, ht, err := operations.CheckHashes(ctx, src, dst)
	if err != nil {
		return true, false, err
	}
	if ht == hash.None {
		return false, true, nil
	}
	if !same {
		err = fmt.Errorf("%v differ", ht)
		fs.Errorf(src, "%v", err)
		return true, false, nil
	}
	return false, false, nil
}

// CryptCheckFn is a slightly modified version of CryptCheck
func CryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	cryptDst := dst.(*crypt.Object)
	underlyingDst := cryptDst.UnWrap()
	underlyingHash, err := underlyingDst.Hash(ctx, hashType)
	if err != nil {
		return true, false, fmt.Errorf("error reading hash from underlying %v: %w", underlyingDst, err)
	}
	if underlyingHash == "" {
		return false, true, nil
	}
	cryptHash, err := fcrypt.ComputeHash(ctx, cryptDst, src, hashType)
	if err != nil {
		return true, false, fmt.Errorf("error computing hash: %w", err)
	}
	if cryptHash == "" {
		return false, true, nil
	}
	if cryptHash != underlyingHash {
		err = fmt.Errorf("hashes differ (%s:%s) %q vs (%s:%s) %q", fdst.Name(), fdst.Root(), cryptHash, fsrc.Name(), fsrc.Root(), underlyingHash)
		fs.Errorf(src, err.Error())
		return true, false, nil
	}
	return false, false, nil
}

// ReverseCryptCheckFn is like CryptCheckFn except src and dst are switched
// result: src is crypt, dst is non-crypt
func ReverseCryptCheckFn(ctx context.Context, dst, src fs.Object) (differ bool, noHash bool, err error) {
	return CryptCheckFn(ctx, src, dst)
}

// DownloadCheckFn is a slightly modified version of Check with --download
func DownloadCheckFn(ctx context.Context, a, b fs.Object) (differ bool, noHash bool, err error) {
	differ, err = operations.CheckIdenticalDownload(ctx, a, b)
	if err != nil {
		return true, true, fmt.Errorf("failed to download: %w", err)
	}
	return differ, false, nil
}

// check potential conflicts (to avoid renaming if already identical)
func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {
	matches := bilib.Names{}
	if filterCheck.HaveFilesFrom() {
		fs.Debugf(nil, "There are potential conflicts to check.")

		opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)
		if checkopterr != nil {
			b.critical = true
			b.retryable = true
			fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
			return matches, checkopterr
		}
		defer close()

		opt.Match = new(bytes.Buffer)

		opt = WhichCheck(ctxCheck, opt)

		fs.Infof(nil, "Checking potential conflicts...")
		check := operations.CheckFn(ctxCheck, opt)
		fs.Infof(nil, "Finished checking the potential conflicts. %s", check)

		//reset error count, because we don't want to count check errors as bisync errors
		accounting.Stats(ctxCheck).ResetErrors()

		//return the list of identical files to check against later
		if len(fmt.Sprint(opt.Match)) > 0 {
			matches = bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n"))
		}
		if matches.NotEmpty() {
			fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)
		} else {
			fs.Debugf(nil, "None of the conflicts were determined to be identical.")
		}

	}
	return matches, nil
}

// WhichEqual is similar to WhichCheck, but checks a single object.
// Returns true if the objects are equal, false if they differ or if we don't know
func WhichEqual(ctx context.Context, src, dst fs.Object, Fsrc, Fdst fs.Fs) bool {
	opt, close, checkopterr := check.GetCheckOpt(Fsrc, Fdst)
	if checkopterr != nil {
		fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
	}
	defer close()

	opt = WhichCheck(ctx, opt)
	differ, noHash, err := opt.Check(ctx, dst, src)
	if err != nil {
		fs.Errorf(src, "failed to check: %v", err)
		return false
	}
	if noHash {
		fs.Errorf(src, "failed to check as hash is missing")
		return false
	}
	return !differ
}
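A hedged usage sketch (editorial, not part of this commit) showing how the helpers above could be wired together the same way checkconflicts does it: build a CheckOpt with check.GetCheckOpt, let WhichCheck pick the comparison method, then run operations.CheckFn. The remote strings are placeholders, and loading the config via configfile.Install() is an assumption about how a standalone program would set up rclone.

package main

import (
	"bytes"
	"context"
	"log"

	"github.com/rclone/rclone/cmd/bisync"
	"github.com/rclone/rclone/cmd/check"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/config/configfile"
	"github.com/rclone/rclone/fs/operations"
)

func main() {
	ctx := context.Background()
	configfile.Install() // load the default rclone config (assumed setup step)

	f1, err := fs.NewFs(ctx, "/tmp/bisync-path1") // placeholder local path
	if err != nil {
		log.Fatal(err)
	}
	f2, err := fs.NewFs(ctx, "mycrypt:path2") // placeholder crypt remote name
	if err != nil {
		log.Fatal(err)
	}

	// Build the CheckOpt, let WhichCheck pick check / cryptcheck / --download,
	// then run the comparison, collecting names of identical files in opt.Match.
	opt, close, err := check.GetCheckOpt(f1, f2)
	if err != nil {
		log.Fatal(err)
	}
	defer close()
	opt.Match = new(bytes.Buffer)

	opt = bisync.WhichCheck(ctx, opt)
	if err := operations.CheckFn(ctx, opt); err != nil {
		log.Printf("check finished with differences or errors: %v", err)
	}
	log.Printf("identical files:\n%s", opt.Match)
}

In bisync itself the same flow runs inside checkconflicts, with the identical-file names read back from opt.Match to decide which conflicts do not need renaming.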
@@ -3,7 +3,6 @@
package bisync

import (
	"bytes"
	"context"
	"fmt"
	"path/filepath"
@@ -11,9 +10,7 @@ import (
	"strings"

	"github.com/rclone/rclone/cmd/bisync/bilib"
	"github.com/rclone/rclone/cmd/check"
	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/accounting"
	"github.com/rclone/rclone/fs/filter"
	"github.com/rclone/rclone/fs/operations"
	"golang.org/x/text/unicode/norm"
@@ -96,47 +93,6 @@ func (ds *deltaSet) printStats() {
		ds.msg, nAll, nNew, nNewer, nOlder, nDeleted)
}

// check potential conflicts (to avoid renaming if already identical)
func (b *bisyncRun) checkconflicts(ctxCheck context.Context, filterCheck *filter.Filter, fs1, fs2 fs.Fs) (bilib.Names, error) {
	matches := bilib.Names{}
	if filterCheck.HaveFilesFrom() {
		fs.Debugf(nil, "There are potential conflicts to check.")

		opt, close, checkopterr := check.GetCheckOpt(b.fs1, b.fs2)
		if checkopterr != nil {
			b.critical = true
			b.retryable = true
			fs.Debugf(nil, "GetCheckOpt error: %v", checkopterr)
			return matches, checkopterr
		}
		defer close()

		opt.Match = new(bytes.Buffer)

		// TODO: consider using custom CheckFn to act like cryptcheck, if either fs is a crypt remote and -c has been passed
		// note that cryptCheck() is not currently exported

		fs.Infof(nil, "Checking potential conflicts...")
		check := operations.Check(ctxCheck, opt)
		fs.Infof(nil, "Finished checking the potential conflicts. %s", check)

		//reset error count, because we don't want to count check errors as bisync errors
		accounting.Stats(ctxCheck).ResetErrors()

		//return the list of identical files to check against later
		if len(fmt.Sprint(opt.Match)) > 0 {
			matches = bilib.ToNames(strings.Split(fmt.Sprint(opt.Match), "\n"))
		}
		if matches.NotEmpty() {
			fs.Debugf(nil, "The following potential conflicts were determined to be identical. %v", matches)
		} else {
			fs.Debugf(nil, "None of the conflicts were determined to be identical.")
		}

	}
	return matches, nil
}

// findDeltas
func (b *bisyncRun) findDeltas(fctx context.Context, f fs.Fs, oldListing string, now *fileList, msg string) (ds *deltaSet, err error) {
	var old *fileList
@@ -641,7 +641,8 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList,
	fs.Debugf(srcObj, "rechecking")
	for _, dstObj := range dstObjs {
		if srcObj.Remote() == dstObj.Remote() || srcObj.Remote() == b.aliases.Alias(dstObj.Remote()) {
			if operations.Equal(ctxRecheck, srcObj, dstObj) || b.opt.DryRun {
			// note: unlike Equal(), WhichEqual() does not update the modtime in dest if sums match but modtimes don't.
			if b.opt.DryRun || WhichEqual(ctxRecheck, srcObj, dstObj, src, dst) {
				putObj(srcObj, src, srcList)
				putObj(dstObj, dst, dstList)
				resolved = append(resolved, srcObj.Remote())
@@ -655,7 +656,8 @@ func (b *bisyncRun) recheck(ctxRecheck context.Context, src, dst fs.Fs, srcList,
	// skip and error during --resync, as rollback is not possible
	if !slices.Contains(resolved, srcObj.Remote()) && !b.opt.DryRun {
		if b.opt.Resync {
			b.handleErr(srcObj, "Unable to rollback during --resync", errors.New("no dstObj match or files not equal"), true, false)
			err = errors.New("no dstObj match or files not equal")
			b.handleErr(srcObj, "Unable to rollback during --resync", err, true, false)
		} else {
			toRollback = append(toRollback, srcObj.Remote())
		}
@@ -1,5 +1,5 @@
# bisync listing v1 from test
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "file1.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/HeLlO,wOrLd!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/HeLlO,wOrLd!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt"
@@ -1,5 +1,5 @@
# bisync listing v1 from test
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-05T00:00:00.000000000+0000 "file1.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/hello,WORLD!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/hello,WORLD!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt"
@@ -1,5 +1,5 @@
# bisync listing v1 from test
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "file1.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/hello,WORLD!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-03T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/hello,WORLD!.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "folder/éééö.txt"
- 19 md5:7fe98ed88552b828777d8630900346b8 - 2001-01-02T00:00:00.000000000+0000 "測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö/測試_Русский___ě_áñ👸🏼🧝🏾\u200d♀️💆🏿\u200d♂️🐨🤙🏼🤮🧑🏻\u200d🔧🧑\u200d🔬éééö.txt"
@@ -1367,6 +1367,8 @@ for performance improvements and less [risk of error](https://forum.rclone.org/t
* Fixed handling of unicode normalization and case insensitivity, support for [`--fix-case`](/docs/#fix-case), [`--ignore-case-sync`](/docs/#ignore-case-sync), [`--no-unicode-normalization`](/docs/#no-unicode-normalization)
* `--resync` is now much more efficient (especially for users of `--create-empty-src-dirs`)
* Google Docs (and other files of unknown size) are now supported (with the same options as in `sync`)
* Equality checks before a sync conflict rename now fall back to `cryptcheck` (when possible) or `--download`,
  instead of `--size-only`, when `check` is not available.

### `v1.64`
* Fixed an [issue](https://forum.rclone.org/t/bisync-bugs-and-feature-requests/37636#:~:text=1.%20Dry%20runs%20are%20not%20completely%20dry)