mirror of
https://github.com/rclone/rclone
synced 2025-02-17 12:51:26 +01:00
fs: re-implement check and cryptcheck using the same traversal as sync
This makes them 100% consistent with sync and also make them use less memory as they no longer build the whole tree in memory first. Fixes #1657
This commit is contained in:
parent
261c7ad9e4
commit
a8e41f081c
268
fs/operations.go
268
fs/operations.go
@ -18,8 +18,7 @@ import (
|
|||||||
|
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"github.com/spf13/pflag"
|
"github.com/spf13/pflag"
|
||||||
|
"golang.org/x/net/context"
|
||||||
"golang.org/x/text/unicode/norm"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// CalculateModifyWindow works out modify window for Fses passed in -
|
// CalculateModifyWindow works out modify window for Fses passed in -
|
||||||
@ -682,68 +681,6 @@ func filterAndSortDir(entries DirEntries, includeAll bool, dir string,
|
|||||||
return entries, nil
|
return entries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read a map of Object.Remote to Object for the given Fs.
|
|
||||||
// dir is the start directory, "" for root
|
|
||||||
// If includeAll is specified all files will be added,
|
|
||||||
// otherwise only files passing the filter will be added.
|
|
||||||
//
|
|
||||||
// This also detects duplicates and normalised duplicates
|
|
||||||
func readFilesMap(fs Fs, includeAll bool, dir string) (files map[string]Object, err error) {
|
|
||||||
files = make(map[string]Object)
|
|
||||||
normalised := make(map[string]struct{})
|
|
||||||
err = readFilesFn(fs, includeAll, dir, func(o Object) error {
|
|
||||||
remote := o.Remote()
|
|
||||||
normalisedRemote := strings.ToLower(norm.NFC.String(remote))
|
|
||||||
if _, ok := files[remote]; !ok {
|
|
||||||
files[remote] = o
|
|
||||||
if _, ok := normalised[normalisedRemote]; ok {
|
|
||||||
Logf(o, "File found with same name but different case on %v", o.Fs())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Logf(o, "Duplicate file detected")
|
|
||||||
}
|
|
||||||
normalised[normalisedRemote] = struct{}{}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
err = errors.Wrapf(err, "error listing: %s", fs)
|
|
||||||
}
|
|
||||||
return files, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// readFilesMaps runs readFilesMap on fdst and fsrc at the same time
|
|
||||||
// dir is the start directory, "" for root
|
|
||||||
func readFilesMaps(fdst Fs, fdstIncludeAll bool, fsrc Fs, fsrcIncludeAll bool, dir string) (dstFiles, srcFiles map[string]Object, err error) {
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
var srcErr, dstErr error
|
|
||||||
|
|
||||||
list := func(fs Fs, includeAll bool, pMap *map[string]Object, pErr *error) {
|
|
||||||
defer wg.Done()
|
|
||||||
Infof(fs, "Building file list")
|
|
||||||
files, listErr := readFilesMap(fs, includeAll, dir)
|
|
||||||
if listErr != nil {
|
|
||||||
Errorf(fs, "Error building file list: %v", listErr)
|
|
||||||
*pErr = listErr
|
|
||||||
} else {
|
|
||||||
Debugf(fs, "Done building file list")
|
|
||||||
*pMap = files
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wg.Add(2)
|
|
||||||
go list(fdst, fdstIncludeAll, &dstFiles, &srcErr)
|
|
||||||
go list(fsrc, fsrcIncludeAll, &srcFiles, &dstErr)
|
|
||||||
wg.Wait()
|
|
||||||
|
|
||||||
if srcErr != nil {
|
|
||||||
err = srcErr
|
|
||||||
}
|
|
||||||
if dstErr != nil {
|
|
||||||
err = dstErr
|
|
||||||
}
|
|
||||||
return dstFiles, srcFiles, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// SameConfig returns true if fdst and fsrc are using the same config
|
// SameConfig returns true if fdst and fsrc are using the same config
|
||||||
// file entry
|
// file entry
|
||||||
func SameConfig(fdst, fsrc Info) bool {
|
func SameConfig(fdst, fsrc Info) bool {
|
||||||
@ -795,6 +732,107 @@ func checkIdentical(dst, src Object) (differ bool, noHash bool) {
|
|||||||
return false, false
|
return false, false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkFn is the the type of the checking function used in CheckFn()
|
||||||
|
type checkFn func(a, b Object) (differ bool, noHash bool)
|
||||||
|
|
||||||
|
// checkMarch is used to march over two Fses in the same way as
|
||||||
|
// sync/copy
|
||||||
|
type checkMarch struct {
|
||||||
|
fdst, fsrc Fs
|
||||||
|
check checkFn
|
||||||
|
differences int32
|
||||||
|
noHashes int32
|
||||||
|
srcFilesMissing int32
|
||||||
|
dstFilesMissing int32
|
||||||
|
}
|
||||||
|
|
||||||
|
// DstOnly have an object which is in the destination only
|
||||||
|
func (c *checkMarch) DstOnly(dst DirEntry) (recurse bool) {
|
||||||
|
switch dst.(type) {
|
||||||
|
case Object:
|
||||||
|
Stats.Error()
|
||||||
|
Errorf(dst, "File not in %v", c.fsrc)
|
||||||
|
atomic.AddInt32(&c.differences, 1)
|
||||||
|
atomic.AddInt32(&c.srcFilesMissing, 1)
|
||||||
|
case Directory:
|
||||||
|
// Do the same thing to the entire contents of the directory
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
panic("Bad object in DirEntries")
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// SrcOnly have an object which is in the source only
|
||||||
|
func (c *checkMarch) SrcOnly(src DirEntry) (recurse bool) {
|
||||||
|
switch src.(type) {
|
||||||
|
case Object:
|
||||||
|
Stats.Error()
|
||||||
|
Errorf(src, "File not in %v", c.fdst)
|
||||||
|
atomic.AddInt32(&c.differences, 1)
|
||||||
|
atomic.AddInt32(&c.dstFilesMissing, 1)
|
||||||
|
case Directory:
|
||||||
|
// Do the same thing to the entire contents of the directory
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
panic("Bad object in DirEntries")
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// check to see if two objects are identical using the check function
|
||||||
|
func (c *checkMarch) checkIdentical(dst, src Object) (differ bool, noHash bool) {
|
||||||
|
Stats.Checking(src.Remote())
|
||||||
|
defer Stats.DoneChecking(src.Remote())
|
||||||
|
if !Config.IgnoreSize && src.Size() != dst.Size() {
|
||||||
|
Stats.Error()
|
||||||
|
Errorf(src, "Sizes differ")
|
||||||
|
return true, false
|
||||||
|
}
|
||||||
|
if Config.SizeOnly {
|
||||||
|
return false, false
|
||||||
|
}
|
||||||
|
return c.check(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match is called when src and dst are present, so sync src to dst
|
||||||
|
func (c *checkMarch) Match(dst, src DirEntry) (recurse bool) {
|
||||||
|
switch srcX := src.(type) {
|
||||||
|
case Object:
|
||||||
|
dstX, ok := dst.(Object)
|
||||||
|
if ok {
|
||||||
|
differ, noHash := c.checkIdentical(dstX, srcX)
|
||||||
|
if differ {
|
||||||
|
atomic.AddInt32(&c.differences, 1)
|
||||||
|
} else {
|
||||||
|
Debugf(dstX, "OK")
|
||||||
|
}
|
||||||
|
if noHash {
|
||||||
|
atomic.AddInt32(&c.noHashes, 1)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Stats.Error()
|
||||||
|
Errorf(src, "is file on %v but directory on %v", c.fsrc, c.fdst)
|
||||||
|
atomic.AddInt32(&c.differences, 1)
|
||||||
|
atomic.AddInt32(&c.dstFilesMissing, 1)
|
||||||
|
}
|
||||||
|
case Directory:
|
||||||
|
// Do the same thing to the entire contents of the directory
|
||||||
|
_, ok := dst.(Directory)
|
||||||
|
if ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
Stats.Error()
|
||||||
|
Errorf(dst, "is file on %v but directory on %v", c.fdst, c.fsrc)
|
||||||
|
atomic.AddInt32(&c.differences, 1)
|
||||||
|
atomic.AddInt32(&c.srcFilesMissing, 1)
|
||||||
|
|
||||||
|
default:
|
||||||
|
panic("Bad object in DirEntries")
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// CheckFn checks the files in fsrc and fdst according to Size and
|
// CheckFn checks the files in fsrc and fdst according to Size and
|
||||||
// hash using checkFunction on each file to check the hashes.
|
// hash using checkFunction on each file to check the hashes.
|
||||||
//
|
//
|
||||||
@ -802,91 +840,31 @@ func checkIdentical(dst, src Object) (differ bool, noHash bool) {
|
|||||||
//
|
//
|
||||||
// it returns true if differences were found
|
// it returns true if differences were found
|
||||||
// it also returns whether it couldn't be hashed
|
// it also returns whether it couldn't be hashed
|
||||||
func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash bool)) error {
|
func CheckFn(fdst, fsrc Fs, check checkFn) error {
|
||||||
dstFiles, srcFiles, err := readFilesMaps(fdst, false, fsrc, false, "")
|
c := &checkMarch{
|
||||||
if err != nil {
|
fdst: fdst,
|
||||||
return err
|
fsrc: fsrc,
|
||||||
}
|
check: check,
|
||||||
differences := int32(0)
|
|
||||||
noHashes := int32(0)
|
|
||||||
|
|
||||||
// FIXME could do this as it goes along and make it use less
|
|
||||||
// memory.
|
|
||||||
|
|
||||||
// Move all the common files into commonFiles and delete then
|
|
||||||
// from srcFiles and dstFiles
|
|
||||||
commonFiles := make(map[string][2]Object)
|
|
||||||
for remote, src := range srcFiles {
|
|
||||||
if dst, ok := dstFiles[remote]; ok {
|
|
||||||
commonFiles[remote] = [2]Object{dst, src}
|
|
||||||
delete(srcFiles, remote)
|
|
||||||
delete(dstFiles, remote)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Logf(fdst, "%d files not in %v", len(dstFiles), fsrc)
|
|
||||||
for _, dst := range dstFiles {
|
|
||||||
Stats.Error()
|
|
||||||
Errorf(dst, "File not in %v", fsrc)
|
|
||||||
atomic.AddInt32(&differences, 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
Logf(fsrc, "%d files not in %s", len(srcFiles), fdst)
|
|
||||||
for _, src := range srcFiles {
|
|
||||||
Stats.Error()
|
|
||||||
Errorf(src, "File not in %v", fdst)
|
|
||||||
atomic.AddInt32(&differences, 1)
|
|
||||||
}
|
|
||||||
|
|
||||||
checks := make(chan [2]Object, Config.Transfers)
|
|
||||||
go func() {
|
|
||||||
for _, check := range commonFiles {
|
|
||||||
checks <- check
|
|
||||||
}
|
|
||||||
close(checks)
|
|
||||||
}()
|
|
||||||
|
|
||||||
checkIdentical := func(dst, src Object) (differ bool, noHash bool) {
|
|
||||||
Stats.Checking(src.Remote())
|
|
||||||
defer Stats.DoneChecking(src.Remote())
|
|
||||||
if !Config.IgnoreSize && src.Size() != dst.Size() {
|
|
||||||
Stats.Error()
|
|
||||||
Errorf(src, "Sizes differ")
|
|
||||||
return true, false
|
|
||||||
}
|
|
||||||
if Config.SizeOnly {
|
|
||||||
return false, false
|
|
||||||
}
|
|
||||||
return checkFunction(dst, src)
|
|
||||||
}
|
|
||||||
|
|
||||||
var checkerWg sync.WaitGroup
|
|
||||||
checkerWg.Add(Config.Checkers)
|
|
||||||
for i := 0; i < Config.Checkers; i++ {
|
|
||||||
go func() {
|
|
||||||
defer checkerWg.Done()
|
|
||||||
for check := range checks {
|
|
||||||
differ, noHash := checkIdentical(check[0], check[1])
|
|
||||||
if differ {
|
|
||||||
atomic.AddInt32(&differences, 1)
|
|
||||||
} else {
|
|
||||||
Debugf(check[0], "OK")
|
|
||||||
}
|
|
||||||
if noHash {
|
|
||||||
atomic.AddInt32(&noHashes, 1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// set up a march over fdst and fsrc
|
||||||
|
m := newMarch(context.Background(), fdst, fsrc, "", c)
|
||||||
Infof(fdst, "Waiting for checks to finish")
|
Infof(fdst, "Waiting for checks to finish")
|
||||||
checkerWg.Wait()
|
m.run()
|
||||||
Logf(fdst, "%d differences found", Stats.GetErrors())
|
|
||||||
if noHashes > 0 {
|
if c.dstFilesMissing > 0 {
|
||||||
Logf(fdst, "%d hashes could not be checked", noHashes)
|
Logf(fdst, "%d files missing", c.dstFilesMissing)
|
||||||
}
|
}
|
||||||
if differences > 0 {
|
if c.srcFilesMissing > 0 {
|
||||||
return errors.Errorf("%d differences found", differences)
|
Logf(fsrc, "%d files missing", c.srcFilesMissing)
|
||||||
|
}
|
||||||
|
|
||||||
|
Logf(fdst, "%d differences found", Stats.GetErrors())
|
||||||
|
if c.noHashes > 0 {
|
||||||
|
Logf(fdst, "%d hashes could not be checked", c.noHashes)
|
||||||
|
}
|
||||||
|
if c.differences > 0 {
|
||||||
|
return errors.Errorf("%d differences found", c.differences)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user