From 541929258b81a078dbad56843e27119315eb7ca0 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 13 Feb 2017 10:48:26 +0000 Subject: [PATCH] check: Add --download flag to check all the data, not just hashes --- cmd/check/check.go | 23 ++++-- cmd/cryptcheck/cryptcheck.go | 7 -- fs/operations.go | 131 +++++++++++++++++++++++++++++------ fs/operations_test.go | 118 ++++++++++++++++++++++++++++++- 4 files changed, 245 insertions(+), 34 deletions(-) diff --git a/cmd/check/check.go b/cmd/check/check.go index fc5db7860..a95df902c 100644 --- a/cmd/check/check.go +++ b/cmd/check/check.go @@ -6,24 +6,39 @@ import ( "github.com/spf13/cobra" ) +// Globals +var ( + download = false +) + func init() { cmd.Root.AddCommand(commandDefintion) + commandDefintion.Flags().BoolVarP(&download, "download", "", download, "Check by downloading rather than with hash.") } var commandDefintion = &cobra.Command{ Use: "check source:path dest:path", Short: `Checks the files in the source and destination match.`, Long: ` -Checks the files in the source and destination match. It -compares sizes and MD5SUMs and prints a report of files which -don't match. It doesn't alter the source or destination. +Checks the files in the source and destination match. It compares +sizes and hashes (MD5 or SHA1) and logs a report of files which don't +match. It doesn't alter the source or destination. -` + "`" + `--size-only` + "`" + ` may be used to only compare the sizes, not the MD5SUMs. +If you supply the --size-only flag, it will only compare the sizes not +the hashes as well. Use this for a quick check. + +If you supply the --download flag, it will download the data from +both remotes and check them against each other on the fly. This can +be useful for remotes that don't support hashes or if you really want +to check all the data. 
`, Run: func(command *cobra.Command, args []string) { cmd.CheckArgs(2, 2, command, args) fsrc, fdst := cmd.NewFsSrcDst(args) cmd.Run(false, false, command, func() error { + if download { + return fs.CheckDownload(fdst, fsrc) + } return fs.Check(fdst, fsrc) }) }, diff --git a/cmd/cryptcheck/cryptcheck.go b/cmd/cryptcheck/cryptcheck.go index c3f89c321..960cbd87a 100644 --- a/cmd/cryptcheck/cryptcheck.go +++ b/cmd/cryptcheck/cryptcheck.go @@ -68,13 +68,6 @@ func cryptCheck(fdst, fsrc fs.Fs) error { // it returns true if differences were found // it also returns whether it couldn't be hashed checkIdentical := func(dst, src fs.Object) (differ bool, noHash bool) { - fs.Stats.Checking(src.Remote()) - defer fs.Stats.DoneChecking(src.Remote()) - if src.Size() != dst.Size() { - fs.Stats.Error() - fs.Errorf(src, "Sizes differ") - return true, false - } cryptDst := dst.(*crypt.Object) underlyingDst := cryptDst.UnWrap() underlyingHash, err := underlyingDst.Hash(hashType) diff --git a/fs/operations.go b/fs/operations.go index 6a149b4f6..320a351fb 100644 --- a/fs/operations.go +++ b/fs/operations.go @@ -3,6 +3,7 @@ package fs import ( + "bytes" "fmt" "io" "log" @@ -669,29 +670,19 @@ func Overlapping(fdst, fsrc Info) bool { // it returns true if differences were found // it also returns whether it couldn't be hashed func checkIdentical(dst, src Object) (differ bool, noHash bool) { - Stats.Checking(src.Remote()) - defer Stats.DoneChecking(src.Remote()) - if src.Size() != dst.Size() { - Stats.Error() - Errorf(src, "Sizes differ") + same, hash, err := CheckHashes(src, dst) + if err != nil { + // CheckHashes will log and count errors return true, false } - if !Config.SizeOnly { - same, hash, err := CheckHashes(src, dst) - if err != nil { - // CheckHashes will log and count errors - return true, false - } - if hash == HashNone { - return false, true - } - if !same { - Stats.Error() - Errorf(src, "%v differ", hash) - return true, false - } + if hash == HashNone { + return false, true + 
} + if !same { + Stats.Error() + Errorf(src, "%v differ", hash) + return true, false } - Debugf(src, "OK") return false, false } @@ -746,15 +737,31 @@ func CheckFn(fdst, fsrc Fs, checkFunction func(a, b Object) (differ bool, noHash close(checks) }() + checkIdentical := func(dst, src Object) (differ bool, noHash bool) { + Stats.Checking(src.Remote()) + defer Stats.DoneChecking(src.Remote()) + if src.Size() != dst.Size() { + Stats.Error() + Errorf(src, "Sizes differ") + return true, false + } + if Config.SizeOnly { + return false, false + } + return checkFunction(dst, src) + } + var checkerWg sync.WaitGroup checkerWg.Add(Config.Checkers) for i := 0; i < Config.Checkers; i++ { go func() { defer checkerWg.Done() for check := range checks { - differ, noHash := checkFunction(check[0], check[1]) + differ, noHash := checkIdentical(check[0], check[1]) if differ { atomic.AddInt32(&differences, 1) + } else { + Debugf(check[0], "OK") } if noHash { atomic.AddInt32(&noHashes, 1) @@ -780,6 +787,88 @@ func Check(fdst, fsrc Fs) error { return CheckFn(fdst, fsrc, checkIdentical) } +// ReadFill reads as much data from r into buf as it can +// +// It reads until the buffer is full or r.Read returned an error. +// +// This is io.ReadFull but when you just want as much data as +// possible, not an exact size of block. +func ReadFill(r io.Reader, buf []byte) (n int, err error) { + var nn int + for n < len(buf) && err == nil { + nn, err = r.Read(buf[n:]) + n += nn + } + return n, err +} + +// CheckEqualReaders checks to see if in1 and in2 have the same +// content when read. 
+// +// it returns true if differences were found +func CheckEqualReaders(in1, in2 io.Reader) (differ bool, err error) { + const bufSize = 64 * 1024 + buf1 := make([]byte, bufSize) + buf2 := make([]byte, bufSize) + for { + n1, err1 := ReadFill(in1, buf1) + n2, err2 := ReadFill(in2, buf2) + // check errors + if err1 != nil && err1 != io.EOF { + return true, err1 + } else if err2 != nil && err2 != io.EOF { + return true, err2 + } + // err1 && err2 are nil or io.EOF here + // process the data + if n1 != n2 || !bytes.Equal(buf1[:n1], buf2[:n2]) { + return true, nil + } + // if both streams finished then we have finished + if err1 == io.EOF && err2 == io.EOF { + break + } + } + return false, nil +} + +// CheckIdentical checks to see if dst and src are identical by +// reading all their bytes if necessary. +// +// it returns true if differences were found +func CheckIdentical(dst, src Object) (differ bool, err error) { + in1, err := dst.Open() + if err != nil { + return true, errors.Wrapf(err, "failed to open %q", dst) + } + in1 = NewAccountWithBuffer(in1, dst) // account and buffer the transfer + defer CheckClose(in1, &err) + + in2, err := src.Open() + if err != nil { + return true, errors.Wrapf(err, "failed to open %q", src) + } + in2 = NewAccountWithBuffer(in2, src) // account and buffer the transfer + defer CheckClose(in2, &err) + + return CheckEqualReaders(in1, in2) +} + +// CheckDownload checks the files in fsrc and fdst according to Size +// and the actual contents of the files. 
+func CheckDownload(fdst, fsrc Fs) error { + check := func(a, b Object) (differ bool, noHash bool) { + differ, err := CheckIdentical(a, b) + if err != nil { + Stats.Error() + Errorf(a, "Failed to download: %v", err) + return true, true + } + return differ, false + } + return CheckFn(fdst, fsrc, check) +} + // ListFn lists the Fs to the supplied function // // Lists in parallel which may get them out of order diff --git a/fs/operations_test.go b/fs/operations_test.go index df98843e7..a95e9c424 100644 --- a/fs/operations_test.go +++ b/fs/operations_test.go @@ -21,8 +21,10 @@ package fs_test import ( "bytes" + "errors" "flag" "fmt" + "io" "io/ioutil" "log" "os" @@ -469,14 +471,14 @@ func TestDelete(t *testing.T) { fstest.CheckItems(t, r.fremote, file3) } -func TestCheck(t *testing.T) { +func testCheck(t *testing.T, checkFunction func(fdst, fsrc fs.Fs) error) { r := NewRun(t) defer r.Finalise() check := func(i int, wantErrors int64) { fs.Debugf(r.fremote, "%d: Starting check test", i) oldErrors := fs.Stats.GetErrors() - err := fs.Check(r.flocal, r.fremote) + err := checkFunction(r.flocal, r.fremote) gotErrors := fs.Stats.GetErrors() - oldErrors if wantErrors == 0 && err != nil { t.Errorf("%d: Got error when not expecting one: %v", i, err) @@ -517,6 +519,14 @@ func TestCheck(t *testing.T) { check(5, 0) } +func TestCheck(t *testing.T) { + testCheck(t, fs.Check) +} + +func TestCheckDownload(t *testing.T) { + testCheck(t, fs.CheckDownload) +} + func TestCheckSizeOnly(t *testing.T) { fs.Config.SizeOnly = true defer func() { fs.Config.SizeOnly = false }() @@ -954,3 +964,107 @@ func TestListDirSorted(t *testing.T) { require.Len(t, items, 1) assert.Equal(t, "sub dir/sub sub dir/", str(0)) } + +type byteReader struct { + c byte +} + +func (br *byteReader) Read(p []byte) (n int, err error) { + if br.c == 0 { + err = io.EOF + } else if len(p) >= 1 { + p[0] = br.c + n = 1 + br.c-- + } + return +} + +func TestReadFill(t *testing.T) { + buf := []byte{9, 9, 9, 9, 9} + + n, err := 
fs.ReadFill(&byteReader{0}, buf) + assert.Equal(t, io.EOF, err) + assert.Equal(t, 0, n) + assert.Equal(t, []byte{9, 9, 9, 9, 9}, buf) + + n, err = fs.ReadFill(&byteReader{3}, buf) + assert.Equal(t, io.EOF, err) + assert.Equal(t, 3, n) + assert.Equal(t, []byte{3, 2, 1, 9, 9}, buf) + + n, err = fs.ReadFill(&byteReader{8}, buf) + assert.Equal(t, nil, err) + assert.Equal(t, 5, n) + assert.Equal(t, []byte{8, 7, 6, 5, 4}, buf) +} + +type errorReader struct { + err error +} + +func (er errorReader) Read(p []byte) (n int, err error) { + return 0, er.err +} + +func TestCheckEqualReaders(t *testing.T) { + b65a := make([]byte, 65*1024) + b65b := make([]byte, 65*1024) + b65b[len(b65b)-1] = 1 + b66 := make([]byte, 66*1024) + + differ, err := fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b65a)) + assert.NoError(t, err) + assert.Equal(t, differ, false) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b65b)) + assert.NoError(t, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), bytes.NewBuffer(b66)) + assert.NoError(t, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b66), bytes.NewBuffer(b65a)) + assert.NoError(t, err) + assert.Equal(t, differ, true) + + myErr := errors.New("sentinel") + wrap := func(b []byte) io.Reader { + r := bytes.NewBuffer(b) + e := errorReader{myErr} + return io.MultiReader(r, e) + } + + differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b65a)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b65b)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(wrap(b65a), bytes.NewBuffer(b66)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(wrap(b66), bytes.NewBuffer(b65a)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + 
differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b65a)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b65b)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b65a), wrap(b66)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) + + differ, err = fs.CheckEqualReaders(bytes.NewBuffer(b66), wrap(b65a)) + assert.Equal(t, myErr, err) + assert.Equal(t, differ, true) +}