diff --git a/backend/jottacloud/jottacloud.go b/backend/jottacloud/jottacloud.go index adccdc633..59a8ba697 100644 --- a/backend/jottacloud/jottacloud.go +++ b/backend/jottacloud/jottacloud.go @@ -17,6 +17,7 @@ import ( "github.com/ncw/rclone/backend/jottacloud/api" "github.com/ncw/rclone/fs" + "github.com/ncw/rclone/fs/accounting" "github.com/ncw/rclone/fs/config" "github.com/ncw/rclone/fs/config/configmap" "github.com/ncw/rclone/fs/config/configstruct" @@ -67,7 +68,7 @@ func init() { }}, }, { Name: "md5_memory_limit", - Help: "Files with a size bigger than this value will be cached on disk otherwise they just read into memory ", + Help: "Files bigger than this will be cached on disk to calculate the MD5 if required.", Default: fs.SizeSuffix(10 * 1024 * 1024), Advanced: true, }}, @@ -753,67 +754,86 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) { return resp.Body, err } +// Read the md5 of in returning a reader which will read the same contents +// +// The cleanup function should be called when out is finished with +// regardless of whether this function returned an error or not. +func readMD5(in io.Reader, size, threshold int64) (md5sum string, out io.Reader, cleanup func(), err error) { + // we need a MD5 + md5Hasher := md5.New() + // use the teeReader to write to the local file AND caclulate the MD5 while doing so + teeReader := io.TeeReader(in, md5Hasher) + + // nothing to clean up by default + cleanup = func() {} + + // don't cache small files on disk to reduce wear of the disk + if size > threshold { + var tempFile *os.File + + // create the cache file + tempFile, err = ioutil.TempFile("", cachePrefix) + if err != nil { + return + } + + _ = os.Remove(tempFile.Name()) // Delete the file - may not work on Windows + + // clean up the file after we are done downloading + cleanup = func() { + // the file should normally already be close, but just to make sure + _ = tempFile.Close() + _ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already + } + + // copy the ENTIRE file to disc and calculate the MD5 in the process + if _, err = io.Copy(tempFile, teeReader); err != nil { + return + } + // jump to the start of the local file so we can pass it along + if _, err = tempFile.Seek(0, 0); err != nil { + return + } + + // replace the already read source with a reader of our cached file + out = tempFile + } else { + // that's a small file, just read it into memory + var inData []byte + inData, err = ioutil.ReadAll(teeReader) + if err != nil { + return + } + + // set the reader to our read memory block + out = bytes.NewReader(inData) + } + return hex.EncodeToString(md5Hasher.Sum(nil)), out, cleanup, nil +} + // Update the object with the contents of the io.Reader, modTime and size // // If existing is set then it updates the object rather than creating a new one // // The new object may have been created if an error is returned func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) { + size := src.Size() md5String, err := src.Hash(hash.MD5) if err != nil || md5String == "" { - // if the source can't provide a MD5 hash we are screwed and need to calculate it ourselfs - // we read the entire file and cache it in the localdir and send it AFTERwards - - // we need a MD5 - md5Hasher := md5.New() - // use the teeReader to write to the local file AND caclulate the MD5 while doing so - teeReader := io.TeeReader(in, md5Hasher) - - // don't cache small files on disk to reduce wear of the disk - if src.Size() > int64(o.fs.opt.MD5MemoryThreshold) { - // create the cache file - tempFile, err := ioutil.TempFile("", cachePrefix) - if err != nil { - return err - } - - // clean up the file after we are done downloading - defer func() { - // the file should normally already be close, but just to make sure - _ = tempFile.Close() - // delete the cache file after we are done - _ = os.Remove(tempFile.Name()) - }() - - // reade the ENTIRE file to disc and calculate the MD5 in the process - if _, err = io.Copy(tempFile, teeReader); err != nil { - return err - } - // jump to the start of the local file so we can pass it along - if _, err = tempFile.Seek(0, 0); err != nil { - return err - } - - // replace the already read source with a reader of our cached file - in = tempFile - } else { - // that's a small file, just read it into memory - var inData []byte - inData, err = ioutil.ReadAll(teeReader) - if err != nil { - return err - } - - // set the reader to our read memory block - in = bytes.NewReader(inData) + // unwrap the accounting from the input, we use wrap to put it + // back on after the buffering + var wrap accounting.WrapFn + in, wrap = accounting.UnWrap(in) + var cleanup func() + md5String, in, cleanup, err = readMD5(in, size, int64(o.fs.opt.MD5MemoryThreshold)) + defer cleanup() + if err != nil { + return errors.Wrap(err, "failed to calculate MD5") } - - // YEAH, the thing we need the MD5 string - md5String = hex.EncodeToString(md5Hasher.Sum(nil)) + // Wrap the accounting back onto the stream + in = wrap(in) } - size := src.Size() - var resp *http.Response var result api.JottaFile opts := rest.Opts{ diff --git a/backend/jottacloud/jottacloud_internal_test.go b/backend/jottacloud/jottacloud_internal_test.go new file mode 100644 index 000000000..f5fe7b2bd --- /dev/null +++ b/backend/jottacloud/jottacloud_internal_test.go @@ -0,0 +1,67 @@ +package jottacloud + +import ( + "crypto/md5" + "fmt" + "io" + "io/ioutil" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// A test reader to return a test pattern of size +type testReader struct { + size int64 + c byte +} + +// Reader is the interface that wraps the basic Read method. +func (r *testReader) Read(p []byte) (n int, err error) { + for i := range p { + if r.size <= 0 { + return n, io.EOF + } + p[i] = r.c + r.c = (r.c + 1) % 253 + r.size-- + n++ + } + return +} + +func TestReadMD5(t *testing.T) { + // smoke test the reader + b, err := ioutil.ReadAll(&testReader{size: 10}) + require.NoError(t, err) + assert.Equal(t, []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, b) + + // Check readMD5 for different size and threshold + for _, size := range []int64{0, 1024, 10 * 1024, 100 * 1024} { + t.Run(fmt.Sprintf("%d", size), func(t *testing.T) { + hasher := md5.New() + n, err := io.Copy(hasher, &testReader{size: size}) + require.NoError(t, err) + assert.Equal(t, n, size) + wantMD5 := fmt.Sprintf("%x", hasher.Sum(nil)) + for _, threshold := range []int64{512, 1024, 10 * 1024, 20 * 1024} { + t.Run(fmt.Sprintf("%d", threshold), func(t *testing.T) { + in := &testReader{size: size} + gotMD5, out, cleanup, err := readMD5(in, size, threshold) + defer cleanup() + require.NoError(t, err) + assert.Equal(t, wantMD5, gotMD5) + + // check md5hash of out + hasher := md5.New() + n, err := io.Copy(hasher, out) + require.NoError(t, err) + assert.Equal(t, n, size) + outMD5 := fmt.Sprintf("%x", hasher.Sum(nil)) + assert.Equal(t, wantMD5, outMD5) + }) + } + }) + } +} diff --git a/docs/content/jottacloud.md b/docs/content/jottacloud.md index 5a05cd9f0..85c832c60 100644 --- a/docs/content/jottacloud.md +++ b/docs/content/jottacloud.md @@ -91,6 +91,12 @@ not. Jottacloud supports MD5 type hashes, so you can use the `--checksum` flag. +Note that Jottacloud requires the MD5 hash before upload so if the +source does not have an MD5 checksum then the file will be cached +temporarily on disk (wherever the `TMPDIR` environment variable points +to) before it is uploaded. Small files will be cached in memory - see +the `--jottacloud-md5-memory-limit` flag. + ### Deleting files ### Any files you delete with rclone will end up in the trash. Due to a lack of API documentation emptying the trash is currently only possible via the Jottacloud website. @@ -108,6 +114,16 @@ There are quite a few characters that can't be in Jottacloud file names. Rclone Jottacloud only supports filenames up to 255 characters in length. +### Specific options ### + +Here are the command line options specific to this cloud storage +system. + +#### --jottacloud-md5-memory-limit SizeSuffix + +Files bigger than this will be cached on disk to calculate the MD5 if +required. (default 10M) + ### Troubleshooting ### Jottacloud exhibits some inconsistent behaviours regarding deleted files and folders which may cause Copy, Move and DirMove operations to previously deleted paths to fail. Emptying the trash should help in such cases. \ No newline at end of file