mirror of
https://github.com/rclone/rclone
synced 2025-01-02 02:26:24 +01:00
jottacloud: docs, fixes and tests for MD5 calculation
* Add docs for --jottacloud-md5-memory-limit * Factor out readMD5 function and add tests * Fix accounting * Make sure temp file is deleted at the start (not Windows)
This commit is contained in:
parent
ee4485a316
commit
f9cf70e3aa
@ -17,6 +17,7 @@ import (
|
||||
|
||||
"github.com/ncw/rclone/backend/jottacloud/api"
|
||||
"github.com/ncw/rclone/fs"
|
||||
"github.com/ncw/rclone/fs/accounting"
|
||||
"github.com/ncw/rclone/fs/config"
|
||||
"github.com/ncw/rclone/fs/config/configmap"
|
||||
"github.com/ncw/rclone/fs/config/configstruct"
|
||||
@ -67,7 +68,7 @@ func init() {
|
||||
}},
|
||||
}, {
|
||||
Name: "md5_memory_limit",
|
||||
Help: "Files with a size bigger than this value will be cached on disk otherwise they just read into memory ",
|
||||
Help: "Files bigger than this will be cached on disk to calculate the MD5 if required.",
|
||||
Default: fs.SizeSuffix(10 * 1024 * 1024),
|
||||
Advanced: true,
|
||||
}},
|
||||
@ -753,66 +754,85 @@ func (o *Object) Open(options ...fs.OpenOption) (in io.ReadCloser, err error) {
|
||||
return resp.Body, err
|
||||
}
|
||||
|
||||
// Read the md5 of in returning a reader which will read the same contents
|
||||
//
|
||||
// The cleanup function should be called when out is finished with
|
||||
// regardless of whether this function returned an error or not.
|
||||
func readMD5(in io.Reader, size, threshold int64) (md5sum string, out io.Reader, cleanup func(), err error) {
|
||||
// we need a MD5
|
||||
md5Hasher := md5.New()
|
||||
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
|
||||
teeReader := io.TeeReader(in, md5Hasher)
|
||||
|
||||
// nothing to clean up by default
|
||||
cleanup = func() {}
|
||||
|
||||
// don't cache small files on disk to reduce wear of the disk
|
||||
if size > threshold {
|
||||
var tempFile *os.File
|
||||
|
||||
// create the cache file
|
||||
tempFile, err = ioutil.TempFile("", cachePrefix)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
_ = os.Remove(tempFile.Name()) // Delete the file - may not work on Windows
|
||||
|
||||
// clean up the file after we are done downloading
|
||||
cleanup = func() {
|
||||
// the file should normally already be close, but just to make sure
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
|
||||
}
|
||||
|
||||
// copy the ENTIRE file to disc and calculate the MD5 in the process
|
||||
if _, err = io.Copy(tempFile, teeReader); err != nil {
|
||||
return
|
||||
}
|
||||
// jump to the start of the local file so we can pass it along
|
||||
if _, err = tempFile.Seek(0, 0); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// replace the already read source with a reader of our cached file
|
||||
out = tempFile
|
||||
} else {
|
||||
// that's a small file, just read it into memory
|
||||
var inData []byte
|
||||
inData, err = ioutil.ReadAll(teeReader)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// set the reader to our read memory block
|
||||
out = bytes.NewReader(inData)
|
||||
}
|
||||
return hex.EncodeToString(md5Hasher.Sum(nil)), out, cleanup, nil
|
||||
}
|
||||
|
||||
// Update the object with the contents of the io.Reader, modTime and size
|
||||
//
|
||||
// If existing is set then it updates the object rather than creating a new one
|
||||
//
|
||||
// The new object may have been created if an error is returned
|
||||
func (o *Object) Update(in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) {
|
||||
size := src.Size()
|
||||
md5String, err := src.Hash(hash.MD5)
|
||||
if err != nil || md5String == "" {
|
||||
// if the source can't provide a MD5 hash we are screwed and need to calculate it ourselfs
|
||||
// we read the entire file and cache it in the localdir and send it AFTERwards
|
||||
|
||||
// we need a MD5
|
||||
md5Hasher := md5.New()
|
||||
// use the teeReader to write to the local file AND caclulate the MD5 while doing so
|
||||
teeReader := io.TeeReader(in, md5Hasher)
|
||||
|
||||
// don't cache small files on disk to reduce wear of the disk
|
||||
if src.Size() > int64(o.fs.opt.MD5MemoryThreshold) {
|
||||
// create the cache file
|
||||
tempFile, err := ioutil.TempFile("", cachePrefix)
|
||||
// unwrap the accounting from the input, we use wrap to put it
|
||||
// back on after the buffering
|
||||
var wrap accounting.WrapFn
|
||||
in, wrap = accounting.UnWrap(in)
|
||||
var cleanup func()
|
||||
md5String, in, cleanup, err = readMD5(in, size, int64(o.fs.opt.MD5MemoryThreshold))
|
||||
defer cleanup()
|
||||
if err != nil {
|
||||
return err
|
||||
return errors.Wrap(err, "failed to calculate MD5")
|
||||
}
|
||||
|
||||
// clean up the file after we are done downloading
|
||||
defer func() {
|
||||
// the file should normally already be close, but just to make sure
|
||||
_ = tempFile.Close()
|
||||
// delete the cache file after we are done
|
||||
_ = os.Remove(tempFile.Name())
|
||||
}()
|
||||
|
||||
// reade the ENTIRE file to disc and calculate the MD5 in the process
|
||||
if _, err = io.Copy(tempFile, teeReader); err != nil {
|
||||
return err
|
||||
// Wrap the accounting back onto the stream
|
||||
in = wrap(in)
|
||||
}
|
||||
// jump to the start of the local file so we can pass it along
|
||||
if _, err = tempFile.Seek(0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// replace the already read source with a reader of our cached file
|
||||
in = tempFile
|
||||
} else {
|
||||
// that's a small file, just read it into memory
|
||||
var inData []byte
|
||||
inData, err = ioutil.ReadAll(teeReader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// set the reader to our read memory block
|
||||
in = bytes.NewReader(inData)
|
||||
}
|
||||
|
||||
// YEAH, the thing we need the MD5 string
|
||||
md5String = hex.EncodeToString(md5Hasher.Sum(nil))
|
||||
}
|
||||
|
||||
size := src.Size()
|
||||
|
||||
var resp *http.Response
|
||||
var result api.JottaFile
|
||||
|
67
backend/jottacloud/jottacloud_internal_test.go
Normal file
67
backend/jottacloud/jottacloud_internal_test.go
Normal file
@ -0,0 +1,67 @@
|
||||
package jottacloud
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// A test reader to return a test pattern of size
|
||||
type testReader struct {
|
||||
size int64
|
||||
c byte
|
||||
}
|
||||
|
||||
// Reader is the interface that wraps the basic Read method.
|
||||
func (r *testReader) Read(p []byte) (n int, err error) {
|
||||
for i := range p {
|
||||
if r.size <= 0 {
|
||||
return n, io.EOF
|
||||
}
|
||||
p[i] = r.c
|
||||
r.c = (r.c + 1) % 253
|
||||
r.size--
|
||||
n++
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func TestReadMD5(t *testing.T) {
|
||||
// smoke test the reader
|
||||
b, err := ioutil.ReadAll(&testReader{size: 10})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, b)
|
||||
|
||||
// Check readMD5 for different size and threshold
|
||||
for _, size := range []int64{0, 1024, 10 * 1024, 100 * 1024} {
|
||||
t.Run(fmt.Sprintf("%d", size), func(t *testing.T) {
|
||||
hasher := md5.New()
|
||||
n, err := io.Copy(hasher, &testReader{size: size})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, n, size)
|
||||
wantMD5 := fmt.Sprintf("%x", hasher.Sum(nil))
|
||||
for _, threshold := range []int64{512, 1024, 10 * 1024, 20 * 1024} {
|
||||
t.Run(fmt.Sprintf("%d", threshold), func(t *testing.T) {
|
||||
in := &testReader{size: size}
|
||||
gotMD5, out, cleanup, err := readMD5(in, size, threshold)
|
||||
defer cleanup()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, wantMD5, gotMD5)
|
||||
|
||||
// check md5hash of out
|
||||
hasher := md5.New()
|
||||
n, err := io.Copy(hasher, out)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, n, size)
|
||||
outMD5 := fmt.Sprintf("%x", hasher.Sum(nil))
|
||||
assert.Equal(t, wantMD5, outMD5)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
@ -91,6 +91,12 @@ not.
|
||||
Jottacloud supports MD5 type hashes, so you can use the `--checksum`
|
||||
flag.
|
||||
|
||||
Note that Jottacloud requires the MD5 hash before upload so if the
|
||||
source does not have an MD5 checksum then the file will be cached
|
||||
temporarily on disk (wherever the `TMPDIR` environment variable points
|
||||
to) before it is uploaded. Small files will be cached in memory - see
|
||||
the `--jottacloud-md5-memory-limit` flag.
|
||||
|
||||
### Deleting files ###
|
||||
|
||||
Any files you delete with rclone will end up in the trash. Due to a lack of API documentation emptying the trash is currently only possible via the Jottacloud website.
|
||||
@ -108,6 +114,16 @@ There are quite a few characters that can't be in Jottacloud file names. Rclone
|
||||
|
||||
Jottacloud only supports filenames up to 255 characters in length.
|
||||
|
||||
### Specific options ###
|
||||
|
||||
Here are the command line options specific to this cloud storage
|
||||
system.
|
||||
|
||||
#### --jottacloud-md5-memory-limit SizeSuffix
|
||||
|
||||
Files bigger than this will be cached on disk to calculate the MD5 if
|
||||
required. (default 10M)
|
||||
|
||||
### Troubleshooting ###
|
||||
|
||||
Jottacloud exhibits some inconsistent behaviours regarding deleted files and folders which may cause Copy, Move and DirMove operations to previously deleted paths to fail. Emptying the trash should help in such cases.
|
Loading…
Reference in New Issue
Block a user