1
mirror of https://github.com/rclone/rclone synced 2025-01-17 22:27:30 +01:00

dropbox: remove datastore - Fixes #55 #84

This means that dropbox no longer stores MD5SUMs and modified times.

Fixup the tests so that blank MD5SUMs are ignored, and that if
Precision is set to a fs.ModTimeNotSupported, ModTimes can be ignored too.

This opens the door for other FSs which don't support metadata easily.
This commit is contained in:
Nick Craig-Wood 2015-08-16 23:24:34 +01:00
parent 337110b7a0
commit 1d9e76bb0f
7 changed files with 91 additions and 299 deletions

View File

@ -71,22 +71,12 @@ To copy a local directory to a dropbox directory called backup
rclone copy /home/source remote:backup
### Modified time ###
### Modified time and MD5SUMs ###
Md5sums and timestamps in RFC3339 format accurate to 1ns are stored in
a Dropbox datastore called "rclone".
Dropbox doesn't have the capability of storing modification times or
MD5SUMs so syncs will effectively have the `--size-only` flag set.
### Limitations ###
Dropbox datastores are limited to 100,000 rows so this is the maximum
number of files rclone can manage on Dropbox.
Note that Dropbox is case sensitive so you can't have a file called
"Hello.doc" and one called "hello.doc".
If you use the desktop sync tool and rclone on the same files then the
md5sums and modification times may get out of sync as far as rclone is
concerned. This will cause `Corrupted on transfer: md5sums differ`
error message when fetching files. You can work around this by using
the `--size-only` flag to ignore the md5sums and modification times
for these files.

View File

@ -6,11 +6,6 @@ Limitations of dropbox
File system is case insensitive
The datastore is limited to 100,000 records which therefore is the
limit of the number of files that rclone can use on dropbox.
FIXME only open datastore if we need it?
FIXME Getting this sometimes
Failed to copy: Upload failed: invalid character '<' looking for beginning of value
This is a JSON decode error - from Update / UploadByChunk
@ -38,10 +33,10 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"path"
"strings"
"sync"
"time"
"github.com/ncw/rclone/fs"
@ -50,17 +45,10 @@ import (
// Constants
const (
rcloneAppKey = "5jcck7diasz0rqy"
rcloneAppSecret = "1n9m04y2zx7bf26"
uploadChunkSize = 64 * 1024 // chunk size for upload
metadataLimit = dropbox.MetadataLimitDefault // max items to fetch at once
datastoreName = "rclone"
tableName = "metadata"
md5sumField = "md5sum"
mtimeField = "mtime"
maxCommitRetries = 5
timeFormatIn = time.RFC3339
timeFormatOut = "2006-01-02T15:04:05.000000000Z07:00"
rcloneAppKey = "5jcck7diasz0rqy"
rcloneAppSecret = "1n9m04y2zx7bf26"
uploadChunkSize = 64 * 1024 // chunk size for upload
metadataLimit = dropbox.MetadataLimitDefault // max items to fetch at once
)
// Register with Fs
@ -111,24 +99,19 @@ func configHelper(name string) {
// FsDropbox represents a remote dropbox server
type FsDropbox struct {
db *dropbox.Dropbox // the connection to the dropbox server
root string // the path we are working on
slashRoot string // root with "/" prefix, lowercase
slashRootSlash string // root with "/" prefix and postfix, lowercase
datastoreManager *dropbox.DatastoreManager
datastore *dropbox.Datastore
table *dropbox.Table
datastoreMutex sync.Mutex // lock this when using the datastore
datastoreErr error // pending errors on the datastore
db *dropbox.Dropbox // the connection to the dropbox server
root string // the path we are working on
slashRoot string // root with "/" prefix, lowercase
slashRootSlash string // root with "/" prefix and postfix, lowercase
}
// FsObjectDropbox describes a dropbox object
type FsObjectDropbox struct {
dropbox *FsDropbox // what this object is part of
remote string // The remote path
md5sum string // md5sum of the object
bytes int64 // size of the object
modTime time.Time // time it was last modified
dropbox *FsDropbox // what this object is part of
remote string // The remote path
bytes int64 // size of the object
modTime time.Time // time it was last modified
hasMetadata bool // metadata is valid
}
// ------------------------------------------------------------
@ -170,12 +153,6 @@ func NewFs(name, root string) (fs.Fs, error) {
// Authorize the client
db.SetAccessToken(token)
// Make a db to store rclone metadata in
f.datastoreManager = db.NewDatastoreManager()
// Open the datastore in the background
go f.openDataStore()
// See if the root is actually an object
entry, err := f.db.Metadata(f.slashRoot, false, false, "", "", metadataLimit)
if err == nil && !entry.IsDir {
@ -205,30 +182,6 @@ func (f *FsDropbox) setRoot(root string) {
}
}
// Opens the datastore in f
func (f *FsDropbox) openDataStore() {
f.datastoreMutex.Lock()
defer f.datastoreMutex.Unlock()
fs.Debug(f, "Open rclone datastore")
// Open the rclone datastore
var err error
f.datastore, err = f.datastoreManager.OpenDatastore(datastoreName)
if err != nil {
fs.Log(f, "Failed to open datastore: %v", err)
f.datastoreErr = err
return
}
// Get the table we are using
f.table, err = f.datastore.GetTable(tableName)
if err != nil {
fs.Log(f, "Failed to open datastore table: %v", err)
f.datastoreErr = err
return
}
fs.Debug(f, "Open rclone datastore finished")
}
// Return an FsObject from a path
//
// May return nil if an error occurred
@ -295,13 +248,6 @@ func (f *FsDropbox) list(out fs.ObjectsChan) {
entry := deltaEntry.Entry
if entry == nil {
// This notifies of a deleted object
fs.Debug(f, "Deleting metadata for %q", deltaEntry.Path)
key := metadataKey(deltaEntry.Path) // Path is lowercased
err := f.deleteMetadata(key)
if err != nil {
fs.Debug(f, "Failed to delete metadata for %q", deltaEntry.Path)
// Don't accumulate Error here
}
} else {
if len(entry.Path) <= 1 || entry.Path[0] != '/' {
fs.Stats.Error()
@ -452,8 +398,8 @@ func (f *FsDropbox) Rmdir() error {
}
// Return the precision
func (fs *FsDropbox) Precision() time.Duration {
return time.Nanosecond
func (f *FsDropbox) Precision() time.Duration {
return fs.ModTimeNotSupported
}
// Purge deletes all the files and the container
@ -462,81 +408,11 @@ func (fs *FsDropbox) Precision() time.Duration {
// deleting all the files quicker than just running Remove() on the
// result of List()
func (f *FsDropbox) Purge() error {
// Delete metadata first
var wg sync.WaitGroup
to_be_deleted := f.List()
wg.Add(fs.Config.Transfers)
for i := 0; i < fs.Config.Transfers; i++ {
go func() {
defer wg.Done()
for dst := range to_be_deleted {
o := dst.(*FsObjectDropbox)
o.deleteMetadata()
}
}()
}
wg.Wait()
// Let dropbox delete the filesystem tree
_, err := f.db.Delete(f.slashRoot)
return err
}
// Tries the transaction in fn then calls commit, repeating until retry limit
//
// Holds datastore mutex while in progress
func (f *FsDropbox) transaction(fn func() error) error {
f.datastoreMutex.Lock()
defer f.datastoreMutex.Unlock()
if f.datastoreErr != nil {
return f.datastoreErr
}
var err error
for i := 1; i <= maxCommitRetries; i++ {
err = fn()
if err != nil {
return err
}
err = f.datastore.Commit()
if err == nil {
break
}
fs.Debug(f, "Retrying transaction %d/%d", i, maxCommitRetries)
}
if err != nil {
return fmt.Errorf("Failed to commit metadata changes: %s", err)
}
return nil
}
// Deletes the medadata associated with this key
func (f *FsDropbox) deleteMetadata(key string) error {
return f.transaction(func() error {
record, err := f.table.Get(key)
if err != nil {
return fmt.Errorf("Couldn't get record: %s", err)
}
if record == nil {
return nil
}
record.DeleteRecord()
return nil
})
}
// Reads the record attached to key
//
// Holds datastore mutex while in progress
func (f *FsDropbox) readRecord(key string) (*dropbox.Record, error) {
f.datastoreMutex.Lock()
defer f.datastoreMutex.Unlock()
if f.datastoreErr != nil {
return nil, f.datastoreErr
}
return f.table.Get(key)
}
// ------------------------------------------------------------
// Return the parent Fs
@ -558,33 +434,8 @@ func (o *FsObjectDropbox) Remote() string {
}
// Md5sum returns the Md5sum of an object returning a lowercase hex string
//
// FIXME has to download the file!
func (o *FsObjectDropbox) Md5sum() (string, error) {
if o.md5sum != "" {
return o.md5sum, nil
}
err := o.readMetaData()
if err != nil {
fs.Log(o, "Failed to read metadata: %s", err)
return "", fmt.Errorf("Failed to read metadata: %s", err)
}
// For pre-existing files which have no md5sum can read it and set it?
// in, err := o.Open()
// if err != nil {
// return "", err
// }
// defer in.Close()
// hash := md5.New()
// _, err = io.Copy(hash, in)
// if err != nil {
// return "", err
// }
// o.md5sum = fmt.Sprintf("%x", hash.Sum(nil))
return o.md5sum, nil
return "", nil
}
// Size returns the size of an object in bytes
@ -598,6 +449,7 @@ func (o *FsObjectDropbox) Size() int64 {
func (o *FsObjectDropbox) setMetadataFromEntry(info *dropbox.Entry) {
o.bytes = info.Bytes
o.modTime = time.Time(info.ClientMtime)
o.hasMetadata = true
}
// Reads the entry from dropbox
@ -645,55 +497,9 @@ func (o *FsObjectDropbox) metadataKey() string {
// readMetaData gets the info if it hasn't already been fetched
func (o *FsObjectDropbox) readMetaData() (err error) {
if o.md5sum != "" {
if o.hasMetadata {
return nil
}
// fs.Debug(o, "Reading metadata from datastore")
record, err := o.dropbox.readRecord(o.metadataKey())
if err != nil {
fs.Debug(o, "Couldn't read metadata: %s", err)
record = nil
}
if record != nil {
// Read md5sum
md5sumInterface, ok, err := record.Get(md5sumField)
if err != nil {
return err
}
if !ok {
fs.Debug(o, "Couldn't find md5sum in record")
} else {
md5sum, ok := md5sumInterface.(string)
if !ok {
fs.Debug(o, "md5sum not a string")
} else {
o.md5sum = md5sum
}
}
// read mtime
mtimeInterface, ok, err := record.Get(mtimeField)
if err != nil {
return err
}
if !ok {
fs.Debug(o, "Couldn't find mtime in record")
} else {
mtime, ok := mtimeInterface.(string)
if !ok {
fs.Debug(o, "mtime not a string")
} else {
modTime, err := time.Parse(timeFormatIn, mtime)
if err != nil {
return err
}
o.modTime = modTime
}
}
}
// Last resort
return o.readEntryAndSetMetadata()
}
@ -711,59 +517,12 @@ func (o *FsObjectDropbox) ModTime() time.Time {
return o.modTime
}
// Sets the modification time of the local fs object into the record
// FIXME if we don't set md5sum what will that do?
func (o *FsObjectDropbox) setModTimeAndMd5sum(modTime time.Time, md5sum string) error {
key := o.metadataKey()
// fs.Debug(o, "Writing metadata to datastore")
return o.dropbox.transaction(func() error {
record, err := o.dropbox.table.GetOrInsert(key)
if err != nil {
return fmt.Errorf("Couldn't read record: %s", err)
}
if md5sum != "" {
err = record.Set(md5sumField, md5sum)
if err != nil {
return fmt.Errorf("Couldn't set md5sum record: %s", err)
}
o.md5sum = md5sum
}
if !modTime.IsZero() {
mtime := modTime.Format(timeFormatOut)
err := record.Set(mtimeField, mtime)
if err != nil {
return fmt.Errorf("Couldn't set mtime record: %s", err)
}
o.modTime = modTime
}
return nil
})
}
// Deletes the medadata associated with this file
//
// It logs any errors
func (o *FsObjectDropbox) deleteMetadata() {
fs.Debug(o, "Deleting metadata from datastore")
err := o.dropbox.deleteMetadata(o.metadataKey())
if err != nil {
fs.ErrorLog(o, "Error deleting metadata: %v", err)
fs.Stats.Error()
}
}
// Sets the modification time of the local fs object
//
// Commits the datastore
func (o *FsObjectDropbox) SetModTime(modTime time.Time) {
err := o.setModTimeAndMd5sum(modTime, "")
if err != nil {
fs.Stats.Error()
fs.ErrorLog(o, err.Error())
}
// FIXME not implemented
return
}
// Is this object storable
@ -783,22 +542,16 @@ func (o *FsObjectDropbox) Open() (in io.ReadCloser, err error) {
//
// The new object may have been created if an error is returned
func (o *FsObjectDropbox) Update(in io.Reader, modTime time.Time, size int64) error {
// Calculate md5sum as we upload it
hash := md5.New()
rc := &readCloser{in: io.TeeReader(in, hash)}
entry, err := o.dropbox.db.UploadByChunk(rc, uploadChunkSize, o.remotePath(), true, "")
entry, err := o.dropbox.db.UploadByChunk(ioutil.NopCloser(in), uploadChunkSize, o.remotePath(), true, "")
if err != nil {
return fmt.Errorf("Upload failed: %s", err)
}
o.setMetadataFromEntry(entry)
md5sum := fmt.Sprintf("%x", hash.Sum(nil))
return o.setModTimeAndMd5sum(modTime, md5sum)
return nil
}
// Remove an object
func (o *FsObjectDropbox) Remove() error {
o.deleteMetadata()
_, err := o.dropbox.db.Delete(o.remotePath())
return err
}

View File

@ -14,6 +14,8 @@ import (
const (
// User agent for Fs which can set it
UserAgent = "rclone/" + Version
// Very large precision value to show mod time isn't supported
ModTimeNotSupported = 100 * 365 * 24 * time.Hour
)
// Globals
@ -107,9 +109,11 @@ type Object interface {
Remote() string
// Md5sum returns the md5 checksum of the file
// If no Md5sum is available it returns ""
Md5sum() (string, error)
// ModTime returns the modification date of the file
// It should return a best guess if one isn't available
ModTime() time.Time
// SetModTime sets the metadata on the object to set the modification date

View File

@ -27,6 +27,14 @@ func CalculateModifyWindow(fs ...Fs) {
Debug(fs[0], "Modify window is %s\n", Config.ModifyWindow)
}
// Md5sumsEqual checks to see if src == dst, but ignores empty strings
func Md5sumsEqual(src, dst string) bool {
if src == "" || dst == "" {
return true
}
return src == dst
}
// Check the two files to see if the MD5sums are the same
//
// May return an error which will already have been logged
@ -47,7 +55,7 @@ func CheckMd5sums(src, dst Object) (bool, error) {
}
// Debug("Src MD5 %s", srcMd5)
// Debug("Dst MD5 %s", obj.Hash)
return srcMd5 == dstMd5, nil
return Md5sumsEqual(srcMd5, dstMd5), nil
}
// Checks to see if the src and dst objects are equal by looking at
@ -203,7 +211,7 @@ tryAgain:
if md5sumErr != nil {
Stats.Error()
ErrorLog(dst, "Failed to read md5sum: %s", md5sumErr)
} else if dstMd5sum != "" && srcMd5sum != dstMd5sum {
} else if !Md5sumsEqual(srcMd5sum, dstMd5sum) {
Stats.Error()
err = fmt.Errorf("Corrupted on transfer: md5sums differ %q vs %q", srcMd5sum, dstMd5sum)
ErrorLog(dst, "%s", err)
@ -525,7 +533,7 @@ func Md5sum(f Fs, w io.Writer) error {
Stats.DoneChecking(o)
if err != nil {
Debug(o, "Failed to read MD5: %v", err)
md5sum = "UNKNOWN"
md5sum = "ERROR"
}
syncFprintf(w, "%32s %s\n", md5sum, o.Remote())
})

View File

@ -398,14 +398,37 @@ func TestLsLong(t *testing.T) {
t.Fatalf("List failed: %v", err)
}
res := buf.String()
timeFormat := "2006-01-02 15:04:05"
m1 := regexp.MustCompile(`(?m)^ 0 ` + t2.Local().Format(timeFormat) + `\.\d{9} empty space$`)
if !m1.MatchString(res) {
t.Errorf("empty space missing: %q", res)
lines := strings.Split(strings.Trim(res, "\n"), "\n")
if len(lines) != 2 {
t.Fatalf("Wrong number of lines in list: %q", lines)
}
m2 := regexp.MustCompile(`(?m)^ 60 ` + t1.Local().Format(timeFormat) + `\.\d{9} potato2$`)
if !m2.MatchString(res) {
timeFormat := "2006-01-02 15:04:05.000000000"
precision := fremote.Precision()
checkTime := func(m, filename string, expected time.Time) {
modTime, err := time.Parse(timeFormat, m)
if err != nil {
t.Errorf("Error parsing %q: %v", m, err)
} else {
dt, ok := fstest.CheckTimeEqualWithPrecision(expected, modTime, precision)
if !ok {
t.Errorf("%s: Modification time difference too big |%s| > %s (%s vs %s) (precision %s)", filename, dt, precision, modTime, expected, precision)
}
}
}
m1 := regexp.MustCompile(`(?m)^ 0 (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9}) empty space$`)
if ms := m1.FindStringSubmatch(res); ms == nil {
t.Errorf("empty space missing: %q", res)
} else {
checkTime(ms[1], "empty space", t2.Local())
}
m2 := regexp.MustCompile(`(?m)^ 60 (\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9}) potato2$`)
if ms := m2.FindStringSubmatch(res); ms == nil {
t.Errorf("potato2 missing: %q", res)
} else {
checkTime(ms[1], "potato2", t1.Local())
}
}
@ -416,10 +439,12 @@ func TestMd5sum(t *testing.T) {
t.Fatalf("List failed: %v", err)
}
res := buf.String()
if !strings.Contains(res, "d41d8cd98f00b204e9800998ecf8427e empty space\n") {
if !strings.Contains(res, "d41d8cd98f00b204e9800998ecf8427e empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "6548b156ea68a4e003e786df99eee76 potato2\n") {
if !strings.Contains(res, "6548b156ea68a4e003e786df99eee76 potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
}

View File

@ -30,10 +30,19 @@ type Item struct {
Size int64
}
// Checks the times are equal within the precision, returns the delta and a flag
func CheckTimeEqualWithPrecision(t0, t1 time.Time, precision time.Duration) (time.Duration, bool) {
dt := t0.Sub(t1)
if dt >= precision || dt <= -precision {
return dt, false
}
return dt, true
}
// check the mod time to the given precision
func (i *Item) CheckModTime(t *testing.T, obj fs.Object, modTime time.Time, precision time.Duration) {
dt := modTime.Sub(i.ModTime)
if dt >= precision || dt <= -precision {
dt, ok := CheckTimeEqualWithPrecision(modTime, i.ModTime, precision)
if !ok {
t.Errorf("%s: Modification time difference too big |%s| > %s (%s vs %s) (precision %s)", obj.Remote(), dt, precision, modTime, i.ModTime, precision)
}
}
@ -47,7 +56,7 @@ func (i *Item) Check(t *testing.T, obj fs.Object, precision time.Duration) {
if err != nil {
t.Fatalf("Failed to read md5sum for %q: %v", obj.Remote(), err)
}
if i.Md5sum != Md5sum {
if !fs.Md5sumsEqual(i.Md5sum, Md5sum) {
t.Errorf("%s: Md5sum incorrect - expecting %q got %q", obj.Remote(), i.Md5sum, Md5sum)
}
if i.Size != obj.Size() {

View File

@ -258,6 +258,9 @@ func TestFsRmdirFull(t *testing.T) {
func TestFsPrecision(t *testing.T) {
skipIfNotOk(t)
precision := remote.Precision()
if precision == fs.ModTimeNotSupported {
return
}
if precision > time.Second || precision < 0 {
t.Fatalf("Precision out of range %v", precision)
}
@ -301,7 +304,7 @@ func TestObjectMd5sum(t *testing.T) {
if err != nil {
t.Errorf("Error in Md5sum: %v", err)
}
if Md5sum != file1.Md5sum {
if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) {
t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum)
}
}
@ -351,7 +354,7 @@ func TestObjectOpen(t *testing.T) {
t.Fatalf("in.Close() return error: %v", err)
}
Md5sum := hex.EncodeToString(hash.Sum(nil))
if Md5sum != file1.Md5sum {
if !fs.Md5sumsEqual(Md5sum, file1.Md5sum) {
t.Errorf("Md5sum is wrong %v != %v", Md5sum, file1.Md5sum)
}
}