2014-03-28 18:56:04 +01:00
// Generic operations on filesystems and objects
package fs
import (
"fmt"
2014-08-01 18:58:39 +02:00
"io"
2016-03-05 17:10:51 +01:00
"log"
2015-03-01 13:38:31 +01:00
"mime"
"path"
2016-03-05 17:10:51 +01:00
"sort"
2016-01-23 21:16:47 +01:00
"strings"
2014-03-28 18:56:04 +01:00
"sync"
2015-10-02 20:48:48 +02:00
"sync/atomic"
2015-06-03 16:08:27 +02:00
"time"
2016-01-23 21:16:47 +01:00
"golang.org/x/text/unicode/norm"
2014-03-28 18:56:04 +01:00
)
2015-09-22 19:47:16 +02:00
// CalculateModifyWindow works out modify window for Fses passed in -
// sets Config.ModifyWindow
2014-03-28 18:56:04 +01:00
//
// This is the largest modify window of all the fses in use, and the
// user configured value
func CalculateModifyWindow ( fs ... Fs ) {
for _ , f := range fs {
if f != nil {
precision := f . Precision ( )
if precision > Config . ModifyWindow {
Config . ModifyWindow = precision
}
2015-08-20 21:48:58 +02:00
if precision == ModTimeNotSupported {
Debug ( f , "Modify window not supported" )
return
}
2014-03-28 18:56:04 +01:00
}
}
2015-08-20 21:48:58 +02:00
Debug ( fs [ 0 ] , "Modify window is %s" , Config . ModifyWindow )
2014-03-28 18:56:04 +01:00
}
2016-01-11 13:39:33 +01:00
// HashEquals reports whether the hashes src and dst should be treated
// as matching.
//
// An empty string means "hash unknown", so if either side is empty the
// result is true. Otherwise the two strings must be identical.
func HashEquals(src, dst string) bool {
	return src == "" || dst == "" || src == dst
}
2016-01-11 13:39:33 +01:00
// CheckHashes checks the two files to see if they have common
// known hash types and compares them
2014-03-28 18:56:04 +01:00
//
2016-01-24 19:06:57 +01:00
// Returns
2015-08-20 21:48:58 +02:00
//
2016-01-24 19:06:57 +01:00
// equal - which is equality of the hashes
//
// hash - the HashType. This is HashNone if either of the hashes were
// unset or a compatible hash couldn't be found.
//
// err - may return an error which will already have been logged
2014-03-28 18:56:04 +01:00
//
2015-08-20 21:48:58 +02:00
// If an error is returned it will return equal as false
2016-01-24 19:06:57 +01:00
func CheckHashes ( src , dst Object ) ( equal bool , hash HashType , err error ) {
2016-01-11 13:39:33 +01:00
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 19:06:57 +01:00
// Debug(nil, "Shared hashes: %v", common)
2016-01-11 13:39:33 +01:00
if common . Count ( ) == 0 {
2016-01-24 19:06:57 +01:00
return true , HashNone , nil
2016-01-11 13:39:33 +01:00
}
2016-01-24 19:06:57 +01:00
hash = common . GetOne ( )
srcHash , err := src . Hash ( hash )
2014-03-28 18:56:04 +01:00
if err != nil {
Stats . Error ( )
2016-01-11 13:39:33 +01:00
ErrorLog ( src , "Failed to calculate src hash: %s" , err )
2016-01-24 19:06:57 +01:00
return false , hash , err
2015-08-20 21:48:58 +02:00
}
2016-01-11 13:39:33 +01:00
if srcHash == "" {
2016-01-24 19:06:57 +01:00
return true , HashNone , nil
2014-03-28 18:56:04 +01:00
}
2016-01-24 19:06:57 +01:00
dstHash , err := dst . Hash ( hash )
2014-03-28 18:56:04 +01:00
if err != nil {
Stats . Error ( )
2016-01-11 13:39:33 +01:00
ErrorLog ( dst , "Failed to calculate dst hash: %s" , err )
2016-01-24 19:06:57 +01:00
return false , hash , err
2015-08-20 21:48:58 +02:00
}
2016-01-11 13:39:33 +01:00
if dstHash == "" {
2016-01-24 19:06:57 +01:00
return true , HashNone , nil
2014-03-28 18:56:04 +01:00
}
2016-01-24 19:06:57 +01:00
return srcHash == dstHash , hash , nil
2014-03-28 18:56:04 +01:00
}
2015-09-22 19:47:16 +02:00
// Equal checks to see if the src and dst objects are equal by looking at
2016-01-11 13:39:33 +01:00
// size, mtime and hash
2014-03-28 18:56:04 +01:00
//
// If the src and dst size are different then it is considered to be
2015-06-06 09:38:45 +02:00
// not equal. If --size-only is in effect then this is the only check
// that is done.
2014-03-28 18:56:04 +01:00
//
// If the size is the same and the mtime is the same then it is
2015-06-06 09:38:45 +02:00
// considered to be equal. This check is skipped if using --checksum.
2014-03-28 18:56:04 +01:00
//
2015-06-06 09:38:45 +02:00
// If the size is the same and mtime is different, unreadable or
2016-01-11 13:39:33 +01:00
// --checksum is set and the hash is the same then the file is
2015-06-06 09:38:45 +02:00
// considered to be equal. In this case the mtime on the dst is
// updated if --checksum is not set.
2014-03-28 18:56:04 +01:00
//
// Otherwise the file is considered to be not equal including if there
// were errors reading info.
func Equal ( src , dst Object ) bool {
if src . Size ( ) != dst . Size ( ) {
Debug ( src , "Sizes differ" )
return false
}
2015-06-06 09:38:45 +02:00
if Config . SizeOnly {
Debug ( src , "Sizes identical" )
return true
}
2014-03-28 18:56:04 +01:00
2015-06-03 16:08:27 +02:00
var srcModTime time . Time
if ! Config . CheckSum {
2015-08-20 21:48:58 +02:00
if Config . ModifyWindow == ModTimeNotSupported {
Debug ( src , "Sizes identical" )
return true
}
2015-06-03 16:08:27 +02:00
// Size the same so check the mtime
srcModTime = src . ModTime ( )
dstModTime := dst . ModTime ( )
dt := dstModTime . Sub ( srcModTime )
ModifyWindow := Config . ModifyWindow
if dt >= ModifyWindow || dt <= - ModifyWindow {
Debug ( src , "Modification times differ by %s: %v, %v" , dt , srcModTime , dstModTime )
} else {
Debug ( src , "Size and modification time the same (differ by %s, within tolerance %s)" , dt , ModifyWindow )
return true
}
2014-03-28 18:56:04 +01:00
}
// mtime is unreadable or different but size is the same so
2016-01-11 13:39:33 +01:00
// check the hash
2016-01-24 19:06:57 +01:00
same , hash , _ := CheckHashes ( src , dst )
2014-03-28 18:56:04 +01:00
if ! same {
2016-01-11 13:39:33 +01:00
Debug ( src , "Hash differ" )
2014-03-28 18:56:04 +01:00
return false
}
2015-06-03 16:08:27 +02:00
if ! Config . CheckSum {
2016-01-11 13:39:33 +01:00
// Size and hash the same but mtime different so update the
2015-06-03 16:08:27 +02:00
// mtime of the dst object here
2016-03-22 16:07:10 +01:00
err := dst . SetModTime ( srcModTime )
if err != nil {
Stats . Error ( )
ErrorLog ( dst , "Failed to read set modification time: %s" , err )
}
2015-06-03 16:08:27 +02:00
}
2014-03-28 18:56:04 +01:00
2016-01-24 19:06:57 +01:00
if hash == HashNone {
2015-08-20 21:48:58 +02:00
Debug ( src , "Size of src and dst objects identical" )
} else {
2016-01-24 19:06:57 +01:00
Debug ( src , "Size and %v of src and dst objects identical" , hash )
2015-08-20 21:48:58 +02:00
}
2014-03-28 18:56:04 +01:00
return true
}
2015-09-22 19:47:16 +02:00
// MimeType returns a guess at the mime type from the extension
2015-03-01 13:38:31 +01:00
func MimeType ( o Object ) string {
mimeType := mime . TypeByExtension ( path . Ext ( o . Remote ( ) ) )
if mimeType == "" {
mimeType = "application/octet-stream"
}
return mimeType
}
2014-07-15 20:27:05 +02:00
// Used to remove a failed copy
2015-03-14 18:54:41 +01:00
//
// Returns whether the file was succesfully removed or not
func removeFailedCopy ( dst Object ) bool {
if dst == nil {
return false
}
Debug ( dst , "Removing failed copy" )
removeErr := dst . Remove ( )
if removeErr != nil {
Debug ( dst , "Failed to remove failed copy: %s" , removeErr )
return false
2014-07-15 20:27:05 +02:00
}
2015-03-14 18:54:41 +01:00
return true
2014-07-15 20:27:05 +02:00
}
2014-04-18 18:04:21 +02:00
// Copy src object to dst or f if nil
//
// If dst is nil then the object must not exist already. If you do
// call Copy() with dst nil on a pre-existing file then some filing
// systems (eg Drive) may duplicate the file.
func Copy ( f Fs , dst , src Object ) {
2016-01-12 18:38:28 +01:00
maxTries := Config . LowLevelRetries
2015-02-02 18:29:08 +01:00
tries := 0
doUpdate := dst != nil
2015-02-14 19:48:08 +01:00
var err , inErr error
2015-02-02 18:29:08 +01:00
tryAgain :
2015-02-14 19:48:08 +01:00
// Try server side copy first - if has optional interface and
// is same underlying remote
actionTaken := "Copied (server side copy)"
if fCopy , ok := f . ( Copier ) ; ok && src . Fs ( ) . Name ( ) == f . Name ( ) {
var newDst Object
newDst , err = fCopy . Copy ( src , src . Remote ( ) )
if err == nil {
dst = newDst
}
} else {
err = ErrorCantCopy
2014-03-28 18:56:04 +01:00
}
2015-02-14 19:48:08 +01:00
// If can't server side copy, do it manually
if err == ErrorCantCopy {
var in0 io . ReadCloser
in0 , err = src . Open ( )
if err != nil {
Stats . Error ( )
ErrorLog ( src , "Failed to open: %s" , err )
return
}
2015-10-06 16:35:22 +02:00
// On big files add a buffer
if src . Size ( ) > 10 << 20 {
in0 , _ = newAsyncReader ( in0 , 4 , 4 << 20 )
}
2015-09-15 16:46:06 +02:00
in := NewAccount ( in0 , src ) // account the transfer
2014-03-28 18:56:04 +01:00
2015-02-14 19:48:08 +01:00
if doUpdate {
actionTaken = "Copied (updated existing)"
2016-02-18 12:35:25 +01:00
err = dst . Update ( in , src )
2015-02-14 19:48:08 +01:00
} else {
actionTaken = "Copied (new)"
2016-02-18 12:35:25 +01:00
dst , err = f . Put ( in , src )
2015-02-14 19:48:08 +01:00
}
inErr = in . Close ( )
2014-04-18 18:04:21 +02:00
}
2015-02-02 18:29:08 +01:00
// Retry if err returned a retry error
if r , ok := err . ( Retry ) ; ok && r . Retry ( ) && tries < maxTries {
tries ++
2016-01-12 18:38:28 +01:00
Log ( src , "Received error: %v - low level retry %d/%d" , err , tries , maxTries )
2015-03-14 18:54:41 +01:00
if removeFailedCopy ( dst ) {
// If we removed dst, then nil it out and note we are not updating
dst = nil
doUpdate = false
}
2015-02-02 18:29:08 +01:00
goto tryAgain
}
2014-03-28 18:56:04 +01:00
if err == nil {
err = inErr
}
if err != nil {
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( src , "Failed to copy: %s" , err )
2014-07-15 20:27:05 +02:00
removeFailedCopy ( dst )
2014-03-28 18:56:04 +01:00
return
}
2014-07-15 20:27:05 +02:00
2014-07-19 13:38:58 +02:00
// Verify sizes are the same after transfer
if src . Size ( ) != dst . Size ( ) {
Stats . Error ( )
err = fmt . Errorf ( "Corrupted on transfer: sizes differ %d vs %d" , src . Size ( ) , dst . Size ( ) )
2015-08-08 21:10:31 +02:00
ErrorLog ( dst , "%s" , err )
2014-07-19 13:38:58 +02:00
removeFailedCopy ( dst )
return
}
2016-01-11 13:39:33 +01:00
// Verify hashes are the same after transfer - ignoring blank hashes
// TODO(klauspost): This could be extended, so we always create a hash type matching
// the destination, and calculate it while sending.
common := src . Fs ( ) . Hashes ( ) . Overlap ( dst . Fs ( ) . Hashes ( ) )
2016-01-24 19:06:57 +01:00
// Debug(src, "common hashes: %v", common)
2016-01-11 13:39:33 +01:00
if ! Config . SizeOnly && common . Count ( ) > 0 {
// Get common hash type
hashType := common . GetOne ( )
srcSum , err := src . Hash ( hashType )
if err != nil {
2014-07-15 20:27:05 +02:00
Stats . Error ( )
2016-01-11 13:39:33 +01:00
ErrorLog ( src , "Failed to read src hash: %s" , err )
} else if srcSum != "" {
dstSum , err := dst . Hash ( hashType )
if err != nil {
2015-06-09 14:18:40 +02:00
Stats . Error ( )
2016-01-11 13:39:33 +01:00
ErrorLog ( dst , "Failed to read hash: %s" , err )
} else if ! HashEquals ( srcSum , dstSum ) {
2015-06-09 14:18:40 +02:00
Stats . Error ( )
2016-01-11 13:39:33 +01:00
err = fmt . Errorf ( "Corrupted on transfer: %v hash differ %q vs %q" , hashType , srcSum , dstSum )
2015-08-08 21:10:31 +02:00
ErrorLog ( dst , "%s" , err )
2015-06-09 14:18:40 +02:00
removeFailedCopy ( dst )
return
}
2014-07-15 20:27:05 +02:00
}
}
2014-04-18 18:46:57 +02:00
Debug ( src , actionTaken )
2014-03-28 18:56:04 +01:00
}
// Check to see if src needs to be copied to dst and if so puts it in out
2014-04-18 17:34:59 +02:00
func checkOne ( pair ObjectPair , out ObjectPairChan ) {
src , dst := pair . src , pair . dst
2014-03-28 18:56:04 +01:00
if dst == nil {
2014-04-18 17:34:59 +02:00
Debug ( src , "Couldn't find file - need to transfer" )
out <- pair
2014-03-28 18:56:04 +01:00
return
}
// Check to see if can store this
if ! src . Storable ( ) {
return
}
2016-01-05 11:35:36 +01:00
// If we should ignore existing files, don't transfer
if Config . IgnoreExisting {
Debug ( src , "Destination exists, skipping" )
return
}
2016-02-29 18:46:40 +01:00
// If UpdateOlder is in effect, skip if dst is newer than src
if Config . UpdateOlder {
srcModTime := src . ModTime ( )
dstModTime := dst . ModTime ( )
dt := dstModTime . Sub ( srcModTime )
// If have a mutually agreed precision then use that
modifyWindow := Config . ModifyWindow
if modifyWindow == ModTimeNotSupported {
// Otherwise use 1 second as a safe default as
// the resolution of the time a file was
// uploaded.
modifyWindow = time . Second
}
switch {
case dt >= modifyWindow :
Debug ( src , "Destination is newer than source, skipping" )
return
case dt <= - modifyWindow :
Debug ( src , "Destination is older than source, transferring" )
default :
if src . Size ( ) == dst . Size ( ) {
Debug ( src , "Destination mod time is within %v of source and sizes identical, skipping" , modifyWindow )
return
}
Debug ( src , "Destination mod time is within %v of source but sizes differ, transferring" , modifyWindow )
}
} else {
// Check to see if changed or not
if Equal ( src , dst ) {
Debug ( src , "Unchanged skipping" )
return
}
2014-03-28 18:56:04 +01:00
}
2014-04-18 17:34:59 +02:00
out <- pair
2014-03-28 18:56:04 +01:00
}
2015-09-22 19:47:16 +02:00
// PairChecker reads Objects~s on in send to out if they need transferring.
2014-03-28 18:56:04 +01:00
//
2016-01-11 13:39:33 +01:00
// FIXME potentially doing lots of hashes at once
2014-04-18 17:34:59 +02:00
func PairChecker ( in ObjectPairChan , out ObjectPairChan , wg * sync . WaitGroup ) {
2014-03-28 18:56:04 +01:00
defer wg . Done ( )
for pair := range in {
src := pair . src
Stats . Checking ( src )
2014-04-18 17:34:59 +02:00
checkOne ( pair , out )
2014-03-28 18:56:04 +01:00
Stats . DoneChecking ( src )
}
}
2015-09-22 19:47:16 +02:00
// PairCopier reads Objects on in and copies them.
2015-02-14 19:48:08 +01:00
func PairCopier ( in ObjectPairChan , fdst Fs , wg * sync . WaitGroup ) {
2014-03-28 18:56:04 +01:00
defer wg . Done ( )
2014-04-18 17:34:59 +02:00
for pair := range in {
src := pair . src
2014-03-28 18:56:04 +01:00
Stats . Transferring ( src )
2014-06-26 16:33:06 +02:00
if Config . DryRun {
2016-01-31 16:53:09 +01:00
Log ( src , "Not copying as --dry-run" )
2014-06-26 16:33:06 +02:00
} else {
Copy ( fdst , pair . dst , src )
}
2014-03-28 18:56:04 +01:00
Stats . DoneTransferring ( src )
}
}
2015-09-22 19:47:16 +02:00
// PairMover reads Objects on in and moves them if possible, or copies
// them if not
2015-08-24 22:42:23 +02:00
func PairMover ( in ObjectPairChan , fdst Fs , wg * sync . WaitGroup ) {
defer wg . Done ( )
// See if we have Move available
fdstMover , haveMover := fdst . ( Mover )
for pair := range in {
src := pair . src
dst := pair . dst
Stats . Transferring ( src )
if Config . DryRun {
2016-01-31 16:53:09 +01:00
Log ( src , "Not moving as --dry-run" )
2016-02-25 21:05:34 +01:00
} else if haveMover && src . Fs ( ) . Name ( ) == fdst . Name ( ) {
2015-08-24 22:42:23 +02:00
// Delete destination if it exists
if pair . dst != nil {
err := dst . Remove ( )
if err != nil {
Stats . Error ( )
2015-09-22 08:31:12 +02:00
ErrorLog ( dst , "Couldn't delete: %v" , err )
2015-08-24 22:42:23 +02:00
}
}
2015-09-22 08:31:12 +02:00
_ , err := fdstMover . Move ( src , src . Remote ( ) )
if err != nil {
Stats . Error ( )
ErrorLog ( dst , "Couldn't move: %v" , err )
} else {
Debug ( src , "Moved" )
}
2015-08-24 22:42:23 +02:00
} else {
Copy ( fdst , pair . dst , src )
}
Stats . DoneTransferring ( src )
}
}
2016-03-05 17:10:51 +01:00
// DeleteFile removes the single object dst. With --dry-run it only
// logs what would have been done. A failure is logged and counted in
// Stats.
func DeleteFile(dst Object) {
	if Config.DryRun {
		Log(dst, "Not deleting as --dry-run")
		return
	}
	Stats.Checking(dst)
	err := dst.Remove()
	Stats.DoneChecking(dst)
	if err != nil {
		Stats.Error()
		ErrorLog(dst, "Couldn't delete: %s", err)
		return
	}
	Debug(dst, "Deleted")
}
2015-09-22 19:47:16 +02:00
// DeleteFiles removes all the files passed in the channel
func DeleteFiles ( toBeDeleted ObjectsChan ) {
2014-03-28 18:56:04 +01:00
var wg sync . WaitGroup
wg . Add ( Config . Transfers )
for i := 0 ; i < Config . Transfers ; i ++ {
go func ( ) {
defer wg . Done ( )
2015-09-22 19:47:16 +02:00
for dst := range toBeDeleted {
2016-03-05 17:10:51 +01:00
DeleteFile ( dst )
2014-03-28 18:56:04 +01:00
}
} ( )
}
2014-07-23 00:03:14 +02:00
Log ( nil , "Waiting for deletions to finish" )
2014-03-28 18:56:04 +01:00
wg . Wait ( )
}
2016-01-12 14:33:03 +01:00
// Read a map of Object.Remote to Object for the given Fs.
// If includeAll is specified all files will be added,
// otherwise only files passing the filter will be added.
func readFilesMap ( fs Fs , includeAll bool ) map [ string ] Object {
2015-03-14 18:11:24 +01:00
files := make ( map [ string ] Object )
2016-01-23 21:16:47 +01:00
normalised := make ( map [ string ] struct { } )
2015-03-14 18:11:24 +01:00
for o := range fs . List ( ) {
remote := o . Remote ( )
2016-01-23 21:16:47 +01:00
normalisedRemote := strings . ToLower ( norm . NFC . String ( remote ) )
2015-03-14 18:11:24 +01:00
if _ , ok := files [ remote ] ; ! ok {
2015-11-12 12:46:04 +01:00
// Make sure we don't delete excluded files if not required
2016-01-12 14:33:03 +01:00
if includeAll || Config . Filter . IncludeObject ( o ) {
2015-11-12 12:46:04 +01:00
files [ remote ] = o
2016-01-23 21:16:47 +01:00
if _ , ok := normalised [ normalisedRemote ] ; ok {
Log ( o , "Warning: File found with same name but different case on %v" , o . Fs ( ) )
}
2015-11-12 12:46:04 +01:00
} else {
Debug ( o , "Excluded from sync (and deletion)" )
}
2015-03-14 18:11:24 +01:00
} else {
Log ( o , "Duplicate file detected" )
}
2016-01-23 21:16:47 +01:00
normalised [ normalisedRemote ] = struct { } { }
2015-03-14 18:11:24 +01:00
}
return files
}
2015-09-22 19:47:16 +02:00
// Same returns true if fdst and fsrc point to the same underlying Fs
func Same ( fdst , fsrc Fs ) bool {
2015-09-01 21:50:28 +02:00
return fdst . Name ( ) == fsrc . Name ( ) && fdst . Root ( ) == fsrc . Root ( )
}
2014-03-28 18:56:04 +01:00
// Syncs fsrc into fdst
2014-04-18 17:34:59 +02:00
//
// If Delete is true then it deletes any files in fdst that aren't in fsrc
2015-08-24 22:42:23 +02:00
//
// If DoMove is true then files will be moved instead of copied
func syncCopyMove ( fdst , fsrc Fs , Delete bool , DoMove bool ) error {
2015-09-22 19:47:16 +02:00
if Same ( fdst , fsrc ) {
2015-09-01 21:50:28 +02:00
ErrorLog ( fdst , "Nothing to do as source and destination are the same" )
return nil
}
2016-02-28 20:47:22 +01:00
err := Mkdir ( fdst )
2014-03-28 18:56:04 +01:00
if err != nil {
return err
}
Log ( fdst , "Building file list" )
2016-01-12 14:33:03 +01:00
// Read the files of both source and destination
var listWg sync . WaitGroup
listWg . Add ( 2 )
var dstFiles map [ string ] Object
var srcFiles map [ string ] Object
var srcObjects = make ( ObjectsChan , Config . Transfers )
2016-01-23 19:26:01 +01:00
// Read dst files including excluded files if DeleteExcluded is set
2016-01-12 14:33:03 +01:00
go func ( ) {
dstFiles = readFilesMap ( fdst , Config . Filter . DeleteExcluded )
listWg . Done ( )
} ( )
2016-01-23 19:26:01 +01:00
// Read src file not including excluded files
2016-01-12 14:33:03 +01:00
go func ( ) {
srcFiles = readFilesMap ( fsrc , false )
listWg . Done ( )
for _ , v := range srcFiles {
srcObjects <- v
}
close ( srcObjects )
} ( )
startDeletion := make ( chan struct { } , 0 )
// Delete files if asked
var delWg sync . WaitGroup
delWg . Add ( 1 )
go func ( ) {
if ! Delete {
return
}
defer func ( ) {
Debug ( fdst , "Deletion finished" )
delWg . Done ( )
} ( )
_ = <- startDeletion
Debug ( fdst , "Starting deletion" )
if Stats . Errored ( ) {
ErrorLog ( fdst , "Not deleting files as there were IO errors" )
return
}
// Delete the spare files
toDelete := make ( ObjectsChan , Config . Transfers )
go func ( ) {
for key , fs := range dstFiles {
_ , exists := srcFiles [ key ]
if ! exists {
toDelete <- fs
}
}
close ( toDelete )
} ( )
DeleteFiles ( toDelete )
} ( )
// Wait for all files to be read
listWg . Wait ( )
// Start deleting, unless we must delete after transfer
if Delete && ! Config . DeleteAfter {
close ( startDeletion )
}
// If deletes must finish before starting transfers, we must wait now.
if Delete && Config . DeleteBefore {
Log ( fdst , "Waiting for deletes to finish (before)" )
delWg . Wait ( )
}
2014-03-28 18:56:04 +01:00
// Read source files checking them off against dest files
2015-09-22 19:47:16 +02:00
toBeChecked := make ( ObjectPairChan , Config . Transfers )
toBeUploaded := make ( ObjectPairChan , Config . Transfers )
2014-03-28 18:56:04 +01:00
var checkerWg sync . WaitGroup
checkerWg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
2015-09-22 19:47:16 +02:00
go PairChecker ( toBeChecked , toBeUploaded , & checkerWg )
2014-03-28 18:56:04 +01:00
}
var copierWg sync . WaitGroup
copierWg . Add ( Config . Transfers )
for i := 0 ; i < Config . Transfers ; i ++ {
2015-08-24 22:42:23 +02:00
if DoMove {
2015-09-22 19:47:16 +02:00
go PairMover ( toBeUploaded , fdst , & copierWg )
2015-08-24 22:42:23 +02:00
} else {
2015-09-22 19:47:16 +02:00
go PairCopier ( toBeUploaded , fdst , & copierWg )
2015-08-24 22:42:23 +02:00
}
2014-03-28 18:56:04 +01:00
}
go func ( ) {
2016-01-12 14:33:03 +01:00
for src := range srcObjects {
2016-01-23 19:26:01 +01:00
remote := src . Remote ( )
if dst , dstFound := dstFiles [ remote ] ; dstFound {
toBeChecked <- ObjectPair { src , dst }
2014-03-28 18:56:04 +01:00
} else {
2016-01-23 19:26:01 +01:00
// No need to check since doesn't exist
toBeUploaded <- ObjectPair { src , nil }
2014-03-28 18:56:04 +01:00
}
}
2015-09-22 19:47:16 +02:00
close ( toBeChecked )
2014-03-28 18:56:04 +01:00
} ( )
Log ( fdst , "Waiting for checks to finish" )
checkerWg . Wait ( )
2015-09-22 19:47:16 +02:00
close ( toBeUploaded )
2014-03-28 18:56:04 +01:00
Log ( fdst , "Waiting for transfers to finish" )
copierWg . Wait ( )
2016-01-12 14:33:03 +01:00
// If deleting after, start deletion now
if Delete && Config . DeleteAfter {
close ( startDeletion )
}
// Unless we have already waited, wait for deletion to finish.
if Delete && ! Config . DeleteBefore {
Log ( fdst , "Waiting for deletes to finish (during+after)" )
delWg . Wait ( )
2014-04-18 17:34:59 +02:00
}
2016-01-12 14:33:03 +01:00
2014-03-28 18:56:04 +01:00
return nil
}
2015-09-22 19:47:16 +02:00
// Sync fsrc into fdst
2015-08-24 22:42:23 +02:00
func Sync ( fdst , fsrc Fs ) error {
return syncCopyMove ( fdst , fsrc , true , false )
}
2015-09-22 19:47:16 +02:00
// CopyDir copies fsrc into fdst
2015-08-24 22:42:23 +02:00
func CopyDir ( fdst , fsrc Fs ) error {
return syncCopyMove ( fdst , fsrc , false , false )
}
2015-09-22 19:47:16 +02:00
// MoveDir moves fsrc into fdst
2015-08-24 22:42:23 +02:00
func MoveDir ( fdst , fsrc Fs ) error {
2015-09-22 19:47:16 +02:00
if Same ( fdst , fsrc ) {
2015-09-01 21:50:28 +02:00
ErrorLog ( fdst , "Nothing to do as source and destination are the same" )
return nil
}
2016-02-25 21:05:34 +01:00
// First attempt to use DirMover if exists, same Fs and no filters are active
if fdstDirMover , ok := fdst . ( DirMover ) ; ok && fsrc . Name ( ) == fdst . Name ( ) && Config . Filter . InActive ( ) {
2015-08-24 22:42:23 +02:00
err := fdstDirMover . DirMove ( fsrc )
Debug ( fdst , "Using server side directory move" )
switch err {
case ErrorCantDirMove , ErrorDirExists :
Debug ( fdst , "Server side directory move failed - fallback to copy/delete: %v" , err )
case nil :
Debug ( fdst , "Server side directory move succeeded" )
return nil
default :
Stats . Error ( )
ErrorLog ( fdst , "Server side directory move failed: %v" , err )
return err
}
}
// Now move the files
err := syncCopyMove ( fdst , fsrc , false , true )
if err != nil || Stats . Errored ( ) {
ErrorLog ( fdst , "Not deleting files as there were IO errors" )
return err
}
2016-02-25 21:05:34 +01:00
// If no filters then purge
if Config . Filter . InActive ( ) {
return Purge ( fsrc )
}
// Otherwise remove any remaining files obeying filters
err = Delete ( fsrc )
if err != nil {
return err
}
// and try to remove the directory if empty - ignoring error
_ = TryRmdir ( fsrc )
return nil
2015-08-24 22:42:23 +02:00
}
2016-01-11 13:39:33 +01:00
// Check the files in fsrc and fdst according to Size and hash
2014-03-28 18:56:04 +01:00
func Check ( fdst , fsrc Fs ) error {
2016-01-17 11:08:28 +01:00
differences := int32 ( 0 )
2015-11-24 17:54:12 +01:00
var (
wg sync . WaitGroup
dstFiles , srcFiles map [ string ] Object
)
2014-03-28 18:56:04 +01:00
2015-11-24 17:54:12 +01:00
wg . Add ( 2 )
go func ( ) {
defer wg . Done ( )
// Read the destination files
Log ( fdst , "Building file list" )
2016-01-12 14:33:03 +01:00
dstFiles = readFilesMap ( fdst , false )
2015-11-24 17:54:12 +01:00
Debug ( fdst , "Done building file list" )
} ( )
2014-03-28 18:56:04 +01:00
2015-11-24 17:54:12 +01:00
go func ( ) {
defer wg . Done ( )
// Read the source files
Log ( fsrc , "Building file list" )
2016-01-12 14:33:03 +01:00
srcFiles = readFilesMap ( fsrc , false )
2015-11-24 17:54:12 +01:00
Debug ( fdst , "Done building file list" )
} ( )
wg . Wait ( )
// FIXME could do this as it goes along and make it use less
// memory.
2015-03-14 18:11:24 +01:00
// Move all the common files into commonFiles and delete then
// from srcFiles and dstFiles
2014-03-28 18:56:04 +01:00
commonFiles := make ( map [ string ] [ ] Object )
2015-03-14 18:11:24 +01:00
for remote , src := range srcFiles {
2014-03-28 18:56:04 +01:00
if dst , ok := dstFiles [ remote ] ; ok {
commonFiles [ remote ] = [ ] Object { dst , src }
2015-03-14 18:11:24 +01:00
delete ( srcFiles , remote )
2014-03-28 18:56:04 +01:00
delete ( dstFiles , remote )
}
}
Log ( fdst , "%d files not in %v" , len ( dstFiles ) , fsrc )
for _ , dst := range dstFiles {
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( dst , "File not in %v" , fsrc )
2016-01-17 11:08:28 +01:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 18:56:04 +01:00
}
Log ( fsrc , "%d files not in %s" , len ( srcFiles ) , fdst )
for _ , src := range srcFiles {
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( src , "File not in %v" , fdst )
2016-01-17 11:08:28 +01:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 18:56:04 +01:00
}
checks := make ( chan [ ] Object , Config . Transfers )
go func ( ) {
for _ , check := range commonFiles {
checks <- check
}
close ( checks )
} ( )
var checkerWg sync . WaitGroup
checkerWg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer checkerWg . Done ( )
for check := range checks {
dst , src := check [ 0 ] , check [ 1 ]
Stats . Checking ( src )
if src . Size ( ) != dst . Size ( ) {
Stats . DoneChecking ( src )
Stats . Error ( )
2015-08-08 21:10:31 +02:00
ErrorLog ( src , "Sizes differ" )
2016-01-17 11:08:28 +01:00
atomic . AddInt32 ( & differences , 1 )
2014-03-28 18:56:04 +01:00
continue
}
2016-01-11 13:39:33 +01:00
same , _ , err := CheckHashes ( src , dst )
2014-03-28 18:56:04 +01:00
Stats . DoneChecking ( src )
if err != nil {
continue
}
if ! same {
Stats . Error ( )
2016-01-17 11:08:28 +01:00
atomic . AddInt32 ( & differences , 1 )
2015-08-08 21:10:31 +02:00
ErrorLog ( src , "Md5sums differ" )
2014-03-28 18:56:04 +01:00
}
Debug ( src , "OK" )
}
} ( )
}
Log ( fdst , "Waiting for checks to finish" )
checkerWg . Wait ( )
Log ( fdst , "%d differences found" , Stats . GetErrors ( ) )
2016-01-17 11:08:28 +01:00
if differences > 0 {
return fmt . Errorf ( "%d differences found" , differences )
2014-03-28 18:56:04 +01:00
}
return nil
}
2015-09-22 19:47:16 +02:00
// ListFn lists the Fs to the supplied function
2014-03-28 18:56:04 +01:00
//
// Lists in parallel which may get them out of order
2014-07-12 13:09:20 +02:00
func ListFn ( f Fs , fn func ( Object ) ) error {
2014-03-28 18:56:04 +01:00
in := f . List ( )
var wg sync . WaitGroup
wg . Add ( Config . Checkers )
for i := 0 ; i < Config . Checkers ; i ++ {
go func ( ) {
defer wg . Done ( )
for o := range in {
2015-11-24 17:54:12 +01:00
if Config . Filter . IncludeObject ( o ) {
fn ( o )
}
2014-03-28 18:56:04 +01:00
}
} ( )
}
wg . Wait ( )
return nil
}
2015-02-28 16:30:40 +01:00
// outMutex serialises the writes made through syncFprintf
var outMutex sync.Mutex

// syncFprintf is fmt.Fprintf performed while holding outMutex, so
// that output from concurrent callers is not interleaved.
//
// Errors from Fprintf are deliberately ignored.
func syncFprintf(w io.Writer, format string, a ...interface{}) {
	outMutex.Lock()
	defer outMutex.Unlock()
	_, _ = fmt.Fprintf(w, format, a...)
}
2015-09-15 16:46:06 +02:00
// List the Fs to the supplied writer
2014-07-12 13:09:20 +02:00
//
2015-11-24 17:54:12 +01:00
// Shows size and path - obeys includes and excludes
2014-07-12 13:09:20 +02:00
//
// Lists in parallel which may get them out of order
2014-08-01 18:58:39 +02:00
func List ( f Fs , w io . Writer ) error {
2014-07-12 13:09:20 +02:00
return ListFn ( f , func ( o Object ) {
2015-02-28 16:30:40 +01:00
syncFprintf ( w , "%9d %s\n" , o . Size ( ) , o . Remote ( ) )
2014-07-12 13:09:20 +02:00
} )
}
2015-09-22 19:47:16 +02:00
// ListLong lists the Fs to the supplied writer
2014-07-12 13:09:20 +02:00
//
2015-11-24 17:54:12 +01:00
// Shows size, mod time and path - obeys includes and excludes
2014-07-12 13:09:20 +02:00
//
// Lists in parallel which may get them out of order
2014-08-01 18:58:39 +02:00
func ListLong ( f Fs , w io . Writer ) error {
2014-07-12 13:09:20 +02:00
return ListFn ( f , func ( o Object ) {
Stats . Checking ( o )
modTime := o . ModTime ( )
Stats . DoneChecking ( o )
2015-09-22 20:04:12 +02:00
syncFprintf ( w , "%9d %s %s\n" , o . Size ( ) , modTime . Local ( ) . Format ( "2006-01-02 15:04:05.000000000" ) , o . Remote ( ) )
2014-07-12 13:09:20 +02:00
} )
}
2015-09-22 19:47:16 +02:00
// Md5sum list the Fs to the supplied writer
2014-07-12 13:09:20 +02:00
//
2015-11-24 17:54:12 +01:00
// Produces the same output as the md5sum command - obeys includes and
// excludes
2014-07-12 13:09:20 +02:00
//
// Lists in parallel which may get them out of order
2014-08-01 18:58:39 +02:00
func Md5sum ( f Fs , w io . Writer ) error {
2016-01-11 13:39:33 +01:00
return hashLister ( HashMD5 , f , w )
}
// Sha1sum writes the SHA1 hash and path of each object in f to w
//
// Obeys includes and excludes
//
// Lists in parallel which may get them out of order
func Sha1sum(f Fs, w io.Writer) error {
	return hashLister(HashSHA1, f, w)
}
func hashLister ( ht HashType , f Fs , w io . Writer ) error {
2014-07-12 13:09:20 +02:00
return ListFn ( f , func ( o Object ) {
Stats . Checking ( o )
2016-01-11 13:39:33 +01:00
sum , err := o . Hash ( ht )
2014-07-12 13:09:20 +02:00
Stats . DoneChecking ( o )
2016-01-11 13:39:33 +01:00
if err == ErrHashUnsupported {
sum = "UNSUPPORTED"
} else if err != nil {
Debug ( o , "Failed to read %v: %v" , ht , err )
sum = "ERROR"
2014-07-12 13:09:20 +02:00
}
2016-01-17 14:56:00 +01:00
syncFprintf ( w , "%*s %s\n" , HashWidth [ ht ] , sum , o . Remote ( ) )
2014-07-12 13:09:20 +02:00
} )
}
2015-10-02 20:48:48 +02:00
// Count counts the objects and their sizes in the Fs
2015-11-24 17:54:12 +01:00
//
// Obeys includes and excludes
2015-10-02 20:48:48 +02:00
func Count ( f Fs ) ( objects int64 , size int64 , err error ) {
err = ListFn ( f , func ( o Object ) {
atomic . AddInt64 ( & objects , 1 )
atomic . AddInt64 ( & size , o . Size ( ) )
} )
return
}
2015-09-22 19:47:16 +02:00
// ListDir lists the directories/buckets/containers in the Fs to the supplied writer
2014-08-01 18:58:39 +02:00
func ListDir ( f Fs , w io . Writer ) error {
2014-03-28 18:56:04 +01:00
for dir := range f . ListDir ( ) {
2015-02-28 16:30:40 +01:00
syncFprintf ( w , "%12d %13s %9d %s\n" , dir . Bytes , dir . When . Format ( "2006-01-02 15:04:05" ) , dir . Count , dir . Name )
2014-03-28 18:56:04 +01:00
}
return nil
}
2015-09-22 19:47:16 +02:00
// Mkdir makes a destination directory or container
2014-03-28 18:56:04 +01:00
func Mkdir ( f Fs ) error {
2016-02-28 20:47:22 +01:00
if Config . DryRun {
Log ( f , "Not making directory as dry run is set" )
return nil
}
2014-03-28 18:56:04 +01:00
err := f . Mkdir ( )
if err != nil {
Stats . Error ( )
return err
}
return nil
}
2016-02-25 21:05:34 +01:00
// TryRmdir removes a container but not if not empty. It doesn't
// count errors but may return one.
func TryRmdir ( f Fs ) error {
2014-03-28 18:56:04 +01:00
if Config . DryRun {
Log ( f , "Not deleting as dry run is set" )
2016-02-25 21:05:34 +01:00
return nil
2014-03-28 18:56:04 +01:00
}
2016-02-25 21:05:34 +01:00
return f . Rmdir ( )
}
// Rmdir removes a container but not if not empty
func Rmdir ( f Fs ) error {
err := TryRmdir ( f )
if err != nil {
Stats . Error ( )
return err
}
return err
2014-03-28 18:56:04 +01:00
}
2015-09-22 19:47:16 +02:00
// Purge removes a container and all of its contents
2014-03-28 18:56:04 +01:00
//
// FIXME doesn't delete local directories
func Purge ( f Fs ) error {
2015-11-08 15:16:00 +01:00
doFallbackPurge := true
2014-07-25 19:19:49 +02:00
var err error
2014-03-28 18:56:04 +01:00
if purger , ok := f . ( Purger ) ; ok {
2015-11-08 15:16:00 +01:00
doFallbackPurge = false
2014-07-13 11:45:13 +02:00
if Config . DryRun {
2016-01-31 16:53:09 +01:00
Log ( f , "Not purging as --dry-run set" )
2014-07-13 11:45:13 +02:00
} else {
2014-07-25 19:19:49 +02:00
err = purger . Purge ( )
2015-11-08 15:16:00 +01:00
if err == ErrorCantPurge {
doFallbackPurge = true
}
2014-03-28 18:56:04 +01:00
}
2015-11-08 15:16:00 +01:00
}
if doFallbackPurge {
2014-07-25 19:19:49 +02:00
// DeleteFiles and Rmdir observe --dry-run
2014-03-28 18:56:04 +01:00
DeleteFiles ( f . List ( ) )
2014-07-25 19:19:49 +02:00
err = Rmdir ( f )
}
if err != nil {
Stats . Error ( )
return err
2014-03-28 18:56:04 +01:00
}
return nil
}
2015-12-02 23:25:32 +01:00
// Delete removes all the contents of a container. Unlike Purge, it
// obeys includes and excludes.
func Delete ( f Fs ) error {
wg := new ( sync . WaitGroup )
delete := make ( ObjectsChan , Config . Transfers )
wg . Add ( 1 )
go func ( ) {
defer wg . Done ( )
DeleteFiles ( delete )
} ( )
err := ListFn ( f , func ( o Object ) {
delete <- o
} )
close ( delete )
2016-02-15 17:43:59 +01:00
wg . Wait ( )
2015-12-02 23:25:32 +01:00
return err
}
2016-01-31 13:58:41 +01:00
2016-03-05 17:10:51 +01:00
// dedupeRename renames the objs slice to different names
func dedupeRename ( remote string , objs [ ] Object ) {
f := objs [ 0 ] . Fs ( )
2016-01-31 13:58:41 +01:00
mover , ok := f . ( Mover )
if ! ok {
2016-03-05 17:10:51 +01:00
log . Fatalf ( "Fs %v doesn't support Move" , f )
}
ext := path . Ext ( remote )
base := remote [ : len ( remote ) - len ( ext ) ]
for i , o := range objs {
newName := fmt . Sprintf ( "%s-%d%s" , base , i + 1 , ext )
if ! Config . DryRun {
newObj , err := mover . Move ( o , newName )
if err != nil {
Stats . Error ( )
ErrorLog ( o , "Failed to rename: %v" , err )
continue
}
Log ( newObj , "renamed from: %v" , o )
} else {
Log ( remote , "Not renaming to %q as --dry-run" , newName )
}
}
}
// dedupeDeleteAllButOne calls DeleteFile on every object in objs
// except the one at index keep, then logs the number of extra copies
// against remote.
func dedupeDeleteAllButOne(keep int, remote string, objs []Object) {
	for idx := range objs {
		if idx != keep {
			DeleteFile(objs[idx])
		}
	}
	extras := len(objs) - 1
	Log(remote, "Deleted %d extra copies", extras)
}
// dedupeDeleteIdentical deletes all but one of identical (by hash) copies
func dedupeDeleteIdentical ( remote string , objs [ ] Object ) [ ] Object {
// See how many of these duplicates are identical
byHash := make ( map [ string ] [ ] Object , len ( objs ) )
for _ , o := range objs {
md5sum , err := o . Hash ( HashMD5 )
if err == nil {
byHash [ md5sum ] = append ( byHash [ md5sum ] , o )
}
2016-01-31 13:58:41 +01:00
}
2016-03-05 17:10:51 +01:00
// Delete identical duplicates, refilling obj with the ones remaining
objs = nil
for md5sum , hashObjs := range byHash {
if len ( hashObjs ) > 1 {
Log ( remote , "Deleting %d/%d identical duplicates (md5sum %q)" , len ( hashObjs ) - 1 , len ( hashObjs ) , md5sum )
for _ , o := range hashObjs [ 1 : ] {
DeleteFile ( o )
}
}
objs = append ( objs , hashObjs [ 0 ] )
}
return objs
}
// dedupeInteractive prints the size, modification time and MD5 hash
// of each object in objs to stdout then asks the user whether to
// skip, keep just one of them or rename them all.
func dedupeInteractive(remote string, objs []Object) {
	const timeLayout = "2006-01-02 15:04:05.000000000"
	fmt.Printf("%s: %d duplicates remain\n", remote, len(objs))
	for idx, obj := range objs {
		sum, hashErr := obj.Hash(HashMD5)
		if hashErr != nil {
			// show the error in place of the hash
			sum = hashErr.Error()
		}
		fmt.Printf(" %d: %12d bytes, %s, md5sum %32s\n", idx+1, obj.Size(), obj.ModTime().Format(timeLayout), sum)
	}
	choices := []string{
		"sSkip and do nothing",
		"kKeep just one (choose which in next step)",
		"rRename all to be different (by changing file.jpg to file-1.jpg)",
	}
	switch Command(choices) {
	case 'r':
		dedupeRename(remote, objs)
	case 'k':
		// the user chooses a 1 based number, the slice index is 0 based
		chosen := ChooseNumber("Enter the number of the file to keep", 1, len(objs))
		dedupeDeleteAllButOne(chosen-1, remote, objs)
	case 's':
		// skip - nothing to do
	}
}
// objectsSortedByModTime implements sort.Interface, ordering the
// objects by modification time, oldest first.
type objectsSortedByModTime []Object

// Len returns the number of objects
func (s objectsSortedByModTime) Len() int {
	return len(s)
}

// Swap exchanges the objects at i and j
func (s objectsSortedByModTime) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// Less reports whether the object at i was modified before the one at j
func (s objectsSortedByModTime) Less(i, j int) bool {
	first, second := s[i].ModTime(), s[j].ModTime()
	return first.Before(second)
}
// DeduplicateMode is how the dedupe command chooses what to do
type DeduplicateMode int

// Deduplicate modes
const (
	DeduplicateInteractive DeduplicateMode = iota // interactively ask the user
	DeduplicateSkip                               // skip all conflicts
	DeduplicateFirst                              // choose the first object
	DeduplicateNewest                             // choose the newest object
	DeduplicateOldest                             // choose the oldest object
	DeduplicateRename                             // rename the objects
)

// String returns the lower case name of the mode, or "unknown" if
// mode isn't one of the Deduplicate constants.
func (mode DeduplicateMode) String() string {
	names := [...]string{
		DeduplicateInteractive: "interactive",
		DeduplicateSkip:        "skip",
		DeduplicateFirst:       "first",
		DeduplicateNewest:      "newest",
		DeduplicateOldest:      "oldest",
		DeduplicateRename:      "rename",
	}
	if mode < 0 || int(mode) >= len(names) {
		return "unknown"
	}
	return names[mode]
}
// Deduplicate interactively finds duplicate files and offers to
// delete all but one or rename them to be different. Only useful with
// Google Drive which can have duplicate file names.
func Deduplicate ( f Fs , mode DeduplicateMode ) error {
Log ( f , "Looking for duplicates using %v mode." , mode )
2016-01-31 13:58:41 +01:00
files := map [ string ] [ ] Object { }
for o := range f . List ( ) {
remote := o . Remote ( )
files [ remote ] = append ( files [ remote ] , o )
}
for remote , objs := range files {
if len ( objs ) > 1 {
2016-03-05 17:10:51 +01:00
Log ( remote , "Found %d duplicates - deleting identical copies" , len ( objs ) )
objs = dedupeDeleteIdentical ( remote , objs )
if len ( objs ) <= 1 {
Log ( remote , "All duplicates removed" )
continue
2016-01-31 13:58:41 +01:00
}
2016-03-05 17:10:51 +01:00
switch mode {
case DeduplicateInteractive :
dedupeInteractive ( remote , objs )
case DeduplicateFirst :
dedupeDeleteAllButOne ( 0 , remote , objs )
case DeduplicateNewest :
sort . Sort ( objectsSortedByModTime ( objs ) ) // sort oldest first
dedupeDeleteAllButOne ( len ( objs ) - 1 , remote , objs )
case DeduplicateOldest :
sort . Sort ( objectsSortedByModTime ( objs ) ) // sort oldest first
dedupeDeleteAllButOne ( 0 , remote , objs )
case DeduplicateRename :
dedupeRename ( remote , objs )
case DeduplicateSkip :
// skip
default :
//skip
2016-01-31 13:58:41 +01:00
}
}
}
return nil
}