From d033e9223485cff2fe431acee3c6f12e7f0df432 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Fri, 7 Oct 2016 11:39:39 +0100 Subject: [PATCH] Stop single file and `--files-from` operations iterating through the source bucket. This works by making sure directory listings that use a filter only iterate the files provided in the filter (if any). Single file copies now don't iterate the source or destination buckets. Note that this could potentially slow down very long `--files-from` lists - this is easy to fix (with another flag probably) if it causes anyone a problem. Fixes #610 Fixes #769 --- fs/filter.go | 19 +++++++++++++------ fs/filter_test.go | 6 +++--- fs/fs.go | 8 ++++---- fs/lister.go | 48 ++++++++++++++++++++++++++++++++++++++++++++--- fs/lister_test.go | 32 +++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 16 deletions(-) diff --git a/fs/filter.go b/fs/filter.go index 5866f1a29..e093cb879 100644 --- a/fs/filter.go +++ b/fs/filter.go @@ -94,8 +94,8 @@ func (rs *rules) len() int { return len(rs.rules) } -// filesMap describes the map of files to transfer -type filesMap map[string]struct{} +// FilesMap describes the map of files to transfer +type FilesMap map[string]struct{} // Filter describes any filtering in operation type Filter struct { @@ -106,8 +106,8 @@ type Filter struct { ModTimeTo time.Time fileRules rules dirRules rules - files filesMap // files if filesFrom - dirs filesMap // dirs from filesFrom + files FilesMap // files if filesFrom + dirs FilesMap // dirs from filesFrom } // We use time conventions @@ -313,8 +313,8 @@ func (f *Filter) AddRule(rule string) error { // AddFile adds a single file to the files from list func (f *Filter) AddFile(file string) error { if f.files == nil { - f.files = make(filesMap) - f.dirs = make(filesMap) + f.files = make(FilesMap) + f.dirs = make(FilesMap) } file = strings.Trim(file, "/") f.files[file] = struct{}{} @@ -332,6 +332,13 @@ func (f *Filter) AddFile(file string) error { return nil } +// Files returns all the files from the `--files-from` list +// +// It may be nil if the list is empty +func (f *Filter) Files() FilesMap { + return f.files +} + // Clear clears all the filter rules func (f *Filter) Clear() { f.fileRules.clear() diff --git a/fs/filter_test.go b/fs/filter_test.go index d49717d1f..e582a3ecf 100644 --- a/fs/filter_test.go +++ b/fs/filter_test.go @@ -180,11 +180,11 @@ func TestNewFilterIncludeFiles(t *testing.T) { require.NoError(t, err) err = f.AddFile("/file2.jpg") require.NoError(t, err) - assert.Equal(t, filesMap{ + assert.Equal(t, FilesMap{ "file1.jpg": {}, "file2.jpg": {}, }, f.files) - assert.Equal(t, filesMap{}, f.dirs) + assert.Equal(t, FilesMap{}, f.dirs) testInclude(t, f, []includeTest{ {"file1.jpg", 0, 0, true}, {"file2.jpg", 1, 0, true}, @@ -206,7 +206,7 @@ func TestNewFilterIncludeFilesDirs(t *testing.T) { err = f.AddFile(path) require.NoError(t, err) } - assert.Equal(t, filesMap{ + assert.Equal(t, FilesMap{ "path": {}, "path/to": {}, "path/to/dir": {}, diff --git a/fs/fs.go b/fs/fs.go index 049503c08..2b5fa055d 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -114,6 +114,10 @@ type ListFser interface { // Fses must support recursion levels of fs.MaxLevel and 1. // They may return ErrorLevelNotSupported otherwise. List(out ListOpts, dir string) + + // NewObject finds the Object at remote. If it can't be found + // it returns the error ErrorObjectNotFound. + NewObject(remote string) (Object, error) } // Fs is the interface a cloud storage system must provide @@ -121,10 +125,6 @@ type Fs interface { Info ListFser - // NewObject finds the Object at remote. If it can't be found - // it returns the error ErrorObjectNotFound. - NewObject(remote string) (Object, error) - // Put in to the remote path with the modTime given of the given size // // May create the object even if it returns an error - if so diff --git a/fs/lister.go b/fs/lister.go index 3361a75fd..c9daf5838 100644 --- a/fs/lister.go +++ b/fs/lister.go @@ -31,13 +31,55 @@ func NewLister() *Lister { return o.SetLevel(-1).SetBuffer(Config.Checkers) } +// Finds and lists the files passed in +// +// Note we ignore the dir and just return all the files in the list +func (o *Lister) listFiles(f ListFser, dir string, files FilesMap) { + buffer := o.Buffer() + jobs := make(chan string, buffer) + var wg sync.WaitGroup + + // Start some listing go routines so we find those name in parallel + wg.Add(buffer) + for i := 0; i < buffer; i++ { + go func() { + defer wg.Done() + for remote := range jobs { + obj, err := f.NewObject(remote) + if err == ErrorObjectNotFound { + // silently ignore files that aren't found in the files list + } else if err != nil { + o.SetError(err) + } else { + o.Add(obj) + } + } + }() + } + + // Pump the names in + for name := range files { + jobs <- name + if o.IsFinished() { + break + } + } + close(jobs) + wg.Wait() + + // Signal that this listing is over + o.Finished() +} + // Start starts a go routine listing the Fs passed in. It returns the // same Lister that was passed in for convenience. func (o *Lister) Start(f ListFser, dir string) *Lister { o.results = make(chan listerResult, o.buffer) - go func() { - f.List(o, dir) - }() + if o.filter != nil && o.filter.Files() != nil { + go o.listFiles(f, dir, o.filter.Files()) + } else { + go f.List(o, dir) + } return o } diff --git a/fs/lister_test.go b/fs/lister_test.go index aab4a6e07..7e5f2a881 100644 --- a/fs/lister_test.go +++ b/fs/lister_test.go @@ -2,6 +2,7 @@ package fs import ( "io" + "sort" "testing" "time" @@ -42,6 +43,10 @@ func (f *mockFs) List(o ListOpts, dir string) { f.listFn(o, dir) } +func (f *mockFs) NewObject(remote string) (Object, error) { + return mockObject(remote), nil +} + func TestListerStart(t *testing.T) { f := &mockFs{} ranList := false @@ -56,6 +61,33 @@ func TestListerStart(t *testing.T) { assert.Equal(t, true, ranList) } +func TestListerStartWithFiles(t *testing.T) { + f := &mockFs{} + ranList := false + f.listFn = func(o ListOpts, dir string) { + ranList = true + } + filter, err := NewFilter() + require.NoError(t, err) + wantNames := []string{"potato", "sausage", "rutabaga", "carrot", "lettuce"} + sort.Strings(wantNames) + for _, name := range wantNames { + err = filter.AddFile(name) + require.NoError(t, err) + } + o := NewLister().SetFilter(filter).Start(f, "") + objs, dirs, err := o.GetAll() + require.Nil(t, err) + assert.Len(t, dirs, 0) + assert.Equal(t, false, ranList) + var gotNames []string + for _, obj := range objs { + gotNames = append(gotNames, obj.Remote()) + } + sort.Strings(gotNames) + assert.Equal(t, wantNames, gotNames) +} + func TestListerSetLevel(t *testing.T) { o := NewLister() o.SetLevel(1)