package hasher import ( "context" "errors" "fmt" "io" "path" "time" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/fs/operations" ) // obtain hash for an object func (o *Object) getHash(ctx context.Context, hashType hash.Type) (string, error) { maxAge := time.Duration(o.f.opt.MaxAge) if maxAge <= 0 { return "", nil } fp := o.fingerprint(ctx) if fp == "" { return "", errors.New("fingerprint failed") } return o.f.getRawHash(ctx, hashType, o.Remote(), fp, maxAge) } // obtain hash for a path func (f *Fs) getRawHash(ctx context.Context, hashType hash.Type, remote, fp string, age time.Duration) (string, error) { key := path.Join(f.Fs.Root(), remote) op := &kvGet{ key: key, fp: fp, hash: hashType.String(), age: age, } err := f.db.Do(false, op) return op.val, err } // put new hashes for an object func (o *Object) putHashes(ctx context.Context, rawHashes hashMap) error { if o.f.opt.MaxAge <= 0 { return nil } fp := o.fingerprint(ctx) if fp == "" { return nil } key := path.Join(o.f.Fs.Root(), o.Remote()) hashes := operations.HashSums{} for hashType, hashVal := range rawHashes { hashes[hashType.String()] = hashVal } return o.f.putRawHashes(ctx, key, fp, hashes) } // set hashes for a path without any validation func (f *Fs) putRawHashes(ctx context.Context, key, fp string, hashes operations.HashSums) error { return f.db.Do(true, &kvPut{ key: key, fp: fp, hashes: hashes, age: time.Duration(f.opt.MaxAge), }) } // Hash returns the selected checksum of the file or "" if unavailable. func (o *Object) Hash(ctx context.Context, hashType hash.Type) (hashVal string, err error) { f := o.f if f.passHashes.Contains(hashType) { fs.Debugf(o, "pass %s", hashType) hashVal, err = o.Object.Hash(ctx, hashType) if hashVal != "" { return hashVal, err } if err != nil { fs.Debugf(o, "error passing %s: %v", hashType, err) } fs.Debugf(o, "passed %s is blank -- trying other methods", hashType) } if !f.suppHashes.Contains(hashType) { fs.Debugf(o, "unsupp %s", hashType) return "", hash.ErrUnsupported } if hashVal, err = o.getHash(ctx, hashType); err != nil { fs.Debugf(o, "getHash: %v", err) err = nil hashVal = "" } if hashVal != "" { fs.Debugf(o, "cached %s = %q", hashType, hashVal) return hashVal, nil } if f.slowHashes.Contains(hashType) { fs.Debugf(o, "slow %s", hashType) hashVal, err = o.Object.Hash(ctx, hashType) if err == nil && hashVal != "" && f.keepHashes.Contains(hashType) { if err = o.putHashes(ctx, hashMap{hashType: hashVal}); err != nil { fs.Debugf(o, "putHashes: %v", err) err = nil } } return hashVal, err } if f.autoHashes.Contains(hashType) && o.Size() < int64(f.opt.AutoSize) { _ = o.updateHashes(ctx) if hashVal, err = o.getHash(ctx, hashType); err != nil { fs.Debugf(o, "auto %s = %q (%v)", hashType, hashVal, err) err = nil } } return hashVal, err } // updateHashes performs implicit "rclone hashsum --download" and updates cache. func (o *Object) updateHashes(ctx context.Context) error { r, err := o.Open(ctx) if err != nil { fs.Infof(o, "update failed (open): %v", err) return err } defer func() { _ = r.Close() }() if _, err = io.Copy(io.Discard, r); err != nil { fs.Infof(o, "update failed (copy): %v", err) return err } return nil } // Update the object with the given data, time and size. func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { _ = o.f.pruneHash(src.Remote()) return o.Object.Update(ctx, in, src, options...) } // Remove an object. func (o *Object) Remove(ctx context.Context) error { _ = o.f.pruneHash(o.Remote()) return o.Object.Remove(ctx) } // SetModTime sets the modification time of the file. // Also prunes the cache entry when modtime changes so that // touching a file will trigger checksum recalculation even // on backends that don't provide modTime with fingerprint. func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error { if mtime != o.Object.ModTime(ctx) { _ = o.f.pruneHash(o.Remote()) } return o.Object.SetModTime(ctx, mtime) } // Open opens the file for read. // Full reads will also update object hashes. func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (r io.ReadCloser, err error) { size := o.Size() var offset, limit int64 = 0, -1 for _, option := range options { switch opt := option.(type) { case *fs.SeekOption: offset = opt.Offset case *fs.RangeOption: offset, limit = opt.Decode(size) } } if offset < 0 { return nil, errors.New("invalid offset") } if limit < 0 { limit = size - offset } if r, err = o.Object.Open(ctx, options...); err != nil { return nil, err } if offset != 0 || limit < size { // It's a partial read return r, err } return o.f.newHashingReader(ctx, r, func(sums hashMap) { if err := o.putHashes(ctx, sums); err != nil { fs.Infof(o, "auto hashing error: %v", err) } }) } // Put data into the remote path with given modTime and size func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { var ( o fs.Object common hash.Set rehash bool hashes hashMap ) if fsrc := src.Fs(); fsrc != nil { common = fsrc.Hashes().Overlap(f.keepHashes) // Rehash if source does not have all required hashes or hashing is slow rehash = fsrc.Features().SlowHash || common != f.keepHashes } wrapIn := in if rehash { r, err := f.newHashingReader(ctx, in, func(sums hashMap) { hashes = sums }) fs.Debugf(src, "Rehash in-fly due to incomplete or slow source set %v (err: %v)", common, err) if err == nil { wrapIn = r } else { rehash = false } } _ = f.pruneHash(src.Remote()) oResult, err := f.Fs.Put(ctx, wrapIn, src, options...) o, err = f.wrapObject(oResult, err) if err != nil { return nil, err } if !rehash { hashes = hashMap{} for _, ht := range common.Array() { if h, e := src.Hash(ctx, ht); e == nil && h != "" { hashes[ht] = h } } } if len(hashes) > 0 { err := o.(*Object).putHashes(ctx, hashes) fs.Debugf(o, "Applied %d source hashes, err: %v", len(hashes), err) } return o, err } type hashingReader struct { rd io.Reader hasher *hash.MultiHasher fun func(hashMap) } func (f *Fs) newHashingReader(ctx context.Context, rd io.Reader, fun func(hashMap)) (*hashingReader, error) { hasher, err := hash.NewMultiHasherTypes(f.keepHashes) if err != nil { return nil, err } hr := &hashingReader{ rd: rd, hasher: hasher, fun: fun, } return hr, nil } func (r *hashingReader) Read(p []byte) (n int, err error) { n, err = r.rd.Read(p) if err != nil && err != io.EOF { r.hasher = nil } if r.hasher != nil { if _, errHash := r.hasher.Write(p[:n]); errHash != nil { r.hasher = nil err = errHash } } if err == io.EOF && r.hasher != nil { r.fun(r.hasher.Sums()) r.hasher = nil } return } func (r *hashingReader) Close() error { if rc, ok := r.rd.(io.ReadCloser); ok { return rc.Close() } return nil } // Return object fingerprint or empty string in case of errors // // Note that we can't use the generic `fs.Fingerprint` here because // this fingerprint is used to pick _derived hashes_ that are slow // to calculate or completely unsupported by the base remote. // // The hasher fingerprint must be based on `fsHash`, the first _fast_ // hash supported _by the underlying remote_ (if there is one), // while `fs.Fingerprint` would select a hash _produced by hasher_ // creating unresolvable fingerprint loop. func (o *Object) fingerprint(ctx context.Context) string { size := o.Object.Size() timeStr := "-" if o.f.fpTime { timeStr = o.Object.ModTime(ctx).UTC().Format(timeFormat) if timeStr == "" { return "" } } hashStr := "-" if o.f.fpHash != hash.None { var err error hashStr, err = o.Object.Hash(ctx, o.f.fpHash) if hashStr == "" || err != nil { return "" } } return fmt.Sprintf("%d,%s,%s", size, timeStr, hashStr) }