1
mirror of https://github.com/rclone/rclone synced 2025-01-11 14:26:24 +01:00

chunker: improve detection of incompatible metadata #4917

Before this patch, chunker required at least one data chunk to be
present before it would start checking for a composite object.

Now, if chunker finds at least one potential temporary or control
chunk, it marks the found files as a suspected composite object.
Later, when rclone attempts a concrete operation on the object,
it performs the postponed metadata read and decides whether this is
a native composite object, incompatible metadata, or just garbage.
This commit is contained in:
Ivan Andreev 2021-01-04 04:08:22 +03:00
parent 3877df4e62
commit 847625822f

View File

@ -121,6 +121,8 @@ const maxTransactionProbes = 100
// standard chunker errors // standard chunker errors
var ( var (
ErrChunkOverflow = errors.New("chunk number overflow") ErrChunkOverflow = errors.New("chunk number overflow")
ErrMetaTooBig = errors.New("metadata is too big")
ErrMetaUnknown = errors.New("unknown metadata, please upgrade rclone")
) )
// variants of baseMove's parameter delMode // variants of baseMove's parameter delMode
@ -693,43 +695,47 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP
switch entry := dirOrObject.(type) { switch entry := dirOrObject.(type) {
case fs.Object: case fs.Object:
remote := entry.Remote() remote := entry.Remote()
if mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote); mainRemote != "" { mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote)
if xactID != "" { if mainRemote == "" {
if revealHidden { // this is meta object or standalone file
fs.Infof(f, "ignore temporary chunk %q", remote) object := f.newObject("", entry, nil)
} byRemote[remote] = object
break tempEntries = append(tempEntries, object)
break
}
// this is some kind of chunk
// metobject should have been created above if present
isSpecial := xactID != "" || ctrlType != ""
mainObject := byRemote[mainRemote]
if mainObject == nil && f.useMeta && !isSpecial {
fs.Debugf(f, "skip orphan data chunk %q", remote)
break
}
if mainObject == nil && !f.useMeta {
// this is the "nometa" case
// create dummy chunked object without metadata
mainObject = f.newObject(mainRemote, nil, nil)
byRemote[mainRemote] = mainObject
if !badEntry[mainRemote] {
tempEntries = append(tempEntries, mainObject)
} }
if ctrlType != "" { }
if revealHidden { if isSpecial {
fs.Infof(f, "ignore control chunk %q", remote) if revealHidden {
} fs.Infof(f, "ignore non-data chunk %q", remote)
break
} }
mainObject := byRemote[mainRemote] // need to read metadata to ensure actual object type
if mainObject == nil && f.useMeta { if f.useMeta && mainObject != nil && mainObject.size <= maxMetadataSize {
fs.Debugf(f, "skip chunk %q without meta object", remote) mainObject.unsure = true
break
}
if mainObject == nil {
// useMeta is false - create chunked object without metadata
mainObject = f.newObject(mainRemote, nil, nil)
byRemote[mainRemote] = mainObject
if !badEntry[mainRemote] {
tempEntries = append(tempEntries, mainObject)
}
}
if err := mainObject.addChunk(entry, chunkNo); err != nil {
if f.opt.FailHard {
return nil, err
}
badEntry[mainRemote] = true
} }
break break
} }
object := f.newObject("", entry, nil) if err := mainObject.addChunk(entry, chunkNo); err != nil {
byRemote[remote] = object if f.opt.FailHard {
tempEntries = append(tempEntries, object) return nil, err
}
badEntry[mainRemote] = true
}
case fs.Directory: case fs.Directory:
isSubdir[entry.Remote()] = true isSubdir[entry.Remote()] = true
wrapDir := fs.NewDirCopy(ctx, entry) wrapDir := fs.NewDirCopy(ctx, entry)
@ -784,6 +790,13 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP
// but opening even a small file can be slow on some backends. // but opening even a small file can be slow on some backends.
// //
func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
return f.scanObject(ctx, remote, false)
}
// scanObject is like NewObject with optional quick scan mode.
// The quick mode avoids directory requests other than `List`,
// ignores non-chunked objects and skips chunk size checks.
func (f *Fs) scanObject(ctx context.Context, remote string, quickScan bool) (fs.Object, error) {
if err := f.forbidChunk(false, remote); err != nil { if err := f.forbidChunk(false, remote); err != nil {
return nil, errors.Wrap(err, "can't access") return nil, errors.Wrap(err, "can't access")
} }
@ -844,8 +857,15 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
continue // bypass regexp to save cpu continue // bypass regexp to save cpu
} }
mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote) mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote)
if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactID != "" { if mainRemote == "" || mainRemote != remote {
continue // skip non-conforming, temporary and control chunks continue // skip non-conforming chunks
}
if ctrlType != "" || xactID != "" {
if f.useMeta {
// temporary/control chunk calls for lazy metadata read
o.unsure = true
}
continue
} }
//fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo) //fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo)
if err := o.addChunk(entry, chunkNo); err != nil { if err := o.addChunk(entry, chunkNo); err != nil {
@ -855,7 +875,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
if o.main == nil && (o.chunks == nil || len(o.chunks) == 0) { if o.main == nil && (o.chunks == nil || len(o.chunks) == 0) {
// Scanning hasn't found data chunks with conforming names. // Scanning hasn't found data chunks with conforming names.
if f.useMeta { if f.useMeta || quickScan {
// Metadata is required but absent and there are no chunks. // Metadata is required but absent and there are no chunks.
return nil, fs.ErrorObjectNotFound return nil, fs.ErrorObjectNotFound
} }
@ -878,8 +898,10 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
// file without metadata. Validate it and update the total data size. // file without metadata. Validate it and update the total data size.
// As an optimization, skip metadata reading here - we will call // As an optimization, skip metadata reading here - we will call
// readMetadata lazily when needed (reading can be expensive). // readMetadata lazily when needed (reading can be expensive).
if err := o.validate(); err != nil { if !quickScan {
return nil, err if err := o.validate(); err != nil {
return nil, err
}
} }
return o, nil return o, nil
} }
@ -888,13 +910,24 @@ func (o *Object) readMetadata(ctx context.Context) error {
if o.isFull { if o.isFull {
return nil return nil
} }
if !o.isComposite() || !o.f.useMeta { if !o.f.useMeta || (!o.isComposite() && !o.unsure) {
o.isFull = true o.isFull = true
return nil return nil
} }
// validate metadata // validate metadata
metaObject := o.main metaObject := o.main
if metaObject.Size() > maxMetadataSize {
if o.unsure {
// this is not metadata but a foreign object
o.unsure = false
o.chunks = nil // make isComposite return false
o.isFull = true // cache results
return nil
}
return ErrMetaTooBig
}
reader, err := metaObject.Open(ctx) reader, err := metaObject.Open(ctx)
if err != nil { if err != nil {
return err return err
@ -907,8 +940,22 @@ func (o *Object) readMetadata(ctx context.Context) error {
switch o.f.opt.MetaFormat { switch o.f.opt.MetaFormat {
case "simplejson": case "simplejson":
metaInfo, err := unmarshalSimpleJSON(ctx, metaObject, metadata, true) metaInfo, madeByChunker, err := unmarshalSimpleJSON(ctx, metaObject, metadata)
if err != nil { if o.unsure {
o.unsure = false
if !madeByChunker {
// this is not metadata but a foreign object
o.chunks = nil // make isComposite return false
o.isFull = true // cache results
return nil
}
}
switch err {
case nil:
// fall thru
case ErrMetaTooBig, ErrMetaUnknown:
return err // return these errors unwrapped for unit tests
default:
return errors.Wrap(err, "invalid metadata") return errors.Wrap(err, "invalid metadata")
} }
if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks { if o.size != metaInfo.Size() || len(o.chunks) != metaInfo.nChunks {
@ -923,7 +970,27 @@ func (o *Object) readMetadata(ctx context.Context) error {
} }
// put implements Put, PutStream, PutUnchecked, Update // put implements Put, PutStream, PutUnchecked, Update
func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption, basePut putFn) (obj fs.Object, err error) { func (f *Fs) put(
ctx context.Context, in io.Reader, src fs.ObjectInfo, remote string, options []fs.OpenOption,
basePut putFn, action string, target fs.Object) (obj fs.Object, err error) {
if err := f.forbidChunk(src, remote); err != nil {
return nil, errors.Wrap(err, action+" refused")
}
if target == nil {
// Get target object with a quick directory scan
if obj, err := f.scanObject(ctx, remote, true); err == nil {
target = obj
}
}
if target != nil {
obj := target.(*Object)
if err := obj.readMetadata(ctx); err == ErrMetaUnknown {
// refuse to update a file of unsupported format
return nil, errors.Wrap(err, "refusing to "+action)
}
}
c := f.newChunkingReader(src) c := f.newChunkingReader(src)
wrapIn := c.wrapStream(ctx, in, src) wrapIn := c.wrapStream(ctx, in, src)
@ -1013,8 +1080,8 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st
// Check for input that looks like valid metadata // Check for input that looks like valid metadata
needMeta := len(c.chunks) > 1 needMeta := len(c.chunks) > 1
if c.readCount <= maxMetadataSize && len(c.chunks) == 1 { if c.readCount <= maxMetadataSize && len(c.chunks) == 1 {
_, err := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead, false) _, madeByChunker, _ := unmarshalSimpleJSON(ctx, c.chunks[0], c.smallHead)
needMeta = err == nil needMeta = madeByChunker
} }
// Finalize small object as non-chunked. // Finalize small object as non-chunked.
@ -1273,29 +1340,16 @@ func (f *Fs) removeOldChunks(ctx context.Context, remote string) {
// will return the object and the error, otherwise will return // will return the object and the error, otherwise will return
// nil and the error // nil and the error
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
if err := f.forbidChunk(src, src.Remote()); err != nil { return f.put(ctx, in, src, src.Remote(), options, f.base.Put, "put", nil)
return nil, errors.Wrap(err, "refusing to put")
}
return f.put(ctx, in, src, src.Remote(), options, f.base.Put)
} }
// PutStream uploads to the remote path with the modTime given of indeterminate size // PutStream uploads to the remote path with the modTime given of indeterminate size
func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
if err := f.forbidChunk(src, src.Remote()); err != nil { return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream, "upload", nil)
return nil, errors.Wrap(err, "refusing to upload")
}
return f.put(ctx, in, src, src.Remote(), options, f.base.Features().PutStream)
} }
// Update in to the object with the modTime given of the given size // Update in to the object with the modTime given of the given size
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
if err := o.f.forbidChunk(o, o.Remote()); err != nil {
return errors.Wrap(err, "update refused")
}
if err := o.readMetadata(ctx); err != nil {
// refuse to update a file of unsupported format
return errors.Wrap(err, "refusing to update")
}
basePut := o.f.base.Put basePut := o.f.base.Put
if src.Size() < 0 { if src.Size() < 0 {
basePut = o.f.base.Features().PutStream basePut = o.f.base.Features().PutStream
@ -1303,7 +1357,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
return errors.New("wrapped file system does not support streaming uploads") return errors.New("wrapped file system does not support streaming uploads")
} }
} }
oNew, err := o.f.put(ctx, in, src, o.Remote(), options, basePut) oNew, err := o.f.put(ctx, in, src, o.Remote(), options, basePut, "update", o)
if err == nil { if err == nil {
*o = *oNew.(*Object) *o = *oNew.(*Object)
} }
@ -1417,7 +1471,7 @@ func (o *Object) Remove(ctx context.Context) (err error) {
// to corrupt file in hard mode. Hence, refuse to Remove, too. // to corrupt file in hard mode. Hence, refuse to Remove, too.
return errors.Wrap(err, "refuse to corrupt") return errors.Wrap(err, "refuse to corrupt")
} }
if err := o.readMetadata(ctx); err != nil { if err := o.readMetadata(ctx); err == ErrMetaUnknown {
// Proceed but warn user that unexpected things can happen. // Proceed but warn user that unexpected things can happen.
fs.Errorf(o, "Removing a file with unsupported metadata: %v", err) fs.Errorf(o, "Removing a file with unsupported metadata: %v", err)
} }
@ -1445,6 +1499,11 @@ func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMo
if err := f.forbidChunk(o, remote); err != nil { if err := f.forbidChunk(o, remote); err != nil {
return nil, errors.Wrapf(err, "can't %s", opName) return nil, errors.Wrapf(err, "can't %s", opName)
} }
if err := o.readMetadata(ctx); err != nil {
// Refuse to copy/move composite files with invalid or future
// metadata format which might involve unsupported chunk types.
return nil, errors.Wrapf(err, "can't %s this file", opName)
}
if !o.isComposite() { if !o.isComposite() {
fs.Debugf(o, "%s non-chunked object...", opName) fs.Debugf(o, "%s non-chunked object...", opName)
oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk oResult, err := do(ctx, o.mainChunk(), remote) // chain operation to a single wrapped chunk
@ -1453,11 +1512,6 @@ func (f *Fs) copyOrMove(ctx context.Context, o *Object, remote string, do copyMo
} }
return f.newObject("", oResult, nil), nil return f.newObject("", oResult, nil), nil
} }
if err := o.readMetadata(ctx); err != nil {
// Refuse to copy/move composite files with invalid or future
// metadata format which might involve unsupported chunk types.
return nil, errors.Wrapf(err, "can't %s this file", opName)
}
fs.Debugf(o, "%s %d data chunks...", opName, len(o.chunks)) fs.Debugf(o, "%s %d data chunks...", opName, len(o.chunks))
mainRemote := o.remote mainRemote := o.remote
@ -1548,6 +1602,10 @@ func (f *Fs) okForServerSide(ctx context.Context, src fs.Object, opName string)
return return
} }
if obj.unsure {
// ensure object is composite if need to re-read metadata
_ = obj.readMetadata(ctx)
}
requireMetaHash := obj.isComposite() && f.opt.MetaFormat == "simplejson" requireMetaHash := obj.isComposite() && f.opt.MetaFormat == "simplejson"
if !requireMetaHash && !f.hashAll { if !requireMetaHash && !f.hashAll {
ok = true // hash is not required for metadata ok = true // hash is not required for metadata
@ -1741,6 +1799,7 @@ type Object struct {
chunks []fs.Object // active data chunks if file is composite, or wrapped file as a single chunk if meta format is 'none' chunks []fs.Object // active data chunks if file is composite, or wrapped file as a single chunk if meta format is 'none'
size int64 // cached total size of chunks in a composite file or -1 for non-chunked files size int64 // cached total size of chunks in a composite file or -1 for non-chunked files
isFull bool // true if metadata has been read isFull bool // true if metadata has been read
unsure bool // true if need to read metadata to detect object type
md5 string md5 string
sha1 string sha1 string
f *Fs f *Fs
@ -1891,15 +1950,16 @@ func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
// on the level of wrapped remote but chunker is unaware of that. // on the level of wrapped remote but chunker is unaware of that.
// //
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) { func (o *Object) Hash(ctx context.Context, hashType hash.Type) (string, error) {
if err := o.readMetadata(ctx); err != nil {
return "", err // valid metadata is required to get hash, abort
}
if !o.isComposite() { if !o.isComposite() {
// First, chain to the wrapped non-chunked file if possible. // First, chain to the wrapped non-chunked file if possible.
if value, err := o.mainChunk().Hash(ctx, hashType); err == nil && value != "" { if value, err := o.mainChunk().Hash(ctx, hashType); err == nil && value != "" {
return value, nil return value, nil
} }
} }
if err := o.readMetadata(ctx); err != nil {
return "", err // valid metadata is required to get hash, abort
}
// Try hash from metadata if the file is composite or if wrapped remote fails. // Try hash from metadata if the file is composite or if wrapped remote fails.
switch hashType { switch hashType {
case hash.MD5: case hash.MD5:
@ -1924,13 +1984,13 @@ func (o *Object) UnWrap() fs.Object {
// Open opens the file for read. Call Close() on the returned io.ReadCloser // Open opens the file for read. Call Close() on the returned io.ReadCloser
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) { func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (rc io.ReadCloser, err error) {
if !o.isComposite() {
return o.mainChunk().Open(ctx, options...) // chain to wrapped non-chunked file
}
if err := o.readMetadata(ctx); err != nil { if err := o.readMetadata(ctx); err != nil {
// refuse to open unsupported format // refuse to open unsupported format
return nil, errors.Wrap(err, "can't open") return nil, errors.Wrap(err, "can't open")
} }
if !o.isComposite() {
return o.mainChunk().Open(ctx, options...) // chain to wrapped non-chunked file
}
var openOptions []fs.OpenOption var openOptions []fs.OpenOption
var offset, limit int64 = 0, -1 var offset, limit int64 = 0, -1
@ -2203,57 +2263,57 @@ func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 s
// handled by current implementation. // handled by current implementation.
// The version check below will then explicitly ask user to upgrade rclone. // The version check below will then explicitly ask user to upgrade rclone.
// //
func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte, strictChecks bool) (info *ObjectInfo, err error) { func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte) (info *ObjectInfo, madeByChunker bool, err error) {
// Be strict about JSON format // Be strict about JSON format
// to reduce possibility that a random small file resembles metadata. // to reduce possibility that a random small file resembles metadata.
if data != nil && len(data) > maxMetadataSize { if data != nil && len(data) > maxMetadataSize {
return nil, errors.New("too big") return nil, false, ErrMetaTooBig
} }
if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' { if data == nil || len(data) < 2 || data[0] != '{' || data[len(data)-1] != '}' {
return nil, errors.New("invalid json") return nil, false, errors.New("invalid json")
} }
var metadata metaSimpleJSON var metadata metaSimpleJSON
err = json.Unmarshal(data, &metadata) err = json.Unmarshal(data, &metadata)
if err != nil { if err != nil {
return nil, err return nil, false, err
} }
// Basic fields are strictly required // Basic fields are strictly required
// to reduce possibility that a random small file resembles metadata. // to reduce possibility that a random small file resembles metadata.
if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil { if metadata.Version == nil || metadata.Size == nil || metadata.ChunkNum == nil {
return nil, errors.New("missing required field") return nil, false, errors.New("missing required field")
} }
// Perform strict checks, avoid corruption of future metadata formats. // Perform strict checks, avoid corruption of future metadata formats.
if *metadata.Version < 1 { if *metadata.Version < 1 {
return nil, errors.New("wrong version") return nil, false, errors.New("wrong version")
} }
if *metadata.Size < 0 { if *metadata.Size < 0 {
return nil, errors.New("negative file size") return nil, false, errors.New("negative file size")
} }
if *metadata.ChunkNum < 0 { if *metadata.ChunkNum < 0 {
return nil, errors.New("negative number of chunks") return nil, false, errors.New("negative number of chunks")
} }
if *metadata.ChunkNum > maxSafeChunkNumber { if *metadata.ChunkNum > maxSafeChunkNumber {
return nil, ErrChunkOverflow return nil, true, ErrChunkOverflow // produced by incompatible version of rclone
} }
if metadata.MD5 != "" { if metadata.MD5 != "" {
_, err = hex.DecodeString(metadata.MD5) _, err = hex.DecodeString(metadata.MD5)
if len(metadata.MD5) != 32 || err != nil { if len(metadata.MD5) != 32 || err != nil {
return nil, errors.New("wrong md5 hash") return nil, false, errors.New("wrong md5 hash")
} }
} }
if metadata.SHA1 != "" { if metadata.SHA1 != "" {
_, err = hex.DecodeString(metadata.SHA1) _, err = hex.DecodeString(metadata.SHA1)
if len(metadata.SHA1) != 40 || err != nil { if len(metadata.SHA1) != 40 || err != nil {
return nil, errors.New("wrong sha1 hash") return nil, false, errors.New("wrong sha1 hash")
} }
} }
// ChunkNum is allowed to be 0 in future versions // ChunkNum is allowed to be 0 in future versions
if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion { if *metadata.ChunkNum < 1 && *metadata.Version <= metadataVersion {
return nil, errors.New("wrong number of chunks") return nil, false, errors.New("wrong number of chunks")
} }
// Non-strict mode also accepts future metadata versions // Non-strict mode also accepts future metadata versions
if *metadata.Version > metadataVersion && strictChecks { if *metadata.Version > metadataVersion {
return nil, fmt.Errorf("version %d is not supported, please upgrade rclone", metadata.Version) return nil, true, ErrMetaUnknown // produced by incompatible version of rclone
} }
var nilFs *Fs // nil object triggers appropriate type method var nilFs *Fs // nil object triggers appropriate type method
@ -2261,7 +2321,7 @@ func unmarshalSimpleJSON(ctx context.Context, metaObject fs.Object, data []byte,
info.nChunks = *metadata.ChunkNum info.nChunks = *metadata.ChunkNum
info.md5 = metadata.MD5 info.md5 = metadata.MD5
info.sha1 = metadata.SHA1 info.sha1 = metadata.SHA1
return info, nil return info, true, nil
} }
func silentlyRemove(ctx context.Context, o fs.Object) { func silentlyRemove(ctx context.Context, o fs.Object) {