From 41ba1bba2bf53aa1385beeb50c6afdd0bd7e5aa4 Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Wed, 4 Dec 2019 13:43:58 +0300 Subject: [PATCH] chunker: reduce length of temporary suffix --- backend/chunker/chunker.go | 239 ++++++++++------ backend/chunker/chunker_internal_test.go | 332 ++++++++++++++--------- docs/content/chunker.md | 19 +- 3 files changed, 384 insertions(+), 206 deletions(-) diff --git a/backend/chunker/chunker.go b/backend/chunker/chunker.go index ca7305201..42c9cf5b4 100644 --- a/backend/chunker/chunker.go +++ b/backend/chunker/chunker.go @@ -12,11 +12,13 @@ import ( gohash "hash" "io" "io/ioutil" + "math/rand" "path" "regexp" "sort" "strconv" "strings" + "sync" "time" "github.com/pkg/errors" @@ -34,46 +36,57 @@ import ( // and optional metadata object. If it's present, // meta object is named after the original file. // +// The only supported metadata format is simplejson atm. +// It supports only per-file meta objects that are rudimentary, +// used mostly for consistency checks (lazily for performance reasons). +// Other formats can be developed that use an external meta store +// free of these limitations, but this needs some support from +// rclone core (eg. metadata store interfaces). +// // The following types of chunks are supported: // data and control, active and temporary. // Chunk type is identified by matching chunk file name // based on the chunk name format configured by user. // -// Both data and control chunks can be either temporary or -// active (non-temporary). +// Both data and control chunks can be either temporary (aka hidden) +// or active (non-temporary aka normal aka permanent). // An operation creates temporary chunks while it runs. -// By completion it removes temporary and leaves active -// (aka normal aka permanent) chunks. +// By completion it removes temporary and leaves active chunks. // -// Temporary (aka hidden) chunks have a special hardcoded suffix -// in addition to the configured name pattern. 
The suffix comes last -// to prevent name collisions with non-temporary chunks. -// Temporary suffix includes so called transaction number usually -// abbreviated as `xactNo` below, a generic non-negative integer +// Temporary chunks have a special hardcoded suffix in addition +// to the configured name pattern. +// Temporary suffix includes so called transaction identifier +// (abbreviated as `xactID` below), a generic non-negative base-36 "number" // used by parallel operations to share a composite object. +// Chunker also accepts the longer decimal temporary suffix (obsolete), +// which is transparently converted to the new format. In its maximum +// length of 13 decimals it makes a 7-digit base-36 number. // // Chunker can tell data chunks from control chunks by the characters // located in the "hash placeholder" position of configured format. // Data chunks have decimal digits there. -// Control chunks have a short lowercase literal prepended by underscore -// in that position. +// Control chunks have in that position a short lowercase alphanumeric +// string (starting with a letter) prepended by underscore. // // Metadata format v1 does not define any control chunk types, // they are currently ignored aka reserved. // In future they can be used to implement resumable uploads etc. // const ( - ctrlTypeRegStr = `[a-z]{3,9}` - tempChunkFormat = `%s..tmp_%010d` - tempChunkRegStr = `\.\.tmp_([0-9]{10,19})` + ctrlTypeRegStr = `[a-z][a-z0-9]{2,6}` + tempSuffixFormat = `_%04s` + tempSuffixRegStr = `_([0-9a-z]{4,9})` + tempSuffixRegOld = `\.\.tmp_([0-9]{10,13})` ) var ( - ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`) + // regular expressions to validate control type and temporary suffix + ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`) + tempSuffixRegexp = regexp.MustCompile(`^` + tempSuffixRegStr + `$`) ) // Normally metadata is a small piece of JSON (about 100-300 bytes). 
-// The size of valid metadata size must never exceed this limit. +// The size of valid metadata must never exceed this limit. // Current maximum provides a reasonable room for future extensions. // // Please refrain from increasing it, this can cause old rclone versions @@ -101,6 +114,9 @@ const revealHidden = false // Prevent memory overflow due to specially crafted chunk name const maxSafeChunkNumber = 10000000 +// Number of attempts to find unique transaction identifier +const maxTransactionProbes = 100 + // standard chunker errors var ( ErrChunkOverflow = errors.New("chunk number overflow") @@ -113,13 +129,6 @@ const ( delFailed = 2 // move, then delete and try again if failed ) -// Note: metadata logic is tightly coupled with chunker code in many -// places, eg. in checks whether a file should have meta object or is -// eligible for chunking. -// If more metadata formats (or versions of a format) are added in future, -// it may be advisable to factor it into a "metadata strategy" interface -// similar to chunkingReader or linearReader below. - // Register with Fs func init() { fs.Register(&fs.RegInfo{ @@ -261,7 +270,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) { // detects a composite file because it finds the first chunk! 
// (yet can't satisfy fstest.CheckListing, will ignore) if err == nil && !f.useMeta && strings.Contains(rpath, "/") { - firstChunkPath := f.makeChunkName(remotePath, 0, "", -1) + firstChunkPath := f.makeChunkName(remotePath, 0, "", "") _, testErr := baseInfo.NewFs(baseName, firstChunkPath, baseConfig) if testErr == fs.ErrorIsFile { err = testErr @@ -310,12 +319,16 @@ type Fs struct { dataNameFmt string // name format of data chunks ctrlNameFmt string // name format of control chunks nameRegexp *regexp.Regexp // regular expression to match chunk names + xactIDRand *rand.Rand // generator of random transaction identifiers + xactIDMutex sync.Mutex // mutex for the source of randomness opt Options // copy of Options features *fs.Features // optional features dirSort bool // reserved for future, ignored } -// configure must be called only from NewFs or by unit tests +// configure sets up chunker for given name format, meta format and hash type. +// It also seeds the source of random transaction identifiers. +// configure must be called only from NewFs or by unit tests. 
func (f *Fs) configure(nameFormat, metaFormat, hashType string) error { if err := f.setChunkNameFormat(nameFormat); err != nil { return errors.Wrapf(err, "invalid name format '%s'", nameFormat) @@ -326,6 +339,10 @@ func (f *Fs) configure(nameFormat, metaFormat, hashType string) error { if err := f.setHashType(hashType); err != nil { return err } + + randomSeed := time.Now().UnixNano() + f.xactIDRand = rand.New(rand.NewSource(randomSeed)) + return nil } @@ -414,13 +431,13 @@ func (f *Fs) setChunkNameFormat(pattern string) error { } reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr) - // this must be non-greedy or else it can eat up temporary suffix + // this must be non-greedy or else it could eat up temporary suffix const mainNameRegStr = "(.+?)" strRegex := regexp.QuoteMeta(pattern) strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl) strRegex = strings.Replace(strRegex, "\\*", mainNameRegStr, -1) - strRegex = fmt.Sprintf("^%s(?:%s)?$", strRegex, tempChunkRegStr) + strRegex = fmt.Sprintf("^%s(?:%s|%s)?$", strRegex, tempSuffixRegStr, tempSuffixRegOld) f.nameRegexp = regexp.MustCompile(strRegex) // craft printf formats for active data/control chunks @@ -435,34 +452,36 @@ func (f *Fs) setChunkNameFormat(pattern string) error { return nil } -// makeChunkName produces chunk name (or path) for given file. +// makeChunkName produces chunk name (or path) for a given file. // -// mainPath can be name, relative or absolute path of main file. +// filePath can be name, relative or absolute path of main file. // // chunkNo must be a zero based index of data chunk. // Negative chunkNo eg. -1 indicates a control chunk. // ctrlType is type of control chunk (must be valid). // ctrlType must be "" for data chunks. // -// xactNo is a transaction number. -// Negative xactNo eg. -1 indicates an active chunk, -// otherwise produce temporary chunk name. +// xactID is a transaction identifier. 
Empty xactID denotes active chunk, +// otherwise temporary chunk name is produced. // -func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo int64) string { - dir, mainName := path.Split(mainPath) - var name string +func (f *Fs) makeChunkName(filePath string, chunkNo int, ctrlType, xactID string) string { + dir, parentName := path.Split(filePath) + var name, tempSuffix string switch { case chunkNo >= 0 && ctrlType == "": - name = fmt.Sprintf(f.dataNameFmt, mainName, chunkNo+f.opt.StartFrom) + name = fmt.Sprintf(f.dataNameFmt, parentName, chunkNo+f.opt.StartFrom) case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType): - name = fmt.Sprintf(f.ctrlNameFmt, mainName, ctrlType) + name = fmt.Sprintf(f.ctrlNameFmt, parentName, ctrlType) default: panic("makeChunkName: invalid argument") // must not produce something we can't consume } - if xactNo >= 0 { - name = fmt.Sprintf(tempChunkFormat, name, xactNo) + if xactID != "" { + tempSuffix = fmt.Sprintf(tempSuffixFormat, xactID) + if !tempSuffixRegexp.MatchString(tempSuffix) { + panic("makeChunkName: invalid argument") + } } - return dir + name + return dir + name + tempSuffix } // parseChunkName checks whether given file path belongs to @@ -470,20 +489,21 @@ func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo // // filePath can be name, relative or absolute path of a file. // -// Returned mainPath is a non-empty string if valid chunk name -// is detected or "" if it's not a chunk. +// Returned parentPath is path of the composite file owning the chunk. +// It's a non-empty string if valid chunk name is detected +// or "" if it's not a chunk. 
// Other returned values depend on detected chunk type: // data or control, active or temporary: // // data chunk - the returned chunkNo is non-negative and ctrlType is "" -// control chunk - the chunkNo is -1 and ctrlType is non-empty string -// active chunk - the returned xactNo is -1 -// temporary chunk - the xactNo is non-negative integer -func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrlType string, xactNo int64) { +// control chunk - the chunkNo is -1 and ctrlType is a non-empty string +// active chunk - the returned xactID is "" +// temporary chunk - the xactID is a non-empty string +func (f *Fs) parseChunkName(filePath string) (parentPath string, chunkNo int, ctrlType, xactID string) { dir, name := path.Split(filePath) match := f.nameRegexp.FindStringSubmatch(name) if match == nil || match[1] == "" { - return "", -1, "", -1 + return "", -1, "", "" } var err error @@ -494,19 +514,26 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl } if chunkNo -= f.opt.StartFrom; chunkNo < 0 { fs.Infof(f, "invalid data chunk number in file %q", name) - return "", -1, "", -1 + return "", -1, "", "" } } - xactNo = -1 if match[4] != "" { - if xactNo, err = strconv.ParseInt(match[4], 10, 64); err != nil || xactNo < 0 { - fs.Infof(f, "invalid transaction number in file %q", name) - return "", -1, "", -1 + xactID = match[4] + } + if match[5] != "" { + // old-style temporary suffix + number, err := strconv.ParseInt(match[5], 10, 64) + if err != nil || number < 0 { + fs.Infof(f, "invalid old-style transaction number in file %q", name) + return "", -1, "", "" } + // convert old-style transaction number to base-36 transaction ID + xactID = fmt.Sprintf(tempSuffixFormat, strconv.FormatInt(number, 36)) + xactID = xactID[1:] // strip leading underscore } - mainPath = dir + match[1] + parentPath = dir + match[1] ctrlType = match[3] return } @@ -514,17 +541,74 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, 
chunkNo int, ctrl // forbidChunk prints error message or raises error if file is chunk. // First argument sets log prefix, use `false` to suppress message. func (f *Fs) forbidChunk(o interface{}, filePath string) error { - if mainPath, _, _, _ := f.parseChunkName(filePath); mainPath != "" { + if parentPath, _, _, _ := f.parseChunkName(filePath); parentPath != "" { if f.opt.FailHard { - return fmt.Errorf("chunk overlap with %q", mainPath) + return fmt.Errorf("chunk overlap with %q", parentPath) } if boolVal, isBool := o.(bool); !isBool || boolVal { - fs.Errorf(o, "chunk overlap with %q", mainPath) + fs.Errorf(o, "chunk overlap with %q", parentPath) } } return nil } +// newXactID produces a sufficiently random transaction identifier. +// +// The temporary suffix mask allows identifiers consisting of 4-9 +// base-36 digits (ie. digits 0-9 or lowercase letters a-z). +// The identifiers must be unique between transactions running on +// the single file in parallel. +// +// Currently the function produces 6-character identifiers. +// Together with underscore this makes a 7-character temporary suffix. +// +// The first 4 characters isolate groups of transactions by time intervals. +// The maximum length of interval is base-36 "zzzz" ie. 1,679,615 seconds. +// The function rather takes a maximum prime closest to this number +// (see https://primes.utm.edu) as the interval length to better safeguard +// against repeating pseudo-random sequences in cases when rclone is +// invoked from a periodic scheduler like unix cron. +// Thus, the interval is slightly more than 19 days 10 hours 33 minutes. +// +// The remaining 2 base-36 digits (in the range from 0 to 1295 inclusive) +// are taken from the local random source. +// This provides about 0.1% collision probability for two parallel +// operations started at the same second and working on the same file. +// +// Non-empty filePath argument enables probing for existing temporary chunk +// to further eliminate collisions. 
+func (f *Fs) newXactID(ctx context.Context, filePath string) (xactID string, err error) { + const closestPrimeZzzzSeconds = 1679609 + const maxTwoBase36Digits = 1295 + + unixSec := time.Now().Unix() + if unixSec < 0 { + unixSec = -unixSec // unlikely but the number must be positive + } + circleSec := unixSec % closestPrimeZzzzSeconds + first4chars := strconv.FormatInt(circleSec, 36) + + for tries := 0; tries < maxTransactionProbes; tries++ { + f.xactIDMutex.Lock() + randomness := f.xactIDRand.Int63n(maxTwoBase36Digits + 1) + f.xactIDMutex.Unlock() + + last2chars := strconv.FormatInt(randomness, 36) + xactID = fmt.Sprintf("%04s%02s", first4chars, last2chars) + + if filePath == "" { + return + } + probeChunk := f.makeChunkName(filePath, 0, "", xactID) + _, probeErr := f.base.NewObject(ctx, probeChunk) + if probeErr != nil { + return + } + } + + return "", fmt.Errorf("can't setup transaction for %s", filePath) +} + // List the objects and directories in dir into entries. // The entries can be returned in any order but should be // for a complete directory. @@ -602,8 +686,8 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP switch entry := dirOrObject.(type) { case fs.Object: remote := entry.Remote() - if mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(remote); mainRemote != "" { - if xactNo != -1 { + if mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote); mainRemote != "" { + if xactID != "" { if revealHidden { fs.Infof(f, "ignore temporary chunk %q", remote) } @@ -686,7 +770,7 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP // // Please note that every NewObject invocation will scan the whole directory. // Using here something like fs.DirCache might improve performance -// (but will make logic more complex, though). +// (yet making the logic more complex). 
// // Note that chunker prefers analyzing file names rather than reading // the content of meta object assuming that directory scans are fast @@ -752,8 +836,8 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { if !strings.Contains(entryRemote, remote) { continue // bypass regexp to save cpu } - mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(entryRemote) - if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactNo != -1 { + mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote) + if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactID != "" { continue // skip non-conforming, temporary and control chunks } //fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo) @@ -786,7 +870,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { // This is either a composite object with metadata or a non-chunked // file without metadata. Validate it and update the total data size. // As an optimization, skip metadata reading here - we will call - // readMetadata lazily when needed. + // readMetadata lazily when needed (reading can be expensive). if err := o.validate(); err != nil { return nil, err } @@ -843,14 +927,11 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st } }() - // Use system timer as a trivial source of transaction numbers, - // don't try hard to safeguard against chunk collisions between - // parallel transactions. 
- xactNo := time.Now().Unix() - if xactNo < 0 { - xactNo = -xactNo // unlikely but transaction number must be positive - } baseRemote := remote + xactID, errXact := f.newXactID(ctx, baseRemote) + if errXact != nil { + return nil, errXact + } // Transfer chunks data for c.chunkNo = 0; !c.done; c.chunkNo++ { @@ -858,7 +939,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st return nil, ErrChunkOverflow } - tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo) + tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactID) size := c.sizeLeft if size > c.chunkSize { size = c.chunkSize @@ -962,7 +1043,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st // Rename data chunks from temporary to final names for chunkNo, chunk := range c.chunks { - chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", -1) + chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", "") chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed) if errMove != nil { return nil, errMove @@ -1221,11 +1302,6 @@ func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, return f.newObject("", o, nil), nil } -// Precision returns the precision of this Fs -func (f *Fs) Precision() time.Duration { - return f.base.Precision() -} - // Hashes returns the supported hash sets. // Chunker advertises a hash type if and only if it can be calculated // for files of any size, non-chunked or composite. 
@@ -1613,8 +1689,8 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT wrappedNotifyFunc := func(path string, entryType fs.EntryType) { //fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType) if entryType == fs.EntryObject { - mainPath, _, _, xactNo := f.parseChunkName(path) - if mainPath != "" && xactNo == -1 { + mainPath, _, _, xactID := f.parseChunkName(path) + if mainPath != "" && xactID == "" { path = mainPath } } @@ -2063,7 +2139,7 @@ type metaSimpleJSON struct { // Current implementation creates metadata in three cases: // - for files larger than chunk size // - if file contents can be mistaken as meta object -// - if consistent hashing is on but wrapped remote can't provide given hash +// - if consistent hashing is On but wrapped remote can't provide given hash // func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) { version := metadataVersion @@ -2177,6 +2253,11 @@ func (f *Fs) String() string { return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root) } +// Precision returns the precision of this Fs +func (f *Fs) Precision() time.Duration { + return f.base.Precision() +} + // Check the interfaces are satisfied var ( _ fs.Fs = (*Fs)(nil) diff --git a/backend/chunker/chunker_internal_test.go b/backend/chunker/chunker_internal_test.go index 372fa7bc6..6ba6890d2 100644 --- a/backend/chunker/chunker_internal_test.go +++ b/backend/chunker/chunker_internal_test.go @@ -64,35 +64,40 @@ func testChunkNameFormat(t *testing.T, f *Fs) { assert.Error(t, err) } - assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType string, xactNo int64) { - gotChunkName := f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) - assert.Equal(t, wantChunkName, gotChunkName) + assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType, xactID string) { + gotChunkName := "" + assert.NotPanics(t, func() { + gotChunkName = f.makeChunkName(mainName, chunkNo, 
ctrlType, xactID) + }, "makeChunkName(%q,%d,%q,%q) must not panic", mainName, chunkNo, ctrlType, xactID) + if gotChunkName != "" { + assert.Equal(t, wantChunkName, gotChunkName) + } } - assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType string, xactNo int64) { + assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType, xactID string) { assert.Panics(t, func() { - _ = f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) - }, "makeChunkName(%q,%d,%q,%d) should panic", mainName, chunkNo, ctrlType, xactNo) + _ = f.makeChunkName(mainName, chunkNo, ctrlType, xactID) + }, "makeChunkName(%q,%d,%q,%q) should panic", mainName, chunkNo, ctrlType, xactID) } - assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType string, wantXactNo int64) { - gotMainName, gotChunkNo, gotCtrlType, gotXactNo := f.parseChunkName(fileName) + assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType, wantXactID string) { + gotMainName, gotChunkNo, gotCtrlType, gotXactID := f.parseChunkName(fileName) assert.Equal(t, wantMainName, gotMainName) assert.Equal(t, wantChunkNo, gotChunkNo) assert.Equal(t, wantCtrlType, gotCtrlType) - assert.Equal(t, wantXactNo, gotXactNo) + assert.Equal(t, wantXactID, gotXactID) } const newFormatSupported = false // support for patterns not starting with base name (*) // valid formats - assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - 
assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) if newFormatSupported { - assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z]{3,9})),(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z][a-z0-9]{2,6})),(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) } // invalid formats @@ -111,142 +116,223 @@ func testChunkNameFormat(t *testing.T, f *Fs) { // quick tests if newFormatSupported { - assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9][0-9a-z]{3,8})\.\.tmp_([0-9]{10,13}))?$`) f.opt.StartFrom = 1 - assertMakeName(`part_fish_1`, "fish", 0, "", -1) - assertParseName(`part_fish_43`, 
"fish", 42, "", -1) - assertMakeName(`part_fish_3..tmp_0000000004`, "fish", 2, "", 4) - assertParseName(`part_fish_4..tmp_0000000005`, "fish", 3, "", 5) - assertMakeName(`part_fish__locks`, "fish", -2, "locks", -3) - assertParseName(`part_fish__locks`, "fish", -1, "locks", -1) - assertMakeName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -3, "blockinfo", 1234567890123456789) - assertParseName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) + assertMakeName(`part_fish_1`, "fish", 0, "", "") + assertParseName(`part_fish_43`, "fish", 42, "", "") + assertMakeName(`part_fish__locks`, "fish", -2, "locks", "") + assertParseName(`part_fish__locks`, "fish", -1, "locks", "") + assertMakeName(`part_fish__x2y`, "fish", -2, "x2y", "") + assertParseName(`part_fish__x2y`, "fish", -1, "x2y", "") + assertMakeName(`part_fish_3_0004`, "fish", 2, "", "4") + assertParseName(`part_fish_4_0005`, "fish", 3, "", "0005") + assertMakeName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -3, "blkinfo", "jj5fvo3wr") + assertParseName(`part_fish__blkinfo_zz9fvo3wr`, "fish", -1, "blkinfo", "zz9fvo3wr") + + // old-style temporary suffix (parse only) + assertParseName(`part_fish_4..tmp_0000000011`, "fish", 3, "", "000b") + assertParseName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -1, "blkinfo", "jj5fvo3wr") } // prepare format for long tests - assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) f.opt.StartFrom = 2 // valid data chunks - assertMakeName(`fish.chunk.003`, "fish", 1, "", -1) - assertMakeName(`fish.chunk.011..tmp_0000054321`, "fish", 9, "", 54321) - assertMakeName(`fish.chunk.011..tmp_1234567890`, "fish", 9, "", 1234567890) - assertMakeName(`fish.chunk.1916..tmp_123456789012345`, "fish", 1914, 
"", 123456789012345) + assertMakeName(`fish.chunk.003`, "fish", 1, "", "") + assertParseName(`fish.chunk.003`, "fish", 1, "", "") + assertMakeName(`fish.chunk.021`, "fish", 19, "", "") + assertParseName(`fish.chunk.021`, "fish", 19, "", "") - assertParseName(`fish.chunk.003`, "fish", 1, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021`, "fish", 2, "", 21) - assertParseName(`fish.chunk.021`, "fish", 19, "", -1) - assertParseName(`fish.chunk.323..tmp_1234567890123456789`, "fish", 321, "", 1234567890123456789) + // valid temporary data chunks + assertMakeName(`fish.chunk.011_4321`, "fish", 9, "", "4321") + assertParseName(`fish.chunk.011_4321`, "fish", 9, "", "4321") + assertMakeName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc") + assertParseName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc") + assertMakeName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr") + assertParseName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr") + assertMakeName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr") + assertParseName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr") + + // valid temporary data chunks (old temporary suffix, only parse) + assertParseName(`fish.chunk.004..tmp_0000000047`, "fish", 2, "", "001b") + assertParseName(`fish.chunk.323..tmp_9994567890123`, "fish", 321, "", "3jjfvo3wr") // parsing invalid data chunk names - assertParseName(`fish.chunk.3`, "", -1, "", -1) - assertParseName(`fish.chunk.001`, "", -1, "", -1) - assertParseName(`fish.chunk.21`, "", -1, "", -1) - assertParseName(`fish.chunk.-21`, "", -1, "", -1) + assertParseName(`fish.chunk.3`, "", -1, "", "") + assertParseName(`fish.chunk.001`, "", -1, "", "") + assertParseName(`fish.chunk.21`, "", -1, "", "") + assertParseName(`fish.chunk.-21`, "", -1, "", "") - assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", -1) - assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", -1) - 
assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", -1) - assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", -1) + assertParseName(`fish.chunk.004abcd`, "", -1, "", "") // missing underscore delimiter + assertParseName(`fish.chunk.004__1234`, "", -1, "", "") // extra underscore delimiter + assertParseName(`fish.chunk.004_123`, "", -1, "", "") // too short temporary suffix + assertParseName(`fish.chunk.004_1234567890`, "", -1, "", "") // too long temporary suffix + assertParseName(`fish.chunk.004_-1234`, "", -1, "", "") // temporary suffix must be positive + assertParseName(`fish.chunk.004_123E`, "", -1, "", "") // uppercase not allowed + assertParseName(`fish.chunk.004_12.3`, "", -1, "", "") // punctuation not allowed + + // parsing invalid data chunk names (old temporary suffix) + assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", "") + assertParseName(`fish.chunk.323..tmp_12345678901234`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", "") // valid control chunks - assertMakeName(`fish.chunk._info`, "fish", -1, "info", -1) - assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", -1) - assertMakeName(`fish.chunk._blockinfo`, "fish", -3, "blockinfo", -1) + assertMakeName(`fish.chunk._info`, "fish", -1, "info", "") + assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", "") + assertMakeName(`fish.chunk._blkinfo`, "fish", -3, "blkinfo", "") + assertMakeName(`fish.chunk._x2y`, "fish", -4, "x2y", "") - assertParseName(`fish.chunk._info`, "fish", -1, "info", -1) - assertParseName(`fish.chunk._locks`, "fish", -1, "locks", -1) - assertParseName(`fish.chunk._blockinfo`, "fish", -1, "blockinfo", -1) + assertParseName(`fish.chunk._info`, "fish", -1, "info", "") + assertParseName(`fish.chunk._locks`, "fish", -1, "locks", "") + 
assertParseName(`fish.chunk._blkinfo`, "fish", -1, "blkinfo", "") + assertParseName(`fish.chunk._x2y`, "fish", -1, "x2y", "") // valid temporary control chunks - assertMakeName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) - assertMakeName(`fish.chunk._locks..tmp_0000054321`, "fish", -2, "locks", 54321) - assertMakeName(`fish.chunk._uploads..tmp_0000000000`, "fish", -3, "uploads", 0) - assertMakeName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -4, "blockinfo", 1234567890123456789) + assertMakeName(`fish.chunk._info_0001`, "fish", -1, "info", "1") + assertMakeName(`fish.chunk._locks_4321`, "fish", -2, "locks", "4321") + assertMakeName(`fish.chunk._uploads_abcd`, "fish", -3, "uploads", "abcd") + assertMakeName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -4, "blkinfo", "xyzabcdef") + assertMakeName(`fish.chunk._x2y_1aaa`, "fish", -5, "x2y", "1aaa") - assertParseName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) - assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", 54321) - assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", 0) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) + assertParseName(`fish.chunk._info_0001`, "fish", -1, "info", "0001") + assertParseName(`fish.chunk._locks_4321`, "fish", -1, "locks", "4321") + assertParseName(`fish.chunk._uploads_9abc`, "fish", -1, "uploads", "9abc") + assertParseName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -1, "blkinfo", "xyzabcdef") + assertParseName(`fish.chunk._x2y_1aaa`, "fish", -1, "x2y", "1aaa") + + // valid temporary control chunks (old temporary suffix, parse only) + assertParseName(`fish.chunk._info..tmp_0000000047`, "fish", -1, "info", "001b") + assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", "15wx") + assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", "0000") + 
assertParseName(`fish.chunk._blkinfo..tmp_9994567890123`, "fish", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._x2y..tmp_0000000000`, "fish", -1, "x2y", "0000") // parsing invalid control chunk names - assertParseName(`fish.chunk.info`, "", -1, "", -1) - assertParseName(`fish.chunk.locks`, "", -1, "", -1) - assertParseName(`fish.chunk.uploads`, "", -1, "", -1) - assertParseName(`fish.chunk.blockinfo`, "", -1, "", -1) + assertParseName(`fish.chunk.metadata`, "", -1, "", "") // must be prepended by underscore + assertParseName(`fish.chunk.info`, "", -1, "", "") + assertParseName(`fish.chunk.locks`, "", -1, "", "") + assertParseName(`fish.chunk.uploads`, "", -1, "", "") - assertParseName(`fish.chunk._os`, "", -1, "", -1) - assertParseName(`fish.chunk._futuredata`, "", -1, "", -1) - assertParseName(`fish.chunk._me_ta`, "", -1, "", -1) - assertParseName(`fish.chunk._in-fo`, "", -1, "", -1) - assertParseName(`fish.chunk._.bin`, "", -1, "", -1) + assertParseName(`fish.chunk._os`, "", -1, "", "") // too short + assertParseName(`fish.chunk._metadata`, "", -1, "", "") // too long + assertParseName(`fish.chunk._blockinfo`, "", -1, "", "") // way too long + assertParseName(`fish.chunk._4me`, "", -1, "", "") // cannot start with digit + assertParseName(`fish.chunk._567`, "", -1, "", "") // cannot be all digits + assertParseName(`fish.chunk._me_ta`, "", -1, "", "") // punctuation not allowed + assertParseName(`fish.chunk._in-fo`, "", -1, "", "") + assertParseName(`fish.chunk._.bin`, "", -1, "", "") + assertParseName(`fish.chunk._.2xy`, "", -1, "", "") - assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", -1) - assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", -1) - assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", -1) + // parsing invalid temporary control chunks + assertParseName(`fish.chunk._blkinfo1234`, "", -1, "", "") // missing underscore delimiter + assertParseName(`fish.chunk._info__1234`, "", -1, "", 
"") // extra underscore delimiter + assertParseName(`fish.chunk._info_123`, "", -1, "", "") // too short temporary suffix + assertParseName(`fish.chunk._info_1234567890`, "", -1, "", "") // too long temporary suffix + assertParseName(`fish.chunk._info_-1234`, "", -1, "", "") // temporary suffix must be positive + assertParseName(`fish.chunk._info_123E`, "", -1, "", "") // uppercase not allowed + assertParseName(`fish.chunk._info_12.3`, "", -1, "", "") // punctuation not allowed + + assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", "") + assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", "") + assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", "") // short control chunk names: 3 letters ok, 1-2 letters not allowed - assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", -1) - assertMakeName(`fish.chunk._ext..tmp_0000000021`, "fish", -1, "ext", 21) - assertParseName(`fish.chunk._int`, "fish", -1, "int", -1) - assertParseName(`fish.chunk._int..tmp_0000000021`, "fish", -1, "int", 21) - assertMakeNamePanics("fish", -1, "in", -1) - assertMakeNamePanics("fish", -1, "up", 4) - assertMakeNamePanics("fish", -1, "x", -1) - assertMakeNamePanics("fish", -1, "c", 4) + assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", "") + assertParseName(`fish.chunk._int`, "fish", -1, "int", "") + + assertMakeNamePanics("fish", -1, "in", "") + assertMakeNamePanics("fish", -1, "up", "4") + assertMakeNamePanics("fish", -1, "x", "") + assertMakeNamePanics("fish", -1, "c", "1z") + + assertMakeName(`fish.chunk._ext_0000`, "fish", -1, "ext", "0") + assertMakeName(`fish.chunk._ext_0026`, "fish", -1, "ext", "26") + assertMakeName(`fish.chunk._int_0abc`, "fish", -1, "int", "abc") + assertMakeName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz") + assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + + assertParseName(`fish.chunk._ext_0000`, 
"fish", -1, "ext", "0000") + assertParseName(`fish.chunk._ext_0026`, "fish", -1, "ext", "0026") + assertParseName(`fish.chunk._int_0abc`, "fish", -1, "int", "0abc") + assertParseName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz") + assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") // base file name can sometimes look like a valid chunk name - assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", -1) - assertParseName(`fish.chunk.003.chunk.005..tmp_0000000021`, "fish.chunk.003", 3, "", 21) - assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", -1) - assertParseName(`fish.chunk.003.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.003", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", -1) + assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", "") + assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", "") + assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", "") - assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000021`, "fish.chunk.004..tmp_0000000021", 3, "", 21) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.004..tmp_0000000021", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", -1) + assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", "") + 
assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", "") + assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", "") - assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", -1) - assertParseName(`fish.chunk._info.chunk.005..tmp_0000000021`, "fish.chunk._info", 3, "", 21) - assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", -1) - assertParseName(`fish.chunk._info.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._info", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1) + // base file name looking like a valid chunk name (old temporary suffix) + assertParseName(`fish.chunk.003.chunk.005..tmp_0000000022`, "fish.chunk.003", 3, "", "000m") + assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", "") + assertParseName(`fish.chunk._info.chunk.005..tmp_0000000023`, "fish.chunk._info", 3, "", "000n") + assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blockinfo..tmp_1234567890123456789", 2, "", -1) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.005..tmp_0000000021`, "fish.chunk._blockinfo..tmp_1234567890123456789", 3, "", 21) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "info", -1) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", -1) - 
assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1) + assertParseName(`fish.chunk.003.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.003", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._info.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._info", -1, "blkinfo", "3jjfvo3wr") + + assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000025`, "fish.chunk.004..tmp_0000000021", 3, "", "000p") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.004..tmp_0000000021", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", "") + + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.004`, "fish.chunk._blkinfo..tmp_9994567890123", 2, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.005..tmp_0000000026`, "fish.chunk._blkinfo..tmp_9994567890123", 3, "", "000q") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "info", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") + + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blkinfo..tmp_1234567890123456789", 2, "", "") + 
assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.005..tmp_0000000022`, "fish.chunk._blkinfo..tmp_1234567890123456789", 3, "", "000m") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "info", "") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") // attempts to make invalid chunk names - assertMakeNamePanics("fish", -1, "", -1) // neither data nor control - assertMakeNamePanics("fish", 0, "info", -1) // both data and control - assertMakeNamePanics("fish", -1, "futuredata", -1) // control type too long - assertMakeNamePanics("fish", -1, "123", -1) // digits not allowed - assertMakeNamePanics("fish", -1, "Meta", -1) // only lower case letters allowed - assertMakeNamePanics("fish", -1, "in-fo", -1) // punctuation not allowed - assertMakeNamePanics("fish", -1, "_info", -1) - assertMakeNamePanics("fish", -1, "info_", -1) - assertMakeNamePanics("fish", -2, ".bind", -3) - assertMakeNamePanics("fish", -2, "bind.", -3) + assertMakeNamePanics("fish", -1, "", "") // neither data nor control + assertMakeNamePanics("fish", 0, "info", "") // both data and control + assertMakeNamePanics("fish", -1, "metadata", "") // control type too long + assertMakeNamePanics("fish", -1, "blockinfo", "") // control type way too long + assertMakeNamePanics("fish", -1, "2xy", "") // first digit not allowed + assertMakeNamePanics("fish", -1, "123", "") // all digits not allowed + assertMakeNamePanics("fish", -1, "Meta", "") // only lower case letters allowed + assertMakeNamePanics("fish", -1, "in-fo", "") // punctuation not allowed + 
assertMakeNamePanics("fish", -1, "_info", "") + assertMakeNamePanics("fish", -1, "info_", "") + assertMakeNamePanics("fish", -2, ".bind", "") + assertMakeNamePanics("fish", -2, "bind.", "") - assertMakeNamePanics("fish", -1, "", 1) // neither data nor control - assertMakeNamePanics("fish", 0, "info", 12) // both data and control - assertMakeNamePanics("fish", -1, "futuredata", 45) // control type too long - assertMakeNamePanics("fish", -1, "123", 123) // digits not allowed - assertMakeNamePanics("fish", -1, "Meta", 456) // only lower case letters allowed - assertMakeNamePanics("fish", -1, "in-fo", 321) // punctuation not allowed - assertMakeNamePanics("fish", -1, "_info", 15678) - assertMakeNamePanics("fish", -1, "info_", 999) - assertMakeNamePanics("fish", -2, ".bind", 0) - assertMakeNamePanics("fish", -2, "bind.", 0) + assertMakeNamePanics("fish", -1, "", "1") // neither data nor control + assertMakeNamePanics("fish", 0, "info", "23") // both data and control + assertMakeNamePanics("fish", -1, "metadata", "45") // control type too long + assertMakeNamePanics("fish", -1, "blockinfo", "7") // control type way too long + assertMakeNamePanics("fish", -1, "2xy", "abc") // first digit not allowed + assertMakeNamePanics("fish", -1, "123", "def") // all digits not allowed + assertMakeNamePanics("fish", -1, "Meta", "mnk") // only lower case letters allowed + assertMakeNamePanics("fish", -1, "in-fo", "xyz") // punctuation not allowed + assertMakeNamePanics("fish", -1, "_info", "5678") + assertMakeNamePanics("fish", -1, "info_", "999") + assertMakeNamePanics("fish", -2, ".bind", "0") + assertMakeNamePanics("fish", -2, "bind.", "0") + + assertMakeNamePanics("fish", 0, "", "1234567890") // temporary suffix too long + assertMakeNamePanics("fish", 0, "", "123F4") // uppercase not allowed + assertMakeNamePanics("fish", 0, "", "123.") // punctuation not allowed + assertMakeNamePanics("fish", 0, "", "_123") } func testSmallFileInternals(t *testing.T, f *Fs) { @@ -383,7 +469,7 @@ 
func testPreventCorruption(t *testing.T, f *Fs) { billyObj := newFile("billy") billyChunkName := func(chunkNo int) string { - return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1) + return f.makeChunkName(billyObj.Remote(), chunkNo, "", "") } err := f.Mkdir(ctx, billyChunkName(1)) @@ -433,7 +519,7 @@ func testPreventCorruption(t *testing.T, f *Fs) { // recreate billy in case it was anyhow corrupted willyObj := newFile("willy") - willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1) + willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", "") f.opt.FailHard = false willyChunk, err := f.NewObject(ctx, willyChunkName) f.opt.FailHard = true @@ -484,7 +570,7 @@ func testChunkNumberOverflow(t *testing.T, f *Fs) { f.opt.FailHard = false file, fileName := newFile(f, "wreaker") - wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", -1)) + wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", "")) f.opt.FailHard = false fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision()) @@ -532,7 +618,7 @@ func testMetadataInput(t *testing.T, f *Fs) { filename := path.Join(dir, name) require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct") - part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true) + part := putFile(f.base, f.makeChunkName(filename, 0, "", ""), "oops", "", true) _ = putFile(f, filename, contents, "upload "+description, false) obj, err := f.NewObject(ctx, filename) diff --git a/docs/content/chunker.md b/docs/content/chunker.md index 8f362acfc..80cb8e415 100644 --- a/docs/content/chunker.md +++ b/docs/content/chunker.md @@ -130,10 +130,10 @@ error message in such cases. #### Chunk names -The default chunk name format is `*.rclone-chunk.###`, hence by default -chunk names are `BIG_FILE_NAME.rclone-chunk.001`, -`BIG_FILE_NAME.rclone-chunk.002` etc. You can configure a different name -format using the `--chunker-name-format` option. 
The format uses asterisk +The default chunk name format is `*.rclone_chunk.###`, hence by default +chunk names are `BIG_FILE_NAME.rclone_chunk.001`, +`BIG_FILE_NAME.rclone_chunk.002` etc. You can configure another name format +using the `name_format` configuration file option. The format uses asterisk `*` as a placeholder for the base file name and one or more consecutive hash characters `#` as a placeholder for sequential chunk number. There must be one and only one asterisk. The number of consecutive hash @@ -211,6 +211,9 @@ file hashing, configure chunker with `md5all` or `sha1all`. These two modes guarantee given hash for all files. If wrapped remote doesn't support it, chunker will then add metadata to all files, even small. However, this can double the amount of small files in storage and incur additional service charges. +You can even use chunker to force md5/sha1 support in any other remote +at the expense of sidecar meta objects by setting e.g. `hash_type=sha1all` +to force hashsums and `chunk_size=1P` to effectively disable chunking. Normally, when a file is copied to chunker controlled remote, chunker will ask the file source for compatible file hash and revert to on-the-fly @@ -274,6 +277,14 @@ Chunker requires wrapped remote to support server side `move` (or `copy` + This is because it internally renames temporary chunk files to their final names when an operation completes successfully. +Chunker encodes chunk number in file name, so with default `name_format` +setting it adds 17 characters. Also chunker adds 7 characters of temporary +suffix during operations. Many file systems limit base file name without path +to 255 characters. Using rclone's crypt remote as a base file system limits +file name to 143 characters. Thus, maximum name length is 231 for most files +and 119 for chunker-over-crypt. A user in need can change name format to +e.g. `*.rcc##` and save 10 characters (provided at most 99 chunks per file).
+ Note that a move implemented using the copy-and-delete method may incur double charging with some cloud storage providers.