mirror of
https://github.com/rclone/rclone
synced 2024-11-21 22:50:16 +01:00
march: added flag to allow Unicode filenames to remain unique
If your filenames contain two near-identical Unicode characters, rclone will normalize these, making them identical. This flag gives you the ability to keep them unique. This might create unintended side effects, such as duplicating files that contain certain Unicode characters, when downloading them from certain cloud providers to a macOS filesystem. Fixes #4228
This commit is contained in:
parent
4006345cfb
commit
899c8e0697
@ -908,6 +908,20 @@ changed and won't need copying then you shouldn't use `--no-traverse`.
|
||||
|
||||
See [rclone copy](/commands/rclone_copy/) for an example of how to use it.
|
||||
|
||||
### --no-unicode-normalization ###
|
||||
|
||||
Don't normalize unicode characters in filenames during the sync routine.
|
||||
|
||||
Sometimes, an operating system will store filenames containing unicode
|
||||
parts in their decomposed form (particularly macOS). Some cloud storage
|
||||
systems will then recompose the unicode, resulting in duplicate files if
|
||||
the data is ever copied back to a local filesystem.
|
||||
|
||||
Using this flag will disable rclone's normalization, treating each unicode
|
||||
character as unique. For example, by default é and é (one precomposed, one decomposed) will be normalized
|
||||
into the same character. With `--no-unicode-normalization` they will be
|
||||
treated as unique characters.
|
||||
|
||||
### --no-update-modtime ###
|
||||
|
||||
When using this flag, rclone won't update modification times of remote
|
||||
|
@ -70,6 +70,7 @@ type ConfigInfo struct {
|
||||
IgnoreCaseSync bool
|
||||
NoTraverse bool
|
||||
NoCheckDest bool
|
||||
NoUnicodeNormalization bool
|
||||
NoUpdateModTime bool
|
||||
DataRateUnit string
|
||||
CompareDest string
|
||||
|
@ -75,6 +75,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
|
||||
flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoUnicodeNormalization, "no-unicode-normalization", "", fs.Config.NoUnicodeNormalization, "Don't normalize unicode characters in filenames.")
|
||||
flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.")
|
||||
flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.")
|
||||
flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.")
|
||||
|
@ -31,6 +31,7 @@ type March struct {
|
||||
DstIncludeAll bool // don't include all files in the destination
|
||||
Callback Marcher // object to call with results
|
||||
NoCheckDest bool // transfer all objects regardless without checking dst
|
||||
NoUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||
// internal state
|
||||
srcListDir listDirFn // function to call to list a directory in the src
|
||||
dstListDir listDirFn // function to call to list a directory in the dst
|
||||
@ -55,7 +56,9 @@ func (m *March) init() {
|
||||
}
|
||||
// Now create the matching transform
|
||||
// ..normalise the UTF8 first
|
||||
if !m.NoUnicodeNormalization {
|
||||
m.transforms = append(m.transforms, norm.NFC.String)
|
||||
}
|
||||
// ..if destination is caseInsensitive then make it lower case
|
||||
// case Insensitive | src | dst | lower case compare |
|
||||
// | No | No | No |
|
||||
|
@ -19,6 +19,7 @@ import (
|
||||
"github.com/rclone/rclone/fstest/mockobject"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// Some times used in the tests
|
||||
@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) {
|
||||
b = mockobject.Object("b")
|
||||
c = mockobject.Object("c")
|
||||
d = mockobject.Object("d")
|
||||
uE1 = mockobject.Object("é") // one of the unicode E characters
|
||||
uE2 = mockobject.Object("é") // a different unicode E character
|
||||
dirA = mockdir.New("A")
|
||||
dirb = mockdir.New("b")
|
||||
)
|
||||
@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) {
|
||||
},
|
||||
transforms: []matchTransformFn{strings.ToLower},
|
||||
},
|
||||
{
|
||||
what: "Unicode near-duplicate that becomes duplicate with normalization",
|
||||
input: fs.DirEntries{
|
||||
uE1, uE1,
|
||||
uE2, uE2,
|
||||
},
|
||||
matches: []matchPair{
|
||||
{uE1, uE1},
|
||||
},
|
||||
transforms: []matchTransformFn{norm.NFC.String},
|
||||
},
|
||||
{
|
||||
what: "Unicode near-duplicate with no normalization",
|
||||
input: fs.DirEntries{
|
||||
uE1, uE1,
|
||||
uE2, uE2,
|
||||
},
|
||||
matches: []matchPair{
|
||||
{uE1, uE1},
|
||||
{uE2, uE2},
|
||||
},
|
||||
},
|
||||
{
|
||||
what: "File and directory are not duplicates - srcOnly",
|
||||
input: fs.DirEntries{
|
||||
|
@ -34,6 +34,7 @@ type syncCopyMove struct {
|
||||
cancel func() // cancel the context
|
||||
noTraverse bool // if set don't traverse the dst
|
||||
noCheckDest bool // if set transfer all objects regardless without checking dst
|
||||
noUnicodeNormalization bool // don't normalize unicode characters in filenames
|
||||
deletersWg sync.WaitGroup // for delete before go routine
|
||||
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
|
||||
trackRenames bool // set if we should do server side renames
|
||||
@ -102,6 +103,7 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
|
||||
srcEmptyDirs: make(map[string]fs.DirEntry),
|
||||
noTraverse: fs.Config.NoTraverse,
|
||||
noCheckDest: fs.Config.NoCheckDest,
|
||||
noUnicodeNormalization: fs.Config.NoUnicodeNormalization,
|
||||
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
|
||||
trackRenames: fs.Config.TrackRenames,
|
||||
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
|
||||
@ -790,6 +792,7 @@ func (s *syncCopyMove) run() error {
|
||||
Callback: s,
|
||||
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
|
||||
NoCheckDest: s.noCheckDest,
|
||||
NoUnicodeNormalization: s.noUnicodeNormalization,
|
||||
}
|
||||
s.processError(m.Run())
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user