1
mirror of https://github.com/rclone/rclone synced 2024-11-21 22:50:16 +01:00

march: added flag to allow Unicode filenames to remain unique

If your filenames contain two near-identical Unicode characters,
rclone will normalize these, making them identical. This flag
gives you the ability to keep them unique. This might
create unintended side effects, such as duplicating files that
contain certain Unicode characters, when downloading them from
certain cloud providers to a macOS filesystem.

Fixes #4228
This commit is contained in:
Ben Zenker 2020-05-14 19:27:59 -04:00 committed by Nick Craig-Wood
parent 4006345cfb
commit 899c8e0697
6 changed files with 119 additions and 72 deletions

View File

@ -908,6 +908,20 @@ changed and won't need copying then you shouldn't use `--no-traverse`.
See [rclone copy](/commands/rclone_copy/) for an example of how to use it.
### --no-unicode-normalization ###
Don't normalize unicode characters in filenames during the sync routine.
Sometimes, an operating system will store filenames containing unicode
parts in their decomposed form (particularly macOS). Some cloud storage
systems will then recompose the unicode, resulting in duplicate files if
the data is ever copied back to a local filesystem.
Using this flag will disable that functionality, treating each unicode
character as unique. For example, by default é and é will be normalized
into the same character. With `--no-unicode-normalization` they will be
treated as unique characters.
### --no-update-modtime ###
When using this flag, rclone won't update modification times of remote

View File

@ -70,6 +70,7 @@ type ConfigInfo struct {
IgnoreCaseSync bool
NoTraverse bool
NoCheckDest bool
NoUnicodeNormalization bool
NoUpdateModTime bool
DataRateUnit string
CompareDest string

View File

@ -75,6 +75,7 @@ func AddFlags(flagSet *pflag.FlagSet) {
flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing")
flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.")
flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.")
flags.BoolVarP(flagSet, &fs.Config.NoUnicodeNormalization, "no-unicode-normalization", "", fs.Config.NoUnicodeNormalization, "Don't normalize unicode characters in filenames.")
flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.")
flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.")
flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.")

View File

@ -31,6 +31,7 @@ type March struct {
DstIncludeAll bool // don't include all files in the destination
Callback Marcher // object to call with results
NoCheckDest bool // transfer all objects regardless without checking dst
NoUnicodeNormalization bool // don't normalize unicode characters in filenames
// internal state
srcListDir listDirFn // function to call to list a directory in the src
dstListDir listDirFn // function to call to list a directory in the dst
@ -55,7 +56,9 @@ func (m *March) init() {
}
// Now create the matching transform
// ..normalise the UTF8 first
if !m.NoUnicodeNormalization {
m.transforms = append(m.transforms, norm.NFC.String)
}
// ..if destination is caseInsensitive then make it lower case
// case Insensitive | src | dst | lower case compare |
// | No | No | No |

View File

@ -19,6 +19,7 @@ import (
"github.com/rclone/rclone/fstest/mockobject"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/text/unicode/norm"
)
// Some times used in the tests
@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) {
b = mockobject.Object("b")
c = mockobject.Object("c")
d = mockobject.Object("d")
uE1 = mockobject.Object("é") // one of the unicode E characters
uE2 = mockobject.Object("é") // a different unicode E character
dirA = mockdir.New("A")
dirb = mockdir.New("b")
)
@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) {
},
transforms: []matchTransformFn{strings.ToLower},
},
{
what: "Unicode near-duplicate that becomes duplicate with normalization",
input: fs.DirEntries{
uE1, uE1,
uE2, uE2,
},
matches: []matchPair{
{uE1, uE1},
},
transforms: []matchTransformFn{norm.NFC.String},
},
{
what: "Unicode near-duplicate with no normalization",
input: fs.DirEntries{
uE1, uE1,
uE2, uE2,
},
matches: []matchPair{
{uE1, uE1},
{uE2, uE2},
},
},
{
what: "File and directory are not duplicates - srcOnly",
input: fs.DirEntries{

View File

@ -34,6 +34,7 @@ type syncCopyMove struct {
cancel func() // cancel the context
noTraverse bool // if set don't traverse the dst
noCheckDest bool // if set transfer all objects regardless without checking dst
noUnicodeNormalization bool // don't normalize unicode characters in filenames
deletersWg sync.WaitGroup // for delete before go routine
deleteFilesCh chan fs.Object // channel to receive deletes if delete before
trackRenames bool // set if we should do server side renames
@ -102,6 +103,7 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete
srcEmptyDirs: make(map[string]fs.DirEntry),
noTraverse: fs.Config.NoTraverse,
noCheckDest: fs.Config.NoCheckDest,
noUnicodeNormalization: fs.Config.NoUnicodeNormalization,
deleteFilesCh: make(chan fs.Object, fs.Config.Checkers),
trackRenames: fs.Config.TrackRenames,
commonHash: fsrc.Hashes().Overlap(fdst.Hashes()).GetOne(),
@ -790,6 +792,7 @@ func (s *syncCopyMove) run() error {
Callback: s,
DstIncludeAll: filter.Active.Opt.DeleteExcluded,
NoCheckDest: s.noCheckDest,
NoUnicodeNormalization: s.noUnicodeNormalization,
}
s.processError(m.Run())