osx: consistent normalisation when searching for external files

several unicode characters can be encoded in two different ways, either
in a precomposed (NFC) or decomposed (NFD) representation. everywhere
except on macOS, specifically HFS+, precomposed strings are used.
furthermore, on macOS we can get either precomposed or decomposed
strings, for example when volumes that are not HFS+ formatted are used.
that can be the case for network-mounted devices (SMB, NFS) or
optical/removable devices (UDF). this can lead to actually equal strings
comparing as unequal,
which can happen when comparing strings from different sources, like the
command line or filesystem. this makes it mainly a problem on macOS
systems.

one case that can potentially break is the sub-auto option. to prevent
that we convert the search string as well as the string we search in to
the same normalised representation, specifically the precomposed form,
which is used everywhere else.

this could potentially be a problem on other platforms too, though the
likelihood of it occurring is very low. for those platforms we don't
convert anything and just fall back to the input.

Fixes #4016
This commit is contained in:
Akemi 2017-01-24 19:48:02 +01:00
parent ce23dfa2fa
commit 8bbdecea83
3 changed files with 19 additions and 4 deletions

View File

@ -234,5 +234,9 @@ bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags)
#endif #endif
failure: failure:
return bstr_sanitize_utf8_latin1(NULL, buf); if (flags & MP_NO_LATIN1_FALLBACK) {
return buf;
} else {
return bstr_sanitize_utf8_latin1(NULL, buf);
}
} }

View File

@ -10,6 +10,7 @@ enum {
MP_ICONV_VERBOSE = 1, // print errors instead of failing silently MP_ICONV_VERBOSE = 1, // print errors instead of failing silently
MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data
MP_STRICT_UTF8 = 4, // don't fall back to UTF-8-BROKEN when guessing MP_STRICT_UTF8 = 4, // don't fall back to UTF-8-BROKEN when guessing
MP_NO_LATIN1_FALLBACK = 8, // fall back to input buffer instead of latin1
}; };
bool mp_charset_is_utf8(const char *user_cp); bool mp_charset_is_utf8(const char *user_cp);

View File

@ -10,6 +10,7 @@
#include "common/global.h" #include "common/global.h"
#include "common/msg.h" #include "common/msg.h"
#include "misc/ctype.h" #include "misc/ctype.h"
#include "misc/charset_conv.h"
#include "options/options.h" #include "options/options.h"
#include "options/path.h" #include "options/path.h"
#include "external_files.h" #include "external_files.h"
@ -98,11 +99,16 @@ static void append_dir_subtitles(struct mpv_global *global,
if (mp_is_url(bstr0(fname))) if (mp_is_url(bstr0(fname)))
goto out; goto out;
struct bstr f_fname = bstr0(mp_basename(fname)); struct bstr f_fbname = bstr0(mp_basename(fname));
struct bstr f_fname = mp_iconv_to_utf8(log, f_fbname,
"UTF-8-MAC", MP_NO_LATIN1_FALLBACK);
struct bstr f_fname_noext = bstrdup(tmpmem, bstr_strip_ext(f_fname)); struct bstr f_fname_noext = bstrdup(tmpmem, bstr_strip_ext(f_fname));
bstr_lower(f_fname_noext); bstr_lower(f_fname_noext);
struct bstr f_fname_trim = bstr_strip(f_fname_noext); struct bstr f_fname_trim = bstr_strip(f_fname_noext);
if (f_fbname.start != f_fname.start)
talloc_steal(tmpmem, f_fname.start);
// 0 = nothing // 0 = nothing
// 1 = any subtitle file // 1 = any subtitle file
// 2 = any sub file containing movie name // 2 = any sub file containing movie name
@ -114,15 +120,19 @@ static void append_dir_subtitles(struct mpv_global *global,
mp_verbose(log, "Loading external files in %.*s\n", BSTR_P(path)); mp_verbose(log, "Loading external files in %.*s\n", BSTR_P(path));
struct dirent *de; struct dirent *de;
while ((de = readdir(d))) { while ((de = readdir(d))) {
struct bstr dename = bstr0(de->d_name);
void *tmpmem2 = talloc_new(tmpmem); void *tmpmem2 = talloc_new(tmpmem);
struct bstr den = bstr0(de->d_name);
struct bstr dename = mp_iconv_to_utf8(log, den,
"UTF-8-MAC", MP_NO_LATIN1_FALLBACK);
// retrieve various parts of the filename // retrieve various parts of the filename
struct bstr tmp_fname_noext = bstrdup(tmpmem2, bstr_strip_ext(dename)); struct bstr tmp_fname_noext = bstrdup(tmpmem2, bstr_strip_ext(dename));
bstr_lower(tmp_fname_noext); bstr_lower(tmp_fname_noext);
struct bstr tmp_fname_ext = bstr_get_ext(dename); struct bstr tmp_fname_ext = bstr_get_ext(dename);
struct bstr tmp_fname_trim = bstr_strip(tmp_fname_noext); struct bstr tmp_fname_trim = bstr_strip(tmp_fname_noext);
if (den.start != dename.start)
talloc_steal(tmpmem2, dename.start);
// check what it is (most likely) // check what it is (most likely)
int type = test_ext(tmp_fname_ext); int type = test_ext(tmp_fname_ext);
char **langs = NULL; char **langs = NULL;