mirror of
https://code.videolan.org/videolan/vlc
synced 2024-07-21 07:24:15 +02:00
IsUTF8: check if a string is a valid UTF8 sequence without modifying it
EnsureUTF8 would replace invalid byte sequences with question marks, so it cannot be used for a read-only check. Most of the code was already there anyway. This allows UTF-8 autodetection without a dedicated UTF8-to-UTF8 iconv() handle.
This commit is contained in:
parent
f0ffeb173a
commit
d34aa78dc1
@ -46,6 +46,8 @@ int utf8_fprintf( FILE *, const char *, ... );
|
||||
#endif
|
||||
|
||||
VLC_EXPORT( char *, EnsureUTF8, ( char * ) );
|
||||
VLC_EXPORT( const char *, IsUTF8, ( const char * ) );
|
||||
|
||||
VLC_EXPORT( char *, FromUTF32, ( const uint32_t * ) );
|
||||
VLC_EXPORT( char *, FromUTF16, ( const uint16_t * ) );
|
||||
|
||||
|
@ -485,6 +485,7 @@ struct module_symbols_t
|
||||
char * (*decode_encoded_URI_duplicate_inner) (const char *psz);
|
||||
void (*resolve_xml_special_chars_inner) (char *psz_value);
|
||||
char * (*FromUTF16_inner) (const uint16_t *);
|
||||
const char * (*IsUTF8_inner) (const char *);
|
||||
};
|
||||
# if defined (__PLUGIN__)
|
||||
# define aout_FiltersCreatePipeline (p_symbols)->aout_FiltersCreatePipeline_inner
|
||||
@ -950,6 +951,7 @@ struct module_symbols_t
|
||||
# define decode_encoded_URI_duplicate (p_symbols)->decode_encoded_URI_duplicate_inner
|
||||
# define resolve_xml_special_chars (p_symbols)->resolve_xml_special_chars_inner
|
||||
# define FromUTF16 (p_symbols)->FromUTF16_inner
|
||||
# define IsUTF8 (p_symbols)->IsUTF8_inner
|
||||
# elif defined (HAVE_DYNAMIC_PLUGINS) && !defined (__BUILTIN__)
|
||||
/******************************************************************
|
||||
* STORE_SYMBOLS: store VLC APIs into p_symbols for plugin access.
|
||||
@ -1418,6 +1420,7 @@ struct module_symbols_t
|
||||
((p_symbols)->decode_encoded_URI_duplicate_inner) = decode_encoded_URI_duplicate; \
|
||||
((p_symbols)->resolve_xml_special_chars_inner) = resolve_xml_special_chars; \
|
||||
((p_symbols)->FromUTF16_inner) = FromUTF16; \
|
||||
((p_symbols)->IsUTF8_inner) = IsUTF8; \
|
||||
(p_symbols)->net_ConvertIPv4_deprecated = NULL; \
|
||||
(p_symbols)->__stats_CounterGet_deprecated = NULL; \
|
||||
(p_symbols)->__stats_TimerDumpAll_deprecated = NULL; \
|
||||
|
@ -299,9 +299,9 @@ void LocaleFree( const char *str )
|
||||
#endif
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
/**
|
||||
* utf8_fopen: Calls fopen() after conversion of file name to OS locale
|
||||
*****************************************************************************/
|
||||
*/
|
||||
FILE *utf8_fopen( const char *filename, const char *mode )
|
||||
{
|
||||
#if !(defined (WIN32) || defined (UNDER_CE))
|
||||
@ -337,9 +337,9 @@ FILE *utf8_fopen( const char *filename, const char *mode )
|
||||
#endif
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
/**
|
||||
* utf8_mkdir: Calls mkdir() after conversion of file name to OS locale
|
||||
*****************************************************************************/
|
||||
*/
|
||||
int utf8_mkdir( const char *dirname )
|
||||
{
|
||||
#if defined (UNDER_CE) || defined (WIN32)
|
||||
@ -464,9 +464,9 @@ int utf8_lstat( const char *filename, void *buf)
|
||||
return utf8_statEx( filename, buf, VLC_FALSE );
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
/**
|
||||
* utf8_*printf: *printf with conversion from UTF-8 to local encoding
|
||||
*****************************************************************************/
|
||||
*/
|
||||
static int utf8_vasprintf( char **str, const char *fmt, va_list ap )
|
||||
{
|
||||
char *utf8;
|
||||
@ -502,15 +502,9 @@ int utf8_fprintf( FILE *stream, const char *fmt, ... )
|
||||
return res;
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks
|
||||
*****************************************************************************
|
||||
* Not Todo : convert Latin1 to UTF-8 on the fly
|
||||
* It is not possible given UTF-8 needs more space
|
||||
* Returns str if it was valid UTF-8, NULL if not.
|
||||
*****************************************************************************/
|
||||
|
||||
static char *CheckUTF8( char *str, char rep )
|
||||
#define isutf8cont( c ) (((c) >= 0x80) && ((c) <= 0xBF))
|
||||
char *EnsureUTF8( char *str )
|
||||
{
|
||||
unsigned char *ptr, c;
|
||||
|
||||
@ -646,6 +640,8 @@ char *EnsureUTF8( char *str )
|
||||
continue;
|
||||
|
||||
error:
|
||||
if( rep == 0 )
|
||||
return NULL;
|
||||
*ptr++ = '?';
|
||||
str = NULL;
|
||||
}
|
||||
@ -653,6 +649,32 @@ error:
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* EnsureUTF8: replaces invalid/overlong UTF-8 sequences with question marks
|
||||
* Note that it is not possible to convert from Latin-1 to UTF-8 on the fly,
|
||||
* so we don't try that, even though it would be less disruptive.
|
||||
*
|
||||
* @return str if it was valid UTF-8, NULL if not.
|
||||
*/
|
||||
char *EnsureUTF8( char *str )
|
||||
{
|
||||
/* A non-zero second argument selects in-place repair: on the visible
 * error path of CheckUTF8 the offending byte is overwritten with '?'
 * and the value eventually returned becomes NULL. */
return CheckUTF8( str, '?' );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IsUTF8: checks whether a string is a valid UTF-8 byte sequence.
|
||||
*
|
||||
* @param str nul-terminated string to be checked
|
||||
*
|
||||
* @return str if it was valid UTF-8, NULL if not.
|
||||
*/
|
||||
const char *IsUTF8( const char *str )
|
||||
{
|
||||
/* Passing 0 as the replacement makes CheckUTF8 return NULL at the first
 * error, before its error path writes a replacement byte; the cast that
 * drops const is therefore not expected to lead to a write.
 * NOTE(review): only part of CheckUTF8 is visible here; confirm it has
 * no other store through the pointer. */
return CheckUTF8( (char *)str, 0 );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* UTF32toUTF8(): converts an array from UTF-32 (host byte order)
|
||||
* to UTF-8.
|
||||
|
Loading…
Reference in New Issue
Block a user