diff --git a/desmume/src/libretro-common/encodings/encoding_utf.c b/desmume/src/libretro-common/encodings/encoding_utf.c index 93b205160..0c339e741 100644 --- a/desmume/src/libretro-common/encodings/encoding_utf.c +++ b/desmume/src/libretro-common/encodings/encoding_utf.c @@ -1,7 +1,7 @@ /* Copyright (C) 2010-2016 The RetroArch team * * --------------------------------------------------------------------------------------- - * The following license statement only applies to this file (encodings_utf.c). + * The following license statement only applies to this file (encoding_utf.c). * --------------------------------------------------------------------------------------- * * Permission is hereby granted, free of charge, @@ -21,6 +21,7 @@ */ #include +#include #include #include @@ -40,7 +41,7 @@ static INLINE unsigned leading_ones(uint8_t c) return ones; } -/* Simple implementation. Assumes the sequence is +/* Simple implementation. Assumes the sequence is * properly synchronized and terminated. */ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars, @@ -119,14 +120,14 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, if (value < (((uint32_t)1) << (numAdds * 5 + 6))) break; if (out) - out[out_pos] = (char)(kUtf8Limits[numAdds - 1] + out[out_pos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds))); out_pos++; do { numAdds--; if (out) - out[out_pos] = (char)(0x80 + out[out_pos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F)); out_pos++; }while (numAdds != 0); @@ -136,21 +137,18 @@ bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, return false; } -/* Acts mostly like strlcpy. +/* Acts mostly like strlcpy. * - * Copies the given number of UTF-8 characters, + * Copies the given number of UTF-8 characters, * but at most d_len bytes. * - * Always NULL terminates. - * Does not copy half a character. + * Always NULL terminates. + * Does not copy half a character. * * Returns number of bytes. 's' is assumed valid UTF-8. * Use only if 'chars' is considerably less than 'd_len'. */ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) { -#ifdef HAVE_UTF8 - char *d_org = d; - char *d_end = d+d_len; const uint8_t *sb = (const uint8_t*)s; const uint8_t *sb_org = sb; @@ -160,7 +158,7 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) while ((*sb&0xC0) == 0x80) sb++; } - if (sb - sb_org > d_len-1 /* NUL */) + if ((size_t)(sb - sb_org) > d_len-1 /* NUL */) { sb = sb_org + d_len-1; while ((*sb&0xC0) == 0x80) sb--; @@ -170,14 +168,10 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) d[sb-sb_org] = '\0'; return sb-sb_org; -#else - return strlcpy(d, s, chars + 1); -#endif } const char *utf8skip(const char *str, size_t chars) { -#ifdef HAVE_UTF8 const uint8_t *strb = (const uint8_t*)str; if (!chars) return str; @@ -188,14 +182,10 @@ const char *utf8skip(const char *str, size_t chars) chars--; } while(chars); return (const char*)strb; -#else - return str + chars; -#endif } size_t utf8len(const char *string) { -#ifdef HAVE_UTF8 size_t ret = 0; while (*string) { @@ -204,7 +194,67 @@ size_t utf8len(const char *string) string++; } return ret; -#else - return strlen(string); -#endif +} + +static INLINE uint8_t utf8_walkbyte(const char **string) +{ + return *((*string)++); +} + +/* Does not validate the input, returns garbage if it's not UTF-8. */ +uint32_t utf8_walk(const char **string) +{ + uint8_t first = utf8_walkbyte(string); + uint32_t ret; + + if (first<128) + return first; + + ret = 0; + ret = (ret<<6) | (utf8_walkbyte(string) & 0x3F); + if (first >= 0xE0) + ret = (ret<<6) | (utf8_walkbyte(string) & 0x3F); + if (first >= 0xF0) + ret = (ret<<6) | (utf8_walkbyte(string) & 0x3F); + + if (first >= 0xF0) + return ret | (first&31)<<18; + if (first >= 0xE0) + return ret | (first&15)<<12; + return ret | (first&7)<<6; +} + +static bool utf16_to_char(uint8_t **utf_data, + size_t *dest_len, const uint16_t *in) +{ + unsigned len = 0; + + while (in[len] != '\0') + len++; + + utf16_conv_utf8(NULL, dest_len, in, len); + *dest_len += 1; + *utf_data = (uint8_t*)malloc(*dest_len); + if (*utf_data == 0) + return false; + + return utf16_conv_utf8(*utf_data, dest_len, in, len); +} + +bool utf16_to_char_string(const uint16_t *in, char *s, size_t len) +{ + size_t dest_len = 0; + uint8_t *utf16_data = NULL; + bool ret = utf16_to_char(&utf16_data, &dest_len, in); + + if (ret) + { + utf16_data[dest_len] = 0; + strlcpy(s, (const char*)utf16_data, len); + } + + free(utf16_data); + utf16_data = NULL; + + return ret; } diff --git a/desmume/src/libretro-common/include/encodings/utf.h b/desmume/src/libretro-common/include/encodings/utf.h index 3201e0ede..289c549f8 100644 --- a/desmume/src/libretro-common/include/encodings/utf.h +++ b/desmume/src/libretro-common/include/encodings/utf.h @@ -44,6 +44,11 @@ size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars); const char *utf8skip(const char *str, size_t chars); +uint32_t utf8_walk(const char **string); + + +bool utf16_to_char_string(const uint16_t *in, char *s, size_t len); + RETRO_END_DECLS #endif