diff --git a/include/mgba-util/string.h b/include/mgba-util/string.h index 381869814..476b39848 100644 --- a/include/mgba-util/string.h +++ b/include/mgba-util/string.h @@ -28,6 +28,7 @@ bool endswith(const char* restrict s1, const char* restrict end); bool startswith(const char* restrict s1, const char* restrict start); size_t toUtf8(uint32_t unichar, char* buffer); +size_t toUtf16(uint32_t unichar, uint16_t* buffer); int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length); char* utf16to8(const uint16_t* utf16, size_t length); uint32_t utf8Char(const char** unicode, size_t* length); diff --git a/src/util/string.c b/src/util/string.c index 6a60abf48..d9dc701b1 100644 --- a/src/util/string.c +++ b/src/util/string.c @@ -180,6 +180,30 @@ size_t toUtf8(uint32_t unichar, char* buffer) { return 0; } +size_t toUtf16(uint32_t unichar, uint16_t* buffer) { /* Encode code point unichar as UTF-16 into buffer. Returns the number of uint16_t units written: 1, or 2 for a surrogate pair; returns 0 and writes nothing when unichar is in 0xD800-0xDFFF or is 0x110000 or above. */ + if (unichar < 0xD800) { /* below the surrogate range: stored as-is in one unit */ + buffer[0] = unichar; + return 1; + } + if (unichar < 0xE000) { + // Orphan surrogate, invalid + return 0; + } + if (unichar < 0x10000) { /* 0xE000-0xFFFF: also stored as-is in one unit */ + buffer[0] = unichar; + return 1; + } + if (unichar < 0x110000) { /* 0x10000-0x10FFFF: needs two units */ + unichar -= 0x10000; /* rebase to a 20-bit offset */ + buffer[0] = 0xD800 | (unichar >> 10); /* high unit: upper 10 bits of the offset */ + buffer[1] = 0xDC00 | (unichar & 0x3FF); /* low unit: lower 10 bits of the offset */ + return 2; + } + + // Invalid code point + return 0; +} + int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) { uint32_t char1 = 0, char2 = 0; while (utf16Length > 0 && utf8Length > 0) { @@ -548,4 +572,4 @@ bool wildcard(const char* search, const char* string) { } } return false; -} \ No newline at end of file +}