From e76698ae59da551df9af58bbb0c8ae04430e8466 Mon Sep 17 00:00:00 2001
From: Vicki Pfau
Date: Sun, 15 May 2022 21:27:37 -0700
Subject: [PATCH] Util: Add latin1ToUtf8

---
 include/mgba-util/string.h |  1 +
 src/util/string.c          | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/include/mgba-util/string.h b/include/mgba-util/string.h
index cc7169bf2..de7de640f 100644
--- a/include/mgba-util/string.h
+++ b/include/mgba-util/string.h
@@ -33,6 +33,7 @@ int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t u
 char* utf16to8(const uint16_t* utf16, size_t length);
 uint32_t utf8Char(const char** unicode, size_t* length);
 uint32_t utf16Char(const uint16_t** unicode, size_t* length);
+char* latin1ToUtf8(const char* latin1, size_t length);
 char* gbkToUtf8(const char* gbk, size_t length);
 size_t utf8strlen(const char* string);
 
diff --git a/src/util/string.c b/src/util/string.c
index a561a5fb1..fdb5305d4 100644
--- a/src/util/string.c
+++ b/src/util/string.c
@@ -285,6 +285,38 @@ char* utf16to8(const uint16_t* utf16, size_t length) {
 	return newUTF8;
 }
 
+/* Converts `length` bytes of Latin-1 (ISO 8859-1) text into a newly allocated,
+ * NUL-terminated UTF-8 string. Returns NULL if the output size would overflow
+ * or the allocation fails; otherwise the caller owns the result and must
+ * free() it. A zero-length input yields an empty string.
+ */
+char* latin1ToUtf8(const char* latin1, size_t length) {
+	// Code points below 0x100 take at most two bytes in standard UTF-8 (which
+	// toUtf8 is expected to produce), so reserve the worst case plus the
+	// terminator once instead of growing the buffer while converting
+	if (length > (SIZE_MAX - 1) / 2) {
+		return NULL;
+	}
+	char* utf8 = malloc(length * 2 + 1);
+	if (!utf8) {
+		return NULL;
+	}
+
+	size_t utf8Length = 0;
+	size_t offset;
+	for (offset = 0; offset < length; ++offset) {
+		// Read through uint8_t so bytes >= 0x80 are not sign-extended
+		uint8_t unichar = latin1[offset];
+		char buffer[4];
+		size_t bytes = toUtf8(unichar, buffer);
+		memcpy(&utf8[utf8Length], buffer, bytes);
+		utf8Length += bytes;
+	}
+	utf8[utf8Length] = '\0';
+
+	// Hand back the unused tail; if the shrink fails the original block is
+	// still valid and already terminated, so return it as-is
+	char* trimmed = realloc(utf8, utf8Length + 1);
+	return trimmed ? trimmed : utf8;
+}
+
 extern const uint16_t gbkUnicodeTable[];
 
 char* gbkToUtf8(const char* gbk, size_t length) {