Util: Add latin1ToUtf8

This commit is contained in:
Vicki Pfau 2022-05-15 21:27:37 -07:00
parent 155c57b19e
commit e76698ae59
2 changed files with 49 additions and 0 deletions

View File

@ -33,6 +33,7 @@ int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t u
char* utf16to8(const uint16_t* utf16, size_t length); char* utf16to8(const uint16_t* utf16, size_t length);
uint32_t utf8Char(const char** unicode, size_t* length); uint32_t utf8Char(const char** unicode, size_t* length);
uint32_t utf16Char(const uint16_t** unicode, size_t* length); uint32_t utf16Char(const uint16_t** unicode, size_t* length);
// Converts `length` bytes of Latin-1 text (need not be NUL-terminated) into a
// NUL-terminated UTF-8 string allocated with malloc/realloc. Returns NULL if
// an allocation fails; otherwise the caller is expected to free() the result.
char* latin1ToUtf8(const char* latin1, size_t length);
char* gbkToUtf8(const char* gbk, size_t length); char* gbkToUtf8(const char* gbk, size_t length);
size_t utf8strlen(const char* string); size_t utf8strlen(const char* string);

View File

@ -285,6 +285,54 @@ char* utf16to8(const uint16_t* utf16, size_t length) {
return newUTF8; return newUTF8;
} }
// Converts a Latin-1 (ISO-8859-1) byte string into a newly allocated,
// NUL-terminated UTF-8 string.
//
// latin1: input bytes; need not be NUL-terminated and is not read when
//         length is 0.
// length: number of input bytes to convert.
//
// Returns a heap buffer that the caller must free(), or NULL if the
// allocation fails (or the worst-case output size would overflow size_t).
char* latin1ToUtf8(const char* latin1, size_t length) {
	// Each Latin-1 byte is the code point U+0000..U+00FF, which encodes to at
	// most two UTF-8 bytes, so 2 * length + 1 bounds the output plus its NUL.
	// Reserving that up front removes any need to grow the buffer mid-loop.
	if (length > (SIZE_MAX - 1) / 2) {
		return NULL;
	}
	char* utf8 = malloc(length * 2 + 1);
	if (!utf8) {
		return NULL;
	}

	// Write through an unsigned view so bytes >= 0x80 are stored without
	// relying on the implementation-defined signedness of plain char.
	uint8_t* out = (uint8_t*) utf8;
	size_t utf8Length = 0;
	size_t offset;
	for (offset = 0; offset < length; ++offset) {
		// Read as unsigned too, so high bytes are not sign-extended.
		uint8_t unichar = (uint8_t) latin1[offset];
		if (unichar < 0x80) {
			// ASCII range maps to itself.
			out[utf8Length++] = unichar;
		} else {
			// U+0080..U+00FF: two-byte sequence 110xxxxx 10xxxxxx.
			out[utf8Length++] = (uint8_t) (0xC0 | (unichar >> 6));
			out[utf8Length++] = (uint8_t) (0x80 | (unichar & 0x3F));
		}
	}
	out[utf8Length] = '\0';

	// Trim the unused tail. If shrinking fails the original buffer is still
	// valid and correctly terminated, so hand that back instead of failing.
	char* trimmed = realloc(utf8, utf8Length + 1);
	return trimmed ? trimmed : utf8;
}
extern const uint16_t gbkUnicodeTable[]; extern const uint16_t gbkUnicodeTable[];
char* gbkToUtf8(const char* gbk, size_t length) { char* gbkToUtf8(const char* gbk, size_t length) {