Util: Add toUtf16 function

This commit is contained in:
Vicki Pfau 2021-04-23 00:18:18 -07:00
parent dc5003a427
commit 926e8fcccb
2 changed files with 26 additions and 1 deletions

View File

@ -28,6 +28,7 @@ bool endswith(const char* restrict s1, const char* restrict end);
bool startswith(const char* restrict s1, const char* restrict start); bool startswith(const char* restrict s1, const char* restrict start);
size_t toUtf8(uint32_t unichar, char* buffer); size_t toUtf8(uint32_t unichar, char* buffer);
size_t toUtf16(uint32_t unichar, uint16_t* buffer);
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length); int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length);
char* utf16to8(const uint16_t* utf16, size_t length); char* utf16to8(const uint16_t* utf16, size_t length);
uint32_t utf8Char(const char** unicode, size_t* length); uint32_t utf8Char(const char** unicode, size_t* length);

View File

@ -180,6 +180,30 @@ size_t toUtf8(uint32_t unichar, char* buffer) {
return 0; return 0;
} }
// Encode a single Unicode code point as UTF-16.
//
// unichar: the code point to encode.
// buffer:  destination for the encoded units; up to two elements
//          (buffer[0] and buffer[1]) are written, so the caller must
//          provide room for both.
//
// Returns the number of 16-bit units stored: 1 for code points below
// 0x10000, 2 for code points in 0x10000-0x10FFFF (surrogate pair), or 0
// when the input is a lone surrogate (0xD800-0xDFFF) or lies beyond
// 0x10FFFF. Nothing is written to buffer when 0 is returned.
size_t toUtf16(uint32_t unichar, uint16_t* buffer) {
	// Reject everything that has no UTF-16 encoding up front:
	// surrogate code points and values past the end of Unicode.
	if ((unichar >= 0xD800 && unichar <= 0xDFFF) || unichar >= 0x110000) {
		return 0;
	}

	// Basic Multilingual Plane: the code point is its own code unit.
	if (unichar <= 0xFFFF) {
		buffer[0] = (uint16_t) unichar;
		return 1;
	}

	// Supplementary planes: split the 20-bit offset from 0x10000 into a
	// high (lead) surrogate carrying the top 10 bits and a low (trail)
	// surrogate carrying the bottom 10 bits.
	uint32_t offset = unichar - 0x10000;
	uint16_t lead = (uint16_t) (0xD800 | (offset >> 10));
	uint16_t trail = (uint16_t) (0xDC00 | (offset & 0x3FF));
	buffer[0] = lead;
	buffer[1] = trail;
	return 2;
}
int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) { int utfcmp(const uint16_t* utf16, const char* utf8, size_t utf16Length, size_t utf8Length) {
uint32_t char1 = 0, char2 = 0; uint32_t char1 = 0, char2 = 0;
while (utf16Length > 0 && utf8Length > 0) { while (utf16Length > 0 && utf8Length > 0) {
@ -548,4 +572,4 @@ bool wildcard(const char* search, const char* string) {
} }
} }
return false; return false;
} }