Remove cast for UTF-16 -> UTF-8 conversion

Add convenience functions to call `config::utf16_to_utf8()` without a
cast.

Signed-off-by: Rafael Kitover <rkitover@gmail.com>
Authored by Fabrice de Gans on 2024-09-07 13:03:45 -07:00; committed by Rafael Kitover
parent 7f06428df8
commit e76cef79d2
4 changed files with 49 additions and 29 deletions
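
In short, call sites previously converted to a byte vector and then built a
wxString by hand, with a reinterpret_cast on each side; the new wxString
overloads fold both steps into a single call. A rough before/after sketch,
modeled on the call sites below rather than copied from them:

    // Before: two steps and two casts (FAudio device strings are int16_t*).
    const std::vector<uint8_t> u8 =
        config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
    const wxString s = wxString::FromUTF8(
        reinterpret_cast<const char*>(u8.data()), u8.size());

    // After: the overload taking int16_t* handles the conversion internally.
    const wxString s2 = config::utf16_to_utf8(dd.DeviceID);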

==== File 1 of 4 ====

@@ -48,10 +48,7 @@ int FAGetDev(FAudio* fa) {
         if (hr != 0) {
             continue;
         }
-        const std::vector<uint8_t> device_id_u8 =
-            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
-        const wxString device_id = wxString::FromUTF8(
-            reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
+        const wxString device_id = config::utf16_to_utf8(dd.DeviceID);
         if (audio_device == device_id) {
             return i;
         }
@@ -500,14 +497,8 @@ std::vector<AudioDevice> GetFAudioDevices() {
         }
         // Convert to UTF-8.
-        const std::vector<uint8_t> display_name_u8 =
-            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DisplayName));
-        const std::vector<uint8_t> device_id_u8 =
-            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
-        const wxString display_name = wxString::FromUTF8(
-            reinterpret_cast<const char*>(display_name_u8.data()), display_name_u8.size());
-        const wxString device_id = wxString::FromUTF8(
-            reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
+        const wxString display_name = config::utf16_to_utf8(dd.DisplayName);
+        const wxString device_id = config::utf16_to_utf8(dd.DeviceID);
         devices.push_back({display_name, device_id});
     }

==== File 2 of 4 ====

@@ -96,8 +96,8 @@ TEST(StrSplitWithSepTest, MultipleSepTokens) {
 }
 
 TEST(UTF16ToUTF8Test, Basic) {
-    std::vector<uint16_t> utf16 = {'f', 'o', 'o', 0};
-    auto vec = config::utf16_to_utf8(utf16.data());
+    const std::vector<uint16_t> utf16 = {'f', 'o', 'o', 0};
+    auto vec = config::utf16_to_utf8_vector(utf16.data());
     ASSERT_EQ(vec.size(), 3);
@@ -108,8 +108,8 @@ TEST(UTF16ToUTF8Test, Basic) {
 TEST(UTF16ToUTF8Test, MultiByte) {
     // U+20AC EURO SIGN.
-    std::vector<uint16_t> utf16 = {0x20AC, 0};
-    auto vec = config::utf16_to_utf8(utf16.data());
+    const std::vector<uint16_t> utf16 = {0x20AC, 0};
+    auto vec = config::utf16_to_utf8_vector(utf16.data());
     ASSERT_EQ(vec.size(), 3);
@@ -118,10 +118,25 @@ TEST(UTF16ToUTF8Test, MultiByte) {
     EXPECT_EQ(vec[2], 0xAC);
 }
 
+TEST(UTF16ToUTF8Test, DualMultiByte) {
+    // This is a variant of the above to test the buffer reset is done properly.
+    const std::vector<uint16_t> utf16 = {0x20AC, 0x20AC, 0};
+    auto vec = config::utf16_to_utf8_vector(utf16.data());
+    ASSERT_EQ(vec.size(), 6);
+    EXPECT_EQ(vec[0], 0xE2);
+    EXPECT_EQ(vec[1], 0x82);
+    EXPECT_EQ(vec[2], 0xAC);
+    EXPECT_EQ(vec[3], 0xE2);
+    EXPECT_EQ(vec[4], 0x82);
+    EXPECT_EQ(vec[5], 0xAC);
+}
+
 TEST(UTF16ToUTF8Test, SurrogatePair) {
     // U+1F914 THINKING FACE.
-    std::vector<uint16_t> utf16 = {0xD83E, 0xDD14, 0};
-    auto vec = config::utf16_to_utf8(utf16.data());
+    const std::vector<uint16_t> utf16 = {0xD83E, 0xDD14, 0};
+    auto vec = config::utf16_to_utf8_vector(utf16.data());
     ASSERT_EQ(vec.size(), 4);
@@ -133,26 +148,26 @@ TEST(UTF16ToUTF8Test, SurrogatePair) {
 
 TEST(UTF16ToUTF8Test, InvalidSurrogatePair) {
     // U+D800 HIGH SURROGATE.
-    std::vector<uint16_t> utf16 = {0xD800, 0};
-    EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*");
+    const std::vector<uint16_t> utf16 = {0xD800, 0};
+    EXPECT_DEATH(config::utf16_to_utf8_vector(utf16.data()), ".*");
 }
 
 TEST(UTF16ToUTF8Test, InvalidSurrogatePair2) {
     // U+D800 HIGH SURROGATE followed by U+0020 SPACE.
-    std::vector<uint16_t> utf16 = {0xD800, 0x0020, 0};
-    EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*");
+    const std::vector<uint16_t> utf16 = {0xD800, 0x0020, 0};
+    EXPECT_DEATH(config::utf16_to_utf8_vector(utf16.data()), ".*");
 }
 
 TEST(UTF16ToUTF8Test, InvalidSurrogatePair3) {
     // U+D800 HIGH SURROGATE followed by U+D800 HIGH SURROGATE.
-    std::vector<uint16_t> utf16 = {0xD800, 0xD800, 0};
-    EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*");
+    const std::vector<uint16_t> utf16 = {0xD800, 0xD800, 0};
+    EXPECT_DEATH(config::utf16_to_utf8_vector(utf16.data()), ".*");
 }
 
 TEST(UTF16ToUTF8Test, FullString) {
     // "foo€🤔"
-    std::vector<uint16_t> utf16 = {'f', 'o', 'o', 0x20AC, 0xD83E, 0xDD14, 0};
-    auto vec = config::utf16_to_utf8(utf16.data());
+    const std::vector<uint16_t> utf16 = {'f', 'o', 'o', 0x20AC, 0xD83E, 0xDD14, 0};
+    auto vec = config::utf16_to_utf8_vector(utf16.data());
     ASSERT_EQ(vec.size(), 10);
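
The expected sizes in these tests follow from the UTF-8 encoding rules: one
byte per code point below U+0080, three bytes for U+20AC, and four bytes for a
code point written as a surrogate pair, such as U+1F914. A worked sketch of
the surrogate-pair math (standard UTF-16 decoding, not code from this commit):

    // U+1F914 as UTF-16: high surrogate 0xD83E, low surrogate 0xDD14.
    const uint32_t high = 0xD83E;
    const uint32_t low = 0xDD14;
    const uint32_t codepoint = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
    // codepoint == 0x1F914, which UTF-8 encodes in four bytes:
    // 0xF0 0x9F 0xA4 0x94 -- hence ASSERT_EQ(vec.size(), 4) above.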

==== File 3 of 4 ====

@@ -41,10 +41,10 @@ wxArrayString str_split_with_sep(const wxString& text, const wxString& sep)
     return str_split(text, sep, true);
 }
 
-std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16) {
+std::vector<uint8_t> utf16_to_utf8_vector(const uint16_t* utf16) {
     std::vector<uint8_t> out;
     for (size_t i = 0; utf16[i]; i++) {
-        uint16_t c = utf16[i];
+        const uint16_t c = utf16[i];
         if (c < 0x80) {
             out.push_back(c);
         } else if (c < 0x800) {
@@ -77,4 +77,14 @@ std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16) {
     return out;
 }
 
+wxString utf16_to_utf8(const uint16_t* utf16) {
+    std::vector<uint8_t> output_vector = utf16_to_utf8_vector(utf16);
+    return wxString::FromUTF8(reinterpret_cast<const char*>(output_vector.data()),
+                              output_vector.size());
+}
+
+wxString utf16_to_utf8(const int16_t* utf16) {
+    return utf16_to_utf8(reinterpret_cast<const uint16_t*>(utf16));
+}
+
 } // namespace config

==== File 4 of 4 ====

@@ -19,7 +19,11 @@ wxArrayString str_split_with_sep(const wxString& text, const wxString& sep);
 
 // Converts a null-terminated array of UTF-16 code units to a vector of UTF-8 code units.
 // This will assert if the input is not a valid UTF-16 string.
-std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16);
+std::vector<uint8_t> utf16_to_utf8_vector(const uint16_t* utf16);
+
+// Convenience functions to convert a null-terminated array of UTF-16 code units to a wxString.
+wxString utf16_to_utf8(const uint16_t* utf16);
+wxString utf16_to_utf8(const int16_t* utf16);
 
 } // namespace config
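
Taken together, a minimal usage sketch of the new API; the include path and
the input buffer are illustrative assumptions, not part of this commit:

    #include "config/strutils.h"  // hypothetical path to these declarations

    // "hi€" as null-terminated UTF-16 code units.
    const uint16_t name[] = {'h', 'i', 0x20AC, 0};
    const wxString s = config::utf16_to_utf8(name);  // wxString result, no casts
    const std::vector<uint8_t> bytes = config::utf16_to_utf8_vector(name);
    // bytes.size() == 5: one byte each for 'h' and 'i', three for U+20AC.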