From aa6ed14b2ab36c558c483bc1c88d7034a3c93402 Mon Sep 17 00:00:00 2001 From: Fabrice de Gans Date: Sat, 17 Aug 2024 21:02:01 -0700 Subject: [PATCH] [FAudio] Always convert UTF-16 names (#1328) On Windows, wchar_t is 16 bits and represents UTF-16 code units. However, on other platforms, wchar_t is typically implemented as 32 bits to represent UTF-32 code units. In order to work around this problem, we always convert the string we get from FAudio, which is represented as UTF-16 code units, into UTF-8 and let wxString handle the conversion to its native type internally. --- src/wx/audio/internal/faudio.cpp | 29 ++++++++----- src/wx/config/strutils-test.cpp | 73 ++++++++++++++++++++++++++++++++ src/wx/config/strutils.cpp | 40 +++++++++++++++++ src/wx/config/strutils.h | 7 +++ 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/src/wx/audio/internal/faudio.cpp b/src/wx/audio/internal/faudio.cpp index 18aaed17..e9a7fcbf 100644 --- a/src/wx/audio/internal/faudio.cpp +++ b/src/wx/audio/internal/faudio.cpp @@ -20,6 +20,7 @@ #include "core/base/system.h" #include "core/gba/gbaGlobals.h" #include "wx/config/option-proxy.h" +#include "wx/config/strutils.h" namespace audio { namespace internal { @@ -27,7 +28,7 @@ namespace internal { namespace { int FAGetDev(FAudio* fa) { - const wxString& audio_device = OPTION(kSoundAudioDevice); + const wxString audio_device = OPTION(kSoundAudioDevice).Get(); if (audio_device.empty()) { // Just use the default device. return 0; @@ -47,7 +48,10 @@ int FAGetDev(FAudio* fa) { if (hr != 0) { continue; } - const wxString device_id(reinterpret_cast(dd.DeviceID)); + const std::vector device_id_u8 = + config::utf16_to_utf8(reinterpret_cast(dd.DeviceID)); + const wxString device_id = wxString::FromUTF8( + reinterpret_cast(device_id_u8.data()), device_id_u8.size()); if (audio_device == device_id) { return i; } @@ -119,8 +123,6 @@ public: FAudio_Output(); ~FAudio_Output(); - void device_change(); - private: void close(); @@ -193,10 +195,6 @@ void FAudio_Output::close() { } } -void FAudio_Output::device_change() { - device_changed = true; -} - bool FAudio_Output::init(long sampleRate) { if (failed || initialized) return false; @@ -488,8 +486,11 @@ std::vector GetFAudioDevices() { } std::vector devices; +#if defined(__WXMSW__) + // Add a separate default device on Windows. devices.reserve(dev_count + 1); devices.push_back({_("Default device"), wxEmptyString}); +#endif for (uint32_t i = 0; i < dev_count; i++) { FAudioDeviceDetails dd; @@ -498,9 +499,15 @@ std::vector GetFAudioDevices() { continue; } - const wxString display_name(reinterpret_cast(dd.DisplayName)); - const wxString device_id(reinterpret_cast(dd.DeviceID)); - + // Convert to UTF-8. + const std::vector display_name_u8 = + config::utf16_to_utf8(reinterpret_cast(dd.DisplayName)); + const std::vector device_id_u8 = + config::utf16_to_utf8(reinterpret_cast(dd.DeviceID)); + const wxString display_name = wxString::FromUTF8( + reinterpret_cast(display_name_u8.data()), display_name_u8.size()); + const wxString device_id = wxString::FromUTF8( + reinterpret_cast(device_id_u8.data()), device_id_u8.size()); devices.push_back({display_name, device_id}); } diff --git a/src/wx/config/strutils-test.cpp b/src/wx/config/strutils-test.cpp index ea061d33..35da449f 100644 --- a/src/wx/config/strutils-test.cpp +++ b/src/wx/config/strutils-test.cpp @@ -94,3 +94,76 @@ TEST(StrSplitWithSepTest, MultipleSepTokens) { EXPECT_EQ(vec[4], "baz"); EXPECT_EQ(vec[5], "|-|"); } + +TEST(UTF16ToUTF8Test, Basic) { + std::vector utf16 = {'f', 'o', 'o', 0}; + auto vec = config::utf16_to_utf8(utf16.data()); + + ASSERT_EQ(vec.size(), 3); + + EXPECT_EQ(vec[0], 'f'); + EXPECT_EQ(vec[1], 'o'); + EXPECT_EQ(vec[2], 'o'); +} + +TEST(UTF16ToUTF8Test, MultiByte) { + // U+20AC EURO SIGN. + std::vector utf16 = {0x20AC, 0}; + auto vec = config::utf16_to_utf8(utf16.data()); + + ASSERT_EQ(vec.size(), 3); + + EXPECT_EQ(vec[0], 0xE2); + EXPECT_EQ(vec[1], 0x82); + EXPECT_EQ(vec[2], 0xAC); +} + +TEST(UTF16ToUTF8Test, SurrogatePair) { + // U+1F914 THINKING FACE. + std::vector utf16 = {0xD83E, 0xDD14, 0}; + auto vec = config::utf16_to_utf8(utf16.data()); + + ASSERT_EQ(vec.size(), 4); + + EXPECT_EQ(vec[0], 0xF0); + EXPECT_EQ(vec[1], 0x9F); + EXPECT_EQ(vec[2], 0xA4); + EXPECT_EQ(vec[3], 0x94); +} + +TEST(UTF16ToUTF8Test, InvalidSurrogatePair) { + // U+D800 HIGH SURROGATE. + std::vector utf16 = {0xD800, 0}; + EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*"); +} + +TEST(UTF16ToUTF8Test, InvalidSurrogatePair2) { + // U+D800 HIGH SURROGATE followed by U+0020 SPACE. + std::vector utf16 = {0xD800, 0x0020, 0}; + EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*"); +} + +TEST(UTF16ToUTF8Test, InvalidSurrogatePair3) { + // U+D800 HIGH SURROGATE followed by U+D800 HIGH SURROGATE. + std::vector utf16 = {0xD800, 0xD800, 0}; + EXPECT_DEATH(config::utf16_to_utf8(utf16.data()), ".*"); +} + +TEST(UTF16ToUTF8Test, FullString) { + // "foo€🤔" + std::vector utf16 = {'f', 'o', 'o', 0x20AC, 0xD83E, 0xDD14, 0}; + auto vec = config::utf16_to_utf8(utf16.data()); + + ASSERT_EQ(vec.size(), 10); + + EXPECT_EQ(vec[0], 'f'); + EXPECT_EQ(vec[1], 'o'); + EXPECT_EQ(vec[2], 'o'); + EXPECT_EQ(vec[3], 0xE2); + EXPECT_EQ(vec[4], 0x82); + EXPECT_EQ(vec[5], 0xAC); + EXPECT_EQ(vec[6], 0xF0); + EXPECT_EQ(vec[7], 0x9F); + EXPECT_EQ(vec[8], 0xA4); + EXPECT_EQ(vec[9], 0x94); +} diff --git a/src/wx/config/strutils.cpp b/src/wx/config/strutils.cpp index 3d506d94..0bf6227f 100644 --- a/src/wx/config/strutils.cpp +++ b/src/wx/config/strutils.cpp @@ -1,7 +1,11 @@ #include "wx/config/strutils.h" +#include + #include +#include "core/base/check.h" + namespace config { // From: https://stackoverflow.com/a/7408245/262458 @@ -37,4 +41,40 @@ wxArrayString str_split_with_sep(const wxString& text, const wxString& sep) return str_split(text, sep, true); } +std::vector utf16_to_utf8(const uint16_t* utf16) { + std::vector out; + for (size_t i = 0; utf16[i]; i++) { + uint16_t c = utf16[i]; + if (c < 0x80) { + out.push_back(c); + } else if (c < 0x800) { + out.push_back(0xC0 | (c >> 6)); + out.push_back(0x80 | (c & 0x3F)); + } else if (c < 0xD800 || c >= 0xE000) { + // Regular 3-byte UTF-8 character. + out.push_back(0xE0 | (c >> 12)); + out.push_back(0x80 | ((c >> 6) & 0x3F)); + out.push_back(0x80 | (c & 0x3F)); + } else { + // Surrogate pair, construct the original code point. + const uint32_t high = c; + + // The next code unit must be a low surrogate. + i++; + const uint32_t low = utf16[i]; + VBAM_CHECK(low); + VBAM_CHECK(low >= 0xDC00 && low < 0xE000); + + const uint32_t codepoint = 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF); + + // Convert to UTF-8. + out.push_back(0xF0 | (codepoint >> 18)); + out.push_back(0x80 | ((codepoint >> 12) & 0x3F)); + out.push_back(0x80 | ((codepoint >> 6) & 0x3F)); + out.push_back(0x80 | (codepoint & 0x3F)); + } + } + return out; +} + } // namespace config diff --git a/src/wx/config/strutils.h b/src/wx/config/strutils.h index 49fd6b07..0a9f8308 100644 --- a/src/wx/config/strutils.h +++ b/src/wx/config/strutils.h @@ -1,6 +1,9 @@ #ifndef VBAM_WX_CONFIG_STRUTILS_H_ #define VBAM_WX_CONFIG_STRUTILS_H_ +#include +#include + #include #include @@ -14,6 +17,10 @@ wxArrayString str_split(const wxString& text, const wxString& sep, bool empty_to // 'A', ',' and 'B' will be in the output. wxArrayString str_split_with_sep(const wxString& text, const wxString& sep); +// Converts a null-terminated array of UTF-16 code units to a vector of UTF-8 code units. +// This will assert if the input is not a valid UTF-16 string. +std::vector utf16_to_utf8(const uint16_t* utf16); + } // namespace config #endif // VBAM_WX_CONFIG_STRUTILS_H_