[FAudio] Always convert UTF-16 names (#1328)

On Windows, wchar_t is 16 bits and represents UTF-16 code units. However, on other platforms, wchar_t is typically implemented as 32 bits to represent UTF-32 code units. In order to work around this problem, we always convert the string we get from FAudio, which is represented as UTF-16 code units, into UTF-8 and let wxString handle the conversion to its native type internally.
2024-08-17 21:02:01 -07:00 · 2024-08-17 21:02:01 -07:00 · aa6ed14b2a
parent 2ce20c4f59
commit aa6ed14b2a
4 changed files with 138 additions and 11 deletions
--- a/src/wx/audio/internal/faudio.cpp
+++ b/src/wx/audio/internal/faudio.cpp
@ -20,6 +20,7 @@
 #include "core/base/system.h"
 #include "core/gba/gbaGlobals.h"
 #include "wx/config/option-proxy.h"
+#include "wx/config/strutils.h"

 namespace audio {
 namespace internal {
@ -27,7 +28,7 @@ namespace internal {
 namespace {

 int FAGetDev(FAudio* fa) {
-    const wxString& audio_device = OPTION(kSoundAudioDevice);
+    const wxString audio_device = OPTION(kSoundAudioDevice).Get();
    if (audio_device.empty()) {
        // Just use the default device.
        return 0;
@ -47,7 +48,10 @@ int FAGetDev(FAudio* fa) {
        if (hr != 0) {
            continue;
        }
-        const wxString device_id(reinterpret_cast<wchar_t*>(dd.DeviceID));
+        const std::vector<uint8_t> device_id_u8 =
+            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
+        const wxString device_id = wxString::FromUTF8(
+            reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
        if (audio_device == device_id) {
            return i;
        }
@ -119,8 +123,6 @@ public:
    FAudio_Output();
    ~FAudio_Output();

-    void device_change();
-
 private:
    void close();

@ -193,10 +195,6 @@ void FAudio_Output::close() {
    }
 }

-void FAudio_Output::device_change() {
-    device_changed = true;
-}
-
 bool FAudio_Output::init(long sampleRate) {
    if (failed || initialized)
        return false;
@ -488,8 +486,11 @@ std::vector<AudioDevice> GetFAudioDevices() {
    }

    std::vector<AudioDevice> devices;
+#if defined(__WXMSW__)
+    // Add a separate default device on Windows.
    devices.reserve(dev_count + 1);
    devices.push_back({_("Default device"), wxEmptyString});
+#endif

    for (uint32_t i = 0; i < dev_count; i++) {
        FAudioDeviceDetails dd;
@ -498,9 +499,15 @@ std::vector<AudioDevice> GetFAudioDevices() {
            continue;
        }

-        const wxString display_name(reinterpret_cast<wchar_t*>(dd.DisplayName));
-        const wxString device_id(reinterpret_cast<wchar_t*>(dd.DeviceID));
-
+        // Convert to UTF-8.
+        const std::vector<uint8_t> display_name_u8 =
+            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DisplayName));
+        const std::vector<uint8_t> device_id_u8 =
+            config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
+        const wxString display_name = wxString::FromUTF8(
+            reinterpret_cast<const char*>(display_name_u8.data()), display_name_u8.size());
+        const wxString device_id = wxString::FromUTF8(
+            reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
        devices.push_back({display_name, device_id});
    }

--- a/src/wx/config/strutils-test.cpp
+++ b/src/wx/config/strutils-test.cpp
@ -94,3 +94,76 @@ TEST(StrSplitWithSepTest, MultipleSepTokens) {
    EXPECT_EQ(vec[4], "baz");
    EXPECT_EQ(vec[5], "|-|");
 }
+
+// Plain ASCII input: every code unit becomes exactly one output byte.
+TEST(UTF16ToUTF8Test, Basic) {
+    const std::vector<uint16_t> input = {'f', 'o', 'o', 0};
+    const std::vector<uint8_t> expected = {'f', 'o', 'o'};
+
+    EXPECT_EQ(config::utf16_to_utf8(input.data()), expected);
+}
+
+// A single non-surrogate code unit above U+07FF becomes three UTF-8 bytes.
+TEST(UTF16ToUTF8Test, MultiByte) {
+    // U+20AC EURO SIGN -> E2 82 AC.
+    const std::vector<uint16_t> input = {0x20AC, 0};
+    const std::vector<uint8_t> expected = {0xE2, 0x82, 0xAC};
+
+    EXPECT_EQ(config::utf16_to_utf8(input.data()), expected);
+}
+
+// A well-formed surrogate pair collapses into one four-byte UTF-8 sequence.
+TEST(UTF16ToUTF8Test, SurrogatePair) {
+    // U+1F914 THINKING FACE -> F0 9F A4 94.
+    const std::vector<uint16_t> input = {0xD83E, 0xDD14, 0};
+    const std::vector<uint8_t> expected = {0xF0, 0x9F, 0xA4, 0x94};
+
+    EXPECT_EQ(config::utf16_to_utf8(input.data()), expected);
+}
+
+// A high surrogate immediately followed by the terminator has no partner;
+// the conversion is expected to kill the process.
+TEST(UTF16ToUTF8Test, InvalidSurrogatePair) {
+    // U+D800 HIGH SURROGATE, then end of string.
+    const std::vector<uint16_t> lone_high = {0xD800, 0};
+    EXPECT_DEATH(config::utf16_to_utf8(lone_high.data()), ".*");
+}
+
+// A high surrogate followed by an ordinary character is not a valid pair;
+// the conversion is expected to kill the process.
+TEST(UTF16ToUTF8Test, InvalidSurrogatePair2) {
+    // U+D800 HIGH SURROGATE, then U+0020 SPACE.
+    const std::vector<uint16_t> high_then_space = {0xD800, 0x0020, 0};
+    EXPECT_DEATH(config::utf16_to_utf8(high_then_space.data()), ".*");
+}
+
+// Two high surrogates in a row: the second unit is not a low surrogate, so
+// the conversion is expected to kill the process.
+TEST(UTF16ToUTF8Test, InvalidSurrogatePair3) {
+    // U+D800 HIGH SURROGATE, then U+D800 HIGH SURROGATE again.
+    const std::vector<uint16_t> high_then_high = {0xD800, 0xD800, 0};
+    EXPECT_DEATH(config::utf16_to_utf8(high_then_high.data()), ".*");
+}
+
+// Mixed input covering the 1-byte, 3-byte and 4-byte (surrogate pair) paths
+// in a single string.
+TEST(UTF16ToUTF8Test, FullString) {
+    // "foo€🤔"
+    const std::vector<uint16_t> input = {'f', 'o', 'o', 0x20AC, 0xD83E, 0xDD14, 0};
+    const std::vector<uint8_t> expected = {
+        'f',  'o',  'o',         // ASCII
+        0xE2, 0x82, 0xAC,        // U+20AC
+        0xF0, 0x9F, 0xA4, 0x94,  // U+1F914
+    };
+
+    EXPECT_EQ(config::utf16_to_utf8(input.data()), expected);
+}
--- a/src/wx/config/strutils.cpp
+++ b/src/wx/config/strutils.cpp
@ -1,7 +1,11 @@
 #include "wx/config/strutils.h"

+#include <cstdint>
+
 #include <wx/tokenzr.h>

+#include "core/base/check.h"
+
 namespace config {

 // From: https://stackoverflow.com/a/7408245/262458
@ -37,4 +41,40 @@ wxArrayString str_split_with_sep(const wxString& text, const wxString& sep)
    return str_split(text, sep, true);
 }

+// Converts a null-terminated sequence of UTF-16 code units into UTF-8 bytes.
+// The terminator is not copied to the output. Malformed input (a surrogate
+// that is unpaired or out of order) fails a VBAM_CHECK.
+std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16) {
+    std::vector<uint8_t> out;
+    for (size_t i = 0; utf16[i]; i++) {
+        const uint16_t c = utf16[i];
+        if (c < 0x80) {
+            // ASCII, a single byte.
+            out.push_back(static_cast<uint8_t>(c));
+        } else if (c < 0x800) {
+            // 2-byte UTF-8 character.
+            out.push_back(static_cast<uint8_t>(0xC0 | (c >> 6)));
+            out.push_back(static_cast<uint8_t>(0x80 | (c & 0x3F)));
+        } else if (c < 0xD800 || c >= 0xE000) {
+            // Regular 3-byte UTF-8 character.
+            out.push_back(static_cast<uint8_t>(0xE0 | (c >> 12)));
+            out.push_back(static_cast<uint8_t>(0x80 | ((c >> 6) & 0x3F)));
+            out.push_back(static_cast<uint8_t>(0x80 | (c & 0x3F)));
+        } else {
+            // Surrogate range. A pair must start with a high surrogate
+            // (0xD800-0xDBFF); a low surrogate in this position is malformed
+            // and must not be combined with whatever follows it.
+            const uint32_t high = c;
+            VBAM_CHECK(high < 0xDC00);
+
+            // The next code unit must be a low surrogate. Reading index i + 1
+            // is in bounds because utf16[i] is non-zero, so the terminator is
+            // at i + 1 or later. Advance only after validation so that `i`
+            // never moves onto the terminator for malformed input.
+            const uint32_t low = utf16[i + 1];
+            VBAM_CHECK(low);
+            VBAM_CHECK(low >= 0xDC00 && low < 0xE000);
+            i++;
+
+            // Reassemble the original code point from the two 10-bit halves.
+            const uint32_t codepoint = 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF);
+
+            // Convert to UTF-8 (4 bytes).
+            out.push_back(static_cast<uint8_t>(0xF0 | (codepoint >> 18)));
+            out.push_back(static_cast<uint8_t>(0x80 | ((codepoint >> 12) & 0x3F)));
+            out.push_back(static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3F)));
+            out.push_back(static_cast<uint8_t>(0x80 | (codepoint & 0x3F)));
+        }
+    }
+    return out;
+}
+
 } // namespace config
--- a/src/wx/config/strutils.h
+++ b/src/wx/config/strutils.h
@ -1,6 +1,9 @@
 #ifndef VBAM_WX_CONFIG_STRUTILS_H_
 #define VBAM_WX_CONFIG_STRUTILS_H_

+#include <cstdint>
+#include <vector>
+
 #include <wx/string.h>
 #include <wx/arrstr.h>

@ -14,6 +17,10 @@ wxArrayString str_split(const wxString& text, const wxString& sep, bool empty_to
 // 'A', ',' and 'B' will be in the output.
 wxArrayString str_split_with_sep(const wxString& text, const wxString& sep);

+// Converts a null-terminated array of UTF-16 code units to a vector of UTF-8 code units.
+// The terminating null is not included in the returned vector.
+// This will assert if the input is not a valid UTF-16 string, e.g. a high surrogate
+// that is not followed by a low surrogate.
+std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16);
+
 } // namespace config

 #endif  // VBAM_WX_CONFIG_STRUTILS_H_