[FAudio] Always convert UTF-16 names (#1328)

On Windows, wchar_t is 16 bits and represents UTF-16 code units.
However, on other platforms, wchar_t is typically implemented as 32 bits
to represent UTF-32 code units. In order to work around this problem, we
always convert the string we get from FAudio, which is represented as
UTF-16 code units, into UTF-8 and let wxString handle the conversion to
its native type internally.
This commit is contained in:
Fabrice de Gans 2024-08-17 21:02:01 -07:00 committed by GitHub
parent 2ce20c4f59
commit aa6ed14b2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 138 additions and 11 deletions

View File

@ -20,6 +20,7 @@
#include "core/base/system.h"
#include "core/gba/gbaGlobals.h"
#include "wx/config/option-proxy.h"
#include "wx/config/strutils.h"
namespace audio {
namespace internal {
@ -27,7 +28,7 @@ namespace internal {
namespace {
int FAGetDev(FAudio* fa) {
const wxString& audio_device = OPTION(kSoundAudioDevice);
const wxString audio_device = OPTION(kSoundAudioDevice).Get();
if (audio_device.empty()) {
// Just use the default device.
return 0;
@ -47,7 +48,10 @@ int FAGetDev(FAudio* fa) {
if (hr != 0) {
continue;
}
const wxString device_id(reinterpret_cast<wchar_t*>(dd.DeviceID));
const std::vector<uint8_t> device_id_u8 =
config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
const wxString device_id = wxString::FromUTF8(
reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
if (audio_device == device_id) {
return i;
}
@ -119,8 +123,6 @@ public:
FAudio_Output();
~FAudio_Output();
void device_change();
private:
void close();
@ -193,10 +195,6 @@ void FAudio_Output::close() {
}
}
// Records a device change by setting the device_changed flag to true.
// NOTE(review): the code that reads and clears this flag is not visible in
// this excerpt -- confirm where it is consumed.
void FAudio_Output::device_change() {
device_changed = true;
}
bool FAudio_Output::init(long sampleRate) {
if (failed || initialized)
return false;
@ -488,8 +486,11 @@ std::vector<AudioDevice> GetFAudioDevices() {
}
std::vector<AudioDevice> devices;
#if defined(__WXMSW__)
// Add a separate default device on Windows.
devices.reserve(dev_count + 1);
devices.push_back({_("Default device"), wxEmptyString});
#endif
for (uint32_t i = 0; i < dev_count; i++) {
FAudioDeviceDetails dd;
@ -498,9 +499,15 @@ std::vector<AudioDevice> GetFAudioDevices() {
continue;
}
const wxString display_name(reinterpret_cast<wchar_t*>(dd.DisplayName));
const wxString device_id(reinterpret_cast<wchar_t*>(dd.DeviceID));
// Convert to UTF-8.
const std::vector<uint8_t> display_name_u8 =
config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DisplayName));
const std::vector<uint8_t> device_id_u8 =
config::utf16_to_utf8(reinterpret_cast<uint16_t*>(dd.DeviceID));
const wxString display_name = wxString::FromUTF8(
reinterpret_cast<const char*>(display_name_u8.data()), display_name_u8.size());
const wxString device_id = wxString::FromUTF8(
reinterpret_cast<const char*>(device_id_u8.data()), device_id_u8.size());
devices.push_back({display_name, device_id});
}

View File

@ -94,3 +94,76 @@ TEST(StrSplitWithSepTest, MultipleSepTokens) {
EXPECT_EQ(vec[4], "baz");
EXPECT_EQ(vec[5], "|-|");
}
TEST(UTF16ToUTF8Test, Basic) {
    // ASCII code units must map one-to-one onto single UTF-8 bytes.
    const std::vector<uint16_t> input = {'f', 'o', 'o', 0};
    const std::vector<uint8_t> expected = {'f', 'o', 'o'};

    const auto actual = config::utf16_to_utf8(input.data());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); idx++) {
        EXPECT_EQ(actual[idx], expected[idx]);
    }
}
TEST(UTF16ToUTF8Test, MultiByte) {
    // U+20AC EURO SIGN is a single UTF-16 code unit that needs three UTF-8 bytes.
    const std::vector<uint16_t> input = {0x20AC, 0};
    const std::vector<uint8_t> expected = {0xE2, 0x82, 0xAC};

    const auto actual = config::utf16_to_utf8(input.data());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); idx++) {
        EXPECT_EQ(actual[idx], expected[idx]);
    }
}
TEST(UTF16ToUTF8Test, SurrogatePair) {
    // U+1F914 THINKING FACE is encoded as a high/low surrogate pair in UTF-16
    // and as a four-byte sequence in UTF-8.
    const std::vector<uint16_t> input = {0xD83E, 0xDD14, 0};
    const std::vector<uint8_t> expected = {0xF0, 0x9F, 0xA4, 0x94};

    const auto actual = config::utf16_to_utf8(input.data());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); idx++) {
        EXPECT_EQ(actual[idx], expected[idx]);
    }
}
TEST(UTF16ToUTF8Test, InvalidSurrogatePair) {
    // A U+D800 high surrogate that is immediately followed by the terminator
    // has no low half, so the conversion is expected to die.
    const std::vector<uint16_t> input = {0xD800, 0};
    EXPECT_DEATH(config::utf16_to_utf8(input.data()), ".*");
}
TEST(UTF16ToUTF8Test, InvalidSurrogatePair2) {
    // A U+D800 high surrogate followed by U+0020 SPACE: the second unit is not
    // a low surrogate, so the conversion is expected to die.
    const std::vector<uint16_t> input = {0xD800, 0x0020, 0};
    EXPECT_DEATH(config::utf16_to_utf8(input.data()), ".*");
}
TEST(UTF16ToUTF8Test, InvalidSurrogatePair3) {
    // Two U+D800 high surrogates in a row: the second unit is not a low
    // surrogate, so the conversion is expected to die.
    const std::vector<uint16_t> input = {0xD800, 0xD800, 0};
    EXPECT_DEATH(config::utf16_to_utf8(input.data()), ".*");
}
TEST(UTF16ToUTF8Test, FullString) {
    // "foo€🤔": mixes 1-byte, 3-byte and 4-byte (surrogate pair) encodings.
    const std::vector<uint16_t> input = {'f', 'o', 'o', 0x20AC, 0xD83E, 0xDD14, 0};
    const std::vector<uint8_t> expected = {
        'f',  'o',  'o',         // ASCII
        0xE2, 0x82, 0xAC,        // U+20AC
        0xF0, 0x9F, 0xA4, 0x94,  // U+1F914
    };

    const auto actual = config::utf16_to_utf8(input.data());

    ASSERT_EQ(actual.size(), expected.size());
    for (size_t idx = 0; idx < expected.size(); idx++) {
        EXPECT_EQ(actual[idx], expected[idx]);
    }
}

View File

@ -1,7 +1,11 @@
#include "wx/config/strutils.h"
#include <cstdint>
#include <wx/tokenzr.h>
#include "core/base/check.h"
namespace config {
// From: https://stackoverflow.com/a/7408245/262458
@ -37,4 +41,40 @@ wxArrayString str_split_with_sep(const wxString& text, const wxString& sep)
return str_split(text, sep, true);
}
// Converts the null-terminated UTF-16 string `utf16` into UTF-8 code units.
//
// The returned vector contains only the encoded bytes; the terminating null
// code unit is not copied. The input must be well-formed UTF-16: every high
// surrogate (0xD800-0xDBFF) must be immediately followed by a low surrogate
// (0xDC00-0xDFFF), and a low surrogate may not appear on its own. Malformed
// input fails a VBAM_CHECK.
std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16) {
    std::vector<uint8_t> out;
    for (size_t i = 0; utf16[i]; i++) {
        const uint16_t c = utf16[i];
        if (c < 0x80) {
            // ASCII, copied through as a single byte.
            out.push_back(static_cast<uint8_t>(c));
        } else if (c < 0x800) {
            // 2-byte UTF-8 sequence.
            out.push_back(static_cast<uint8_t>(0xC0 | (c >> 6)));
            out.push_back(static_cast<uint8_t>(0x80 | (c & 0x3F)));
        } else if (c < 0xD800 || c >= 0xE000) {
            // Regular 3-byte UTF-8 character.
            out.push_back(static_cast<uint8_t>(0xE0 | (c >> 12)));
            out.push_back(static_cast<uint8_t>(0x80 | ((c >> 6) & 0x3F)));
            out.push_back(static_cast<uint8_t>(0x80 | (c & 0x3F)));
        } else {
            // Surrogate range. A pair must start with a high surrogate; a
            // leading low surrogate (0xDC00-0xDFFF) is malformed input and
            // must not be silently combined with whatever follows it.
            VBAM_CHECK(c < 0xDC00);
            const uint32_t high = c;

            // The next code unit must be a low surrogate.
            i++;
            const uint32_t low = utf16[i];
            VBAM_CHECK(low);
            VBAM_CHECK(low >= 0xDC00 && low < 0xE000);

            // Reconstruct the original code point from the pair.
            const uint32_t codepoint = 0x10000 + ((high & 0x3FF) << 10) + (low & 0x3FF);

            // Convert to a 4-byte UTF-8 sequence.
            out.push_back(static_cast<uint8_t>(0xF0 | (codepoint >> 18)));
            out.push_back(static_cast<uint8_t>(0x80 | ((codepoint >> 12) & 0x3F)));
            out.push_back(static_cast<uint8_t>(0x80 | ((codepoint >> 6) & 0x3F)));
            out.push_back(static_cast<uint8_t>(0x80 | (codepoint & 0x3F)));
        }
    }
    return out;
}
} // namespace config

View File

@ -1,6 +1,9 @@
#ifndef VBAM_WX_CONFIG_STRUTILS_H_
#define VBAM_WX_CONFIG_STRUTILS_H_
#include <cstdint>
#include <vector>
#include <wx/string.h>
#include <wx/arrstr.h>
@ -14,6 +17,10 @@ wxArrayString str_split(const wxString& text, const wxString& sep, bool empty_to
// 'A', ',' and 'B' will be in the output.
wxArrayString str_split_with_sep(const wxString& text, const wxString& sep);
// Converts a null-terminated array of UTF-16 code units to a vector of UTF-8 code units.
// Surrogate pairs are combined into a single code point and emitted as a 4-byte
// sequence. The returned vector holds only the encoded bytes; no null terminator
// is appended.
// This will assert if the input is not a valid UTF-16 string.
std::vector<uint8_t> utf16_to_utf8(const uint16_t* utf16);
} // namespace config
#endif // VBAM_WX_CONFIG_STRUTILS_H_