forked from ShuriZma/suyu
1
0
Fork 0

gdbserver: use numeric character references for unicode

This commit is contained in:
Liam 2023-10-01 16:21:23 -04:00
parent a0d56c855c
commit 38394f36d7
3 changed files with 21 additions and 2 deletions

View File

@ -135,6 +135,11 @@ std::u16string UTF8ToUTF16(std::string_view input) {
return convert.from_bytes(input.data(), input.data() + input.size());
}
/// Decodes the UTF-8 bytes in `input` into a UTF-32 string.
///
/// The conversion is delegated to std::wstring_convert::from_bytes, which
/// reports a failed conversion by throwing std::range_error (no fallback
/// error string is configured here), so callers that may pass malformed
/// input need to handle that exception.
std::u32string UTF8ToUTF32(std::string_view input) {
    using Utf8Codec = std::codecvt_utf8<char32_t>;

    // Half-open byte range [first, last) covering the whole view.
    const char* const first = input.data();
    const char* const last = first + input.size();

    std::wstring_convert<Utf8Codec, char32_t> decoder;
    return decoder.from_bytes(first, last);
}
#ifdef _WIN32
static std::wstring CPToUTF16(u32 code_page, std::string_view input) {
const auto size =

View File

@ -38,6 +38,7 @@ bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _
// String encoding conversion helpers. Each takes a non-owning view of the
// input and returns the converted text as a new string; [[nodiscard]] makes
// the compiler warn when a caller discards that result.
[[nodiscard]] std::string UTF16ToUTF8(std::u16string_view input);
[[nodiscard]] std::u16string UTF8ToUTF16(std::string_view input);
// Converts UTF-8 bytes to UTF-32 code points. The definition uses
// std::wstring_convert::from_bytes, so callers passing possibly-malformed
// input should be prepared to catch std::range_error.
[[nodiscard]] std::u32string UTF8ToUTF32(std::string_view input);
#ifdef _WIN32
[[nodiscard]] std::string UTF16ToUTF8(std::wstring_view input);

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
#include <codecvt>
#include <locale>
#include <numeric>
#include <optional>
#include <thread>
@ -12,6 +14,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "common/string_util.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/debugger/gdbstub.h"
@ -68,10 +71,16 @@ static std::string EscapeGDB(std::string_view data) {
}
static std::string EscapeXML(std::string_view data) {
std::u32string converted = U"[Encoding error]";
try {
converted = Common::UTF8ToUTF32(data);
} catch (std::range_error&) {
}
std::string escaped;
escaped.reserve(data.size());
for (char c : data) {
for (char32_t c : converted) {
switch (c) {
case '&':
escaped += "&amp;";
@ -86,7 +95,11 @@ static std::string EscapeXML(std::string_view data) {
escaped += "&gt;";
break;
default:
escaped += c;
if (c > 0x7f) {
escaped += fmt::format("&#{};", static_cast<u32>(c));
} else {
escaped += static_cast<char>(c);
}
break;
}
}