From 6cba5ba7e6565fa06cc093ccd177a1b2954063fe Mon Sep 17 00:00:00 2001 From: NicknineTheEagle Date: Sun, 22 Sep 2024 20:24:37 +0300 Subject: [PATCH] Convert STFS filenames from Win-1252 to UTF-8 --- src/xenia/base/string.cc | 77 +++++++++++++++++++ src/xenia/base/string.h | 2 + src/xenia/vfs/devices/disc_image_device.cc | 4 +- .../xcontent_devices/stfs_container_device.cc | 6 +- .../xcontent_devices/svod_container_device.cc | 4 +- 5 files changed, 89 insertions(+), 4 deletions(-) diff --git a/src/xenia/base/string.cc b/src/xenia/base/string.cc index 38b0ccf5e..9d13b7d86 100644 --- a/src/xenia/base/string.cc +++ b/src/xenia/base/string.cc @@ -14,6 +14,9 @@ #include #include "xenia/base/platform.h" +#if XE_PLATFORM_WIN32 +#include "xenia/base/platform_win.h" +#endif #if !XE_PLATFORM_WIN32 #include @@ -58,4 +61,78 @@ std::u16string to_utf16(const std::string_view source) { return utfcpp::utf8to16(source); } +std::string utf8_to_win1252(const std::string_view source) { +#if XE_PLATFORM_WIN32 + std::string input_str(source); + int srclen = static_cast(input_str.size()); + + int wlen = + MultiByteToWideChar(CP_UTF8, 0, input_str.c_str(), srclen, NULL, 0); + if (!wlen) { + return ""; + } + std::vector wbuf(wlen); + int result = MultiByteToWideChar(CP_UTF8, 0, input_str.c_str(), srclen, + wbuf.data(), wlen); + if (!result) { + return ""; + } + + int len = WideCharToMultiByte(1252, 0, wbuf.data(), wlen, NULL, 0, "_", NULL); + if (!len) { + return ""; + } + std::vector buf(len); + result = WideCharToMultiByte(1252, 0, wbuf.data(), wlen, buf.data(), len, "_", + NULL); + if (!result) { + return ""; + } + + std::string output_str(buf.begin(), buf.end()); + return output_str; +#else + // TODO: Use iconv on POSIX. 
+ std::string output_str(source); + return output_str; +#endif +} + +std::string win1252_to_utf8(const std::string_view source) { +#if XE_PLATFORM_WIN32 + std::string input_str(source); + int srclen = static_cast(input_str.size()); + + int wlen = MultiByteToWideChar(1252, 0, input_str.c_str(), srclen, NULL, 0); + if (!wlen) { + return ""; + } + std::vector wbuf(wlen); + int result = MultiByteToWideChar(1252, 0, input_str.c_str(), srclen, + wbuf.data(), wlen); + if (!result) { + return ""; + } + + int len = + WideCharToMultiByte(CP_UTF8, 0, wbuf.data(), wlen, NULL, 0, NULL, NULL); + if (!len) { + return ""; + } + std::vector buf(len); + result = WideCharToMultiByte(CP_UTF8, 0, wbuf.data(), wlen, buf.data(), len, + NULL, NULL); // Must be NULL for CP_UTF8. + if (!result) { + return ""; + } + + std::string output_str(buf.begin(), buf.end()); + return output_str; +#else + // TODO: Use iconv on POSIX; for now the input is returned unchanged. + std::string output_str(source); + return output_str; +#endif +} + } // namespace xe diff --git a/src/xenia/base/string.h b/src/xenia/base/string.h index b768d22e6..70f39d3be 100644 --- a/src/xenia/base/string.h +++ b/src/xenia/base/string.h @@ -22,6 +22,8 @@ char* xe_strdup(const char* source); std::string to_utf8(const std::u16string_view source); std::u16string to_utf16(const std::string_view source); +std::string utf8_to_win1252(const std::string_view source); +std::string win1252_to_utf8(const std::string_view source); } // namespace xe diff --git a/src/xenia/vfs/devices/disc_image_device.cc b/src/xenia/vfs/devices/disc_image_device.cc index ef8800a03..4ab72d1fb 100644 --- a/src/xenia/vfs/devices/disc_image_device.cc +++ b/src/xenia/vfs/devices/disc_image_device.cc @@ -140,7 +140,9 @@ bool DiscImageDevice::ReadEntry(ParseState* state, const uint8_t* buffer, return false; } - auto name = std::string(name_buffer, name_length); + // Filename is stored as Windows-1252, convert it to UTF-8. 
+ auto ansi_name = std::string(name_buffer, name_length); + auto name = xe::win1252_to_utf8(ansi_name); auto entry = DiscImageEntry::Create(this, parent, name, mmap_.get()); entry->attributes_ = attributes | kFileAttributeReadOnly; diff --git a/src/xenia/vfs/devices/xcontent_devices/stfs_container_device.cc b/src/xenia/vfs/devices/xcontent_devices/stfs_container_device.cc index 417f05414..a8d9e62de 100644 --- a/src/xenia/vfs/devices/xcontent_devices/stfs_container_device.cc +++ b/src/xenia/vfs/devices/xcontent_devices/stfs_container_device.cc @@ -113,8 +113,10 @@ StfsContainerDevice::Result StfsContainerDevice::Read() { std::unique_ptr StfsContainerDevice::ReadEntry( Entry* parent, MultiFileHandles* files, const StfsDirectoryEntry* dir_entry) { - std::string name(reinterpret_cast(dir_entry->name), - dir_entry->flags.name_length & 0x3F); + // Filename is stored as Windows-1252, convert it to UTF-8. + std::string ansi_name(reinterpret_cast(dir_entry->name), + dir_entry->flags.name_length & 0x3F); + std::string name = xe::win1252_to_utf8(ansi_name); auto entry = XContentContainerEntry::Create(this, parent, name, &files_); diff --git a/src/xenia/vfs/devices/xcontent_devices/svod_container_device.cc b/src/xenia/vfs/devices/xcontent_devices/svod_container_device.cc index 21b367cc8..8ec80ebf2 100644 --- a/src/xenia/vfs/devices/xcontent_devices/svod_container_device.cc +++ b/src/xenia/vfs/devices/xcontent_devices/svod_container_device.cc @@ -153,7 +153,9 @@ SvodContainerDevice::Result SvodContainerDevice::ReadEntry( return Result::kReadError; } - auto name = std::string(name_buffer.get(), dir_entry.name_length); + // Filename is stored as Windows-1252, convert it to UTF-8. + auto ansi_name = std::string(name_buffer.get(), dir_entry.name_length); + auto name = xe::win1252_to_utf8(ansi_name); // Read the left node if (dir_entry.node_l) {