From a73592c2ef050ea4ee31eb9100bb075fdfe7dc92 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 24 Nov 2020 22:18:50 +0300 Subject: [PATCH] [Memory/CPU] UWP: Support separate code execution and write memory, FromApp functions + other Windows memory fixes --- src/xenia/base/mapped_memory_win.cc | 82 ++++++++-- src/xenia/base/memory.cc | 15 ++ src/xenia/base/memory.h | 11 ++ src/xenia/base/memory_posix.cc | 5 + src/xenia/base/memory_win.cc | 58 ++++++- src/xenia/cpu/backend/code_cache.h | 6 +- src/xenia/cpu/backend/x64/x64_code_cache.cc | 146 ++++++++++++------ src/xenia/cpu/backend/x64/x64_code_cache.h | 46 ++++-- .../cpu/backend/x64/x64_code_cache_posix.cc | 2 +- .../cpu/backend/x64/x64_code_cache_win.cc | 50 +++--- src/xenia/cpu/backend/x64/x64_emitter.cc | 18 ++- .../ppc/testing/ppc_testing_native_main.cc | 3 + src/xenia/cpu/stack_walker_win.cc | 13 +- src/xenia/emulator.cc | 2 +- src/xenia/memory.cc | 4 + 15 files changed, 348 insertions(+), 113 deletions(-) diff --git a/src/xenia/base/mapped_memory_win.cc b/src/xenia/base/mapped_memory_win.cc index 25ec5cb92..53b5b8ffe 100644 --- a/src/xenia/base/mapped_memory_win.cc +++ b/src/xenia/base/mapped_memory_win.cc @@ -29,7 +29,7 @@ class Win32MappedMemory : public MappedMemory { if (data_) { UnmapViewOfFile(data_); } - if (mapping_handle != INVALID_HANDLE_VALUE) { + if (mapping_handle) { CloseHandle(mapping_handle); } if (file_handle != INVALID_HANDLE_VALUE) { @@ -42,9 +42,9 @@ class Win32MappedMemory : public MappedMemory { UnmapViewOfFile(data_); data_ = nullptr; } - if (mapping_handle != INVALID_HANDLE_VALUE) { + if (mapping_handle) { CloseHandle(mapping_handle); - mapping_handle = INVALID_HANDLE_VALUE; + mapping_handle = nullptr; } if (file_handle != INVALID_HANDLE_VALUE) { if (truncate_size) { @@ -65,8 +65,13 @@ class Win32MappedMemory : public MappedMemory { size_t aligned_length = length + (offset - aligned_offset); UnmapViewOfFile(data_); +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) data_ = 
MapViewOfFile(mapping_handle, view_access_, aligned_offset >> 32, aligned_offset & 0xFFFFFFFF, aligned_length); +#else + data_ = MapViewOfFileFromApp(mapping_handle, ULONG(view_access_), + ULONG64(aligned_offset), aligned_length); +#endif if (!data_) { return false; } @@ -84,7 +89,7 @@ class Win32MappedMemory : public MappedMemory { } HANDLE file_handle = INVALID_HANDLE_VALUE; - HANDLE mapping_handle = INVALID_HANDLE_VALUE; + HANDLE mapping_handle = nullptr; DWORD view_access_ = 0; }; @@ -129,16 +134,28 @@ std::unique_ptr MappedMemory::Open( return nullptr; } - mm->mapping_handle = CreateFileMapping(mm->file_handle, nullptr, - mapping_protect, aligned_length >> 32, - aligned_length & 0xFFFFFFFF, nullptr); - if (mm->mapping_handle == INVALID_HANDLE_VALUE) { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + mm->mapping_handle = CreateFileMapping( + mm->file_handle, nullptr, mapping_protect, DWORD(aligned_length >> 32), + DWORD(aligned_length), nullptr); +#else + mm->mapping_handle = + CreateFileMappingFromApp(mm->file_handle, nullptr, ULONG(mapping_protect), + ULONG64(aligned_length), nullptr); +#endif + if (!mm->mapping_handle) { return nullptr; } +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) mm->data_ = reinterpret_cast(MapViewOfFile( - mm->mapping_handle, view_access, static_cast(aligned_offset >> 32), - static_cast(aligned_offset & 0xFFFFFFFF), aligned_length)); + mm->mapping_handle, view_access, DWORD(aligned_offset >> 32), + DWORD(aligned_offset), aligned_length)); +#else + mm->data_ = reinterpret_cast( + MapViewOfFileFromApp(mm->mapping_handle, ULONG(view_access), + ULONG64(aligned_offset), aligned_length)); +#endif if (!mm->data_) { return nullptr; } @@ -203,8 +220,8 @@ class Win32ChunkedMappedMemoryWriter : public ChunkedMappedMemoryWriter { class Chunk { public: explicit Chunk(size_t capacity) - : file_handle_(0), - mapping_handle_(0), + : file_handle_(INVALID_HANDLE_VALUE), + mapping_handle_(nullptr), data_(nullptr), offset_(0), 
capacity_(capacity), @@ -217,7 +234,7 @@ class Win32ChunkedMappedMemoryWriter : public ChunkedMappedMemoryWriter { if (mapping_handle_) { CloseHandle(mapping_handle_); } - if (file_handle_) { + if (file_handle_ != INVALID_HANDLE_VALUE) { CloseHandle(file_handle_); } } @@ -231,13 +248,19 @@ class Win32ChunkedMappedMemoryWriter : public ChunkedMappedMemoryWriter { file_handle_ = CreateFile(path.c_str(), file_access, file_share, nullptr, create_mode, FILE_ATTRIBUTE_NORMAL, nullptr); - if (!file_handle_) { + if (file_handle_ == INVALID_HANDLE_VALUE) { return false; } +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) mapping_handle_ = - CreateFileMapping(file_handle_, nullptr, mapping_protect, 0, - static_cast(capacity_), nullptr); + CreateFileMapping(file_handle_, nullptr, mapping_protect, + DWORD(capacity_ >> 32), DWORD(capacity_), nullptr); +#else + mapping_handle_ = CreateFileMappingFromApp(file_handle_, nullptr, + ULONG(mapping_protect), + ULONG64(capacity_), nullptr); +#endif if (!mapping_handle_) { return false; } @@ -247,10 +270,32 @@ class Win32ChunkedMappedMemoryWriter : public ChunkedMappedMemoryWriter { if (low_address_space) { bool successful = false; data_ = reinterpret_cast(0x10000000); +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + HANDLE process = GetCurrentProcess(); +#endif for (int i = 0; i < 1000; ++i) { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) if (MapViewOfFileEx(mapping_handle_, view_access, 0, 0, capacity_, data_)) { successful = true; + } +#else + // VirtualAlloc2FromApp and MapViewOfFile3FromApp were added in + // 10.0.17134.0. 
+ // https://docs.microsoft.com/en-us/uwp/win32-and-com/win32-apis + if (VirtualAlloc2FromApp(process, data_, capacity_, + MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0)) { + if (MapViewOfFile3FromApp(mapping_handle_, process, data_, 0, + capacity_, MEM_REPLACE_PLACEHOLDER, + ULONG(mapping_protect), nullptr, 0)) { + successful = true; + } else { + VirtualFree(data_, capacity_, MEM_RELEASE); + } + } +#endif + if (successful) { break; } data_ += capacity_; @@ -261,8 +306,13 @@ class Win32ChunkedMappedMemoryWriter : public ChunkedMappedMemoryWriter { } } } else { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) data_ = reinterpret_cast( MapViewOfFile(mapping_handle_, view_access, 0, 0, capacity_)); +#else + data_ = reinterpret_cast(MapViewOfFileFromApp( + mapping_handle_, ULONG(view_access), 0, capacity_)); +#endif } if (!data_) { return false; diff --git a/src/xenia/base/memory.cc b/src/xenia/base/memory.cc index f7e70d2ff..155946779 100644 --- a/src/xenia/base/memory.cc +++ b/src/xenia/base/memory.cc @@ -8,11 +8,26 @@ */ #include "xenia/base/memory.h" +#include "xenia/base/cvar.h" #include "xenia/base/platform.h" #include +DEFINE_bool( + writable_executable_memory, true, + "Allow mapping memory with both write and execute access, for simulating " + "behavior on platforms where that's not supported", + "Memory"); + namespace xe { +namespace memory { + +bool IsWritableExecutableMemoryPreferred() { + return IsWritableExecutableMemorySupported() && + cvars::writable_executable_memory; +} + +} // namespace memory // TODO(benvanik): fancy AVX versions. 
// https://github.com/gnuradio/volk/blob/master/kernels/volk/volk_16u_byteswap.h diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index 45471e50f..96eb9b68a 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -35,6 +35,7 @@ enum class PageAccess { kNoAccess = 0, kReadOnly = 1 << 0, kReadWrite = kReadOnly | 1 << 1, + kExecuteReadOnly = kReadOnly | 1 << 2, kExecuteReadWrite = kReadWrite | 1 << 2, }; @@ -49,6 +50,16 @@ enum class DeallocationType { kDecommit = 1 << 1, }; +// Whether the host allows the pages to be allocated or mapped with +// PageAccess::kExecuteReadWrite - if not, separate mappings backed by the same +// memory-mapped file must be used to write to executable pages. +bool IsWritableExecutableMemorySupported(); + +// Whether PageAccess::kExecuteReadWrite is a supported and preferred way of +// writing executable memory, useful for simulating how Xenia would work without +// writable executable memory on a system with it. +bool IsWritableExecutableMemoryPreferred(); + // Allocates a block of memory at the given page-aligned base address. // Fails if the memory is not available. // Specify nullptr for base_address to leave it up to the system. 
diff --git a/src/xenia/base/memory_posix.cc b/src/xenia/base/memory_posix.cc index bd078d90f..271249a87 100644 --- a/src/xenia/base/memory_posix.cc +++ b/src/xenia/base/memory_posix.cc @@ -39,6 +39,8 @@ uint32_t ToPosixProtectFlags(PageAccess access) { return PROT_READ; case PageAccess::kReadWrite: return PROT_READ | PROT_WRITE; + case PageAccess::kExecuteReadOnly: + return PROT_READ | PROT_EXEC; case PageAccess::kExecuteReadWrite: return PROT_READ | PROT_WRITE | PROT_EXEC; default: @@ -47,6 +49,8 @@ uint32_t ToPosixProtectFlags(PageAccess access) { } } +bool IsWritableExecutableMemorySupported() { return true; } + void* AllocFixed(void* base_address, size_t length, AllocationType allocation_type, PageAccess access) { // mmap does not support reserve / commit, so ignore allocation_type. @@ -112,6 +116,7 @@ FileMappingHandle CreateFileMappingHandle(const std::filesystem::path& path, oflag = 0; break; case PageAccess::kReadOnly: + case PageAccess::kExecuteReadOnly: oflag = O_RDONLY; break; case PageAccess::kReadWrite: diff --git a/src/xenia/base/memory_win.cc b/src/xenia/base/memory_win.cc index 343285d94..231a65633 100644 --- a/src/xenia/base/memory_win.cc +++ b/src/xenia/base/memory_win.cc @@ -42,6 +42,8 @@ DWORD ToWin32ProtectFlags(PageAccess access) { return PAGE_READONLY; case PageAccess::kReadWrite: return PAGE_READWRITE; + case PageAccess::kExecuteReadOnly: + return PAGE_EXECUTE_READ; case PageAccess::kExecuteReadWrite: return PAGE_EXECUTE_READWRITE; default: @@ -63,6 +65,8 @@ PageAccess ToXeniaProtectFlags(DWORD access) { return PageAccess::kReadOnly; case PAGE_READWRITE: return PageAccess::kReadWrite; + case PAGE_EXECUTE_READ: + return PageAccess::kExecuteReadOnly; case PAGE_EXECUTE_READWRITE: return PageAccess::kExecuteReadWrite; default: @@ -70,6 +74,17 @@ PageAccess ToXeniaProtectFlags(DWORD access) { } } +bool IsWritableExecutableMemorySupported() { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + return true; +#else + // To test FromApp 
functions on desktop, replace + // WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) with 0 in the #ifs and + // link to WindowsApp.lib. + return false; +#endif +} + void* AllocFixed(void* base_address, size_t length, AllocationType allocation_type, PageAccess access) { DWORD alloc_type = 0; @@ -88,7 +103,12 @@ void* AllocFixed(void* base_address, size_t length, break; } DWORD protect = ToWin32ProtectFlags(access); +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) return VirtualAlloc(base_address, length, alloc_type, protect); +#else + return VirtualAllocFromApp(base_address, length, ULONG(alloc_type), + ULONG(protect)); +#endif } bool DeallocFixed(void* base_address, size_t length, @@ -115,13 +135,19 @@ bool Protect(void* base_address, size_t length, PageAccess access, *out_old_access = PageAccess::kNoAccess; } DWORD new_protect = ToWin32ProtectFlags(access); +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) DWORD old_protect = 0; BOOL result = VirtualProtect(base_address, length, new_protect, &old_protect); +#else + ULONG old_protect = 0; + BOOL result = VirtualProtectFromApp(base_address, length, ULONG(new_protect), + &old_protect); +#endif if (!result) { return false; } if (out_old_access) { - *out_old_access = ToXeniaProtectFlags(old_protect); + *out_old_access = ToXeniaProtectFlags(DWORD(old_protect)); } return true; } @@ -148,9 +174,14 @@ FileMappingHandle CreateFileMappingHandle(const std::filesystem::path& path, DWORD protect = ToWin32ProtectFlags(access) | (commit ? 
SEC_COMMIT : SEC_RESERVE); auto full_path = "Local" / path; - return CreateFileMappingW(INVALID_HANDLE_VALUE, NULL, protect, +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + return CreateFileMappingW(INVALID_HANDLE_VALUE, nullptr, protect, static_cast(length >> 32), static_cast(length), full_path.c_str()); +#else + return CreateFileMappingFromApp(INVALID_HANDLE_VALUE, nullptr, ULONG(protect), + ULONG64(length), full_path.c_str()); +#endif } void CloseFileMappingHandle(FileMappingHandle handle, @@ -160,6 +191,7 @@ void CloseFileMappingHandle(FileMappingHandle handle, void* MapFileView(FileMappingHandle handle, void* base_address, size_t length, PageAccess access, size_t file_offset) { +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) DWORD target_address_low = static_cast(file_offset); DWORD target_address_high = static_cast(file_offset >> 32); DWORD file_access = 0; @@ -170,6 +202,9 @@ void* MapFileView(FileMappingHandle handle, void* base_address, size_t length, case PageAccess::kReadWrite: file_access = FILE_MAP_ALL_ACCESS; break; + case PageAccess::kExecuteReadOnly: + file_access = FILE_MAP_READ | FILE_MAP_EXECUTE; + break; case PageAccess::kExecuteReadWrite: file_access = FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE; break; @@ -180,6 +215,25 @@ void* MapFileView(FileMappingHandle handle, void* base_address, size_t length, } return MapViewOfFileEx(handle, file_access, target_address_high, target_address_low, length, base_address); +#else + // VirtualAlloc2FromApp and MapViewOfFile3FromApp were added in 10.0.17134.0. 
+ // https://docs.microsoft.com/en-us/uwp/win32-and-com/win32-apis + HANDLE process = GetCurrentProcess(); + void* placeholder = VirtualAlloc2FromApp( + process, base_address, length, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, nullptr, 0); + if (!placeholder) { + return nullptr; + } + void* mapping = MapViewOfFile3FromApp( + handle, process, placeholder, ULONG64(file_offset), length, + MEM_REPLACE_PLACEHOLDER, ULONG(ToWin32ProtectFlags(access)), nullptr, 0); + if (!mapping) { + VirtualFree(placeholder, length, MEM_RELEASE); + return nullptr; + } + return mapping; +#endif } bool UnmapFileView(FileMappingHandle handle, void* base_address, diff --git a/src/xenia/cpu/backend/code_cache.h b/src/xenia/cpu/backend/code_cache.h index 03d70114f..318608822 100644 --- a/src/xenia/cpu/backend/code_cache.h +++ b/src/xenia/cpu/backend/code_cache.h @@ -10,6 +10,8 @@ #ifndef XENIA_CPU_BACKEND_CODE_CACHE_H_ #define XENIA_CPU_BACKEND_CODE_CACHE_H_ +#include +#include #include #include "xenia/cpu/function.h" @@ -24,8 +26,8 @@ class CodeCache { virtual ~CodeCache() = default; virtual const std::filesystem::path& file_name() const = 0; - virtual uint32_t base_address() const = 0; - virtual uint32_t total_size() const = 0; + virtual uintptr_t execute_base_address() const = 0; + virtual size_t total_size() const = 0; // Finds a function based on the given host PC (that may be within a // function). diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.cc b/src/xenia/cpu/backend/x64/x64_code_cache.cc index d3b22d14c..beb715661 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache.cc @@ -41,8 +41,15 @@ X64CodeCache::~X64CodeCache() { // Unmap all views and close mapping. 
if (mapping_ != xe::memory::kFileMappingHandleInvalid) { - xe::memory::UnmapFileView(mapping_, generated_code_base_, - kGeneratedCodeSize); + if (generated_code_write_base_ && + generated_code_write_base_ != generated_code_execute_base_) { + xe::memory::UnmapFileView(mapping_, generated_code_write_base_, + kGeneratedCodeSize); + } + if (generated_code_execute_base_) { + xe::memory::UnmapFileView(mapping_, generated_code_execute_base_, + kGeneratedCodeSize); + } xe::memory::CloseFileMappingHandle(mapping_, file_name_); mapping_ = xe::memory::kFileMappingHandleInvalid; } @@ -73,17 +80,41 @@ bool X64CodeCache::Initialize() { } // Map generated code region into the file. Pages are committed as required. - generated_code_base_ = reinterpret_cast(xe::memory::MapFileView( - mapping_, reinterpret_cast(kGeneratedCodeBase), kGeneratedCodeSize, - xe::memory::PageAccess::kExecuteReadWrite, 0)); - if (!generated_code_base_) { - XELOGE("Unable to allocate code cache generated code storage"); - XELOGE( - "This is likely because the {:X}-{:X} range is in use by some other " - "system DLL", - static_cast(kGeneratedCodeBase), - kGeneratedCodeBase + kGeneratedCodeSize); - return false; + if (xe::memory::IsWritableExecutableMemoryPreferred()) { + generated_code_execute_base_ = + reinterpret_cast(xe::memory::MapFileView( + mapping_, reinterpret_cast(kGeneratedCodeExecuteBase), + kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadWrite, 0)); + generated_code_write_base_ = generated_code_execute_base_; + if (!generated_code_execute_base_ || !generated_code_write_base_) { + XELOGE("Unable to allocate code cache generated code storage"); + XELOGE( + "This is likely because the {:X}-{:X} range is in use by some other " + "system DLL", + uint64_t(kGeneratedCodeExecuteBase), + uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize)); + return false; + } + } else { + generated_code_execute_base_ = + reinterpret_cast(xe::memory::MapFileView( + mapping_, 
reinterpret_cast(kGeneratedCodeExecuteBase), + kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadOnly, 0)); + generated_code_write_base_ = + reinterpret_cast(xe::memory::MapFileView( + mapping_, reinterpret_cast(kGeneratedCodeWriteBase), + kGeneratedCodeSize, xe::memory::PageAccess::kReadWrite, 0)); + if (!generated_code_execute_base_ || !generated_code_write_base_) { + XELOGE("Unable to allocate code cache generated code storage"); + XELOGE( + "This is likely because the {:X}-{:X} and {:X}-{:X} ranges are in " + "use by some other system DLL", + uint64_t(kGeneratedCodeExecuteBase), + uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize), + uint64_t(kGeneratedCodeWriteBase), + uint64_t(kGeneratedCodeWriteBase + kGeneratedCodeSize)); + return false; + } } // Preallocate the function map to a large, reasonable size. @@ -117,7 +148,7 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low, xe::memory::AllocFixed( indirection_table_base_ + (guest_low - kIndirectionTableBase), guest_high - guest_low, xe::memory::AllocationType::kCommit, - xe::memory::PageAccess::kExecuteReadWrite); + xe::memory::PageAccess::kReadWrite); // Fill memory with the default value. uint32_t* p = reinterpret_cast(indirection_table_base_); @@ -126,21 +157,26 @@ void X64CodeCache::CommitExecutableRange(uint32_t guest_low, } } -void* X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info) { +void X64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code, + const EmitFunctionInfo& func_info, + void*& code_execute_address_out, + void*& code_write_address_out) { // Same for now. We may use different pools or whatnot later on, like when // we only want to place guest code in a serialized cache on disk. 
- return PlaceGuestCode(guest_address, machine_code, func_info, nullptr); + PlaceGuestCode(guest_address, machine_code, func_info, nullptr, + code_execute_address_out, code_write_address_out); } -void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info, - GuestFunction* function_info) { +void X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, + const EmitFunctionInfo& func_info, + GuestFunction* function_info, + void*& code_execute_address_out, + void*& code_write_address_out) { // Hold a lock while we bump the pointers up. This is important as the // unwind table requires entries AND code to be sorted in order. size_t low_mark; size_t high_mark; - uint8_t* code_address; + uint8_t* code_execute_address; UnwindReservation unwind_reservation; { auto global_lock = global_critical_region_.Acquire(); @@ -149,26 +185,33 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, // Reserve code. // Always move the code to land on 16b alignment. - code_address = generated_code_base_ + generated_code_offset_; + code_execute_address = + generated_code_execute_base_ + generated_code_offset_; + code_execute_address_out = code_execute_address; + uint8_t* code_write_address = + generated_code_write_base_ + generated_code_offset_; + code_write_address_out = code_write_address; generated_code_offset_ += xe::round_up(func_info.code_size.total, 16); - auto tail_address = generated_code_base_ + generated_code_offset_; + auto tail_write_address = + generated_code_write_base_ + generated_code_offset_; // Reserve unwind info. // We go on the high size of the unwind info as we don't know how big we // need it, and a few extra bytes of padding isn't the worst thing. 
- unwind_reservation = - RequestUnwindReservation(generated_code_base_ + generated_code_offset_); + unwind_reservation = RequestUnwindReservation(generated_code_write_base_ + + generated_code_offset_); generated_code_offset_ += xe::round_up(unwind_reservation.data_size, 16); - auto end_address = generated_code_base_ + generated_code_offset_; + auto end_write_address = + generated_code_write_base_ + generated_code_offset_; high_mark = generated_code_offset_; // Store in map. It is maintained in sorted order of host PC dependent on // us also being append-only. generated_code_map_.emplace_back( - (uint64_t(code_address - generated_code_base_) << 32) | + (uint64_t(code_execute_address - generated_code_execute_base_) << 32) | generated_code_offset_, function_info); @@ -185,21 +228,30 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, if (high_mark <= old_commit_mark) break; new_commit_mark = old_commit_mark + 16 * 1024 * 1024; - xe::memory::AllocFixed(generated_code_base_, new_commit_mark, - xe::memory::AllocationType::kCommit, - xe::memory::PageAccess::kExecuteReadWrite); + if (generated_code_execute_base_ == generated_code_write_base_) { + xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kExecuteReadWrite); + } else { + xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kExecuteReadOnly); + xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kReadWrite); + } } while (generated_code_commit_mark_.compare_exchange_weak( old_commit_mark, new_commit_mark)); // Copy code. 
- std::memcpy(code_address, machine_code, func_info.code_size.total); + std::memcpy(code_write_address, machine_code, func_info.code_size.total); // Fill unused slots with 0xCC - std::memset(tail_address, 0xCC, - static_cast(end_address - tail_address)); + std::memset(tail_write_address, 0xCC, + static_cast(end_write_address - tail_write_address)); // Notify subclasses of placed code. - PlaceCode(guest_address, machine_code, func_info, code_address, + PlaceCode(guest_address, machine_code, func_info, code_execute_address, unwind_reservation); } @@ -214,7 +266,7 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, iJIT_Method_Load_V2 method = {0}; method.method_id = iJIT_GetNewMethodID(); - method.method_load_address = code_address; + method.method_load_address = code_execute_address; method.method_size = uint32_t(code_size); method.method_name = const_cast(method_name.data()); method.module_name = function_info @@ -230,10 +282,9 @@ void* X64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code, if (guest_address && indirection_table_base_) { uint32_t* indirection_slot = reinterpret_cast( indirection_table_base_ + (guest_address - kIndirectionTableBase)); - *indirection_slot = uint32_t(reinterpret_cast(code_address)); + *indirection_slot = + uint32_t(reinterpret_cast(code_execute_address)); } - - return code_address; } uint32_t X64CodeCache::PlaceData(const void* data, size_t length) { @@ -245,7 +296,7 @@ uint32_t X64CodeCache::PlaceData(const void* data, size_t length) { // Reserve code. // Always move the code to land on 16b alignment. 
- data_address = generated_code_base_ + generated_code_offset_; + data_address = generated_code_write_base_ + generated_code_offset_; generated_code_offset_ += xe::round_up(length, 16); high_mark = generated_code_offset_; @@ -260,9 +311,18 @@ uint32_t X64CodeCache::PlaceData(const void* data, size_t length) { if (high_mark <= old_commit_mark) break; new_commit_mark = old_commit_mark + 16 * 1024 * 1024; - xe::memory::AllocFixed(generated_code_base_, new_commit_mark, - xe::memory::AllocationType::kCommit, - xe::memory::PageAccess::kExecuteReadWrite); + if (generated_code_execute_base_ == generated_code_write_base_) { + xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kExecuteReadWrite); + } else { + xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kExecuteReadOnly); + xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark, + xe::memory::AllocationType::kCommit, + xe::memory::PageAccess::kReadWrite); + } } while (generated_code_commit_mark_.compare_exchange_weak(old_commit_mark, new_commit_mark)); @@ -273,7 +333,7 @@ uint32_t X64CodeCache::PlaceData(const void* data, size_t length) { } GuestFunction* X64CodeCache::LookupFunction(uint64_t host_pc) { - uint32_t key = uint32_t(host_pc - kGeneratedCodeBase); + uint32_t key = uint32_t(host_pc - kGeneratedCodeExecuteBase); void* fn_entry = std::bsearch( &key, generated_code_map_.data(), generated_code_map_.size() + 1, sizeof(std::pair), diff --git a/src/xenia/cpu/backend/x64/x64_code_cache.h b/src/xenia/cpu/backend/x64/x64_code_cache.h index 9f7424ec8..021e5e684 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache.h +++ b/src/xenia/cpu/backend/x64/x64_code_cache.h @@ -11,6 +11,8 @@ #define XENIA_CPU_BACKEND_X64_X64_CODE_CACHE_H_ #include +#include +#include #include #include #include @@ -46,8 +48,10 @@ class X64CodeCache : public CodeCache { 
virtual bool Initialize(); const std::filesystem::path& file_name() const override { return file_name_; } - uint32_t base_address() const override { return kGeneratedCodeBase; } - uint32_t total_size() const override { return kGeneratedCodeSize; } + uintptr_t execute_base_address() const override { + return kGeneratedCodeExecuteBase; + } + size_t total_size() const override { return kGeneratedCodeSize; } // TODO(benvanik): ELF serialization/etc // TODO(benvanik): keep track of code blocks @@ -59,11 +63,15 @@ class X64CodeCache : public CodeCache { void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high); - void* PlaceHostCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info); - void* PlaceGuestCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info, - GuestFunction* function_info); + void PlaceHostCode(uint32_t guest_address, void* machine_code, + const EmitFunctionInfo& func_info, + void*& code_execute_address_out, + void*& code_write_address_out); + void PlaceGuestCode(uint32_t guest_address, void* machine_code, + const EmitFunctionInfo& func_info, + GuestFunction* function_info, + void*& code_execute_address_out, + void*& code_write_address_out); uint32_t PlaceData(const void* data, size_t length); GuestFunction* LookupFunction(uint64_t host_pc) override; @@ -71,13 +79,16 @@ class X64CodeCache : public CodeCache { protected: // All executable code falls within 0x80000000 to 0x9FFFFFFF, so we can // only map enough for lookups within that range. - static const uint64_t kIndirectionTableBase = 0x80000000; - static const uint64_t kIndirectionTableSize = 0x1FFFFFFF; + static const size_t kIndirectionTableSize = 0x1FFFFFFF; + static const uintptr_t kIndirectionTableBase = 0x80000000; // The code range is 512MB, but we know the total code games will have is // pretty small (dozens of mb at most) and our expansion is reasonablish // so 256MB should be more than enough. 
- static const uint64_t kGeneratedCodeBase = 0xA0000000; - static const uint64_t kGeneratedCodeSize = 0x0FFFFFFF; + static const size_t kGeneratedCodeSize = 0x0FFFFFFF; + static const uintptr_t kGeneratedCodeExecuteBase = 0xA0000000; + // Used for writing when PageAccess::kExecuteReadWrite is not supported. + static const uintptr_t kGeneratedCodeWriteBase = + kGeneratedCodeExecuteBase + kGeneratedCodeSize + 1; // This is picked to be high enough to cover whatever we can reasonably // expect. If we hit issues with this it probably means some corner case @@ -96,7 +107,8 @@ class X64CodeCache : public CodeCache { return UnwindReservation(); } virtual void PlaceCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info, void* code_address, + const EmitFunctionInfo& func_info, + void* code_execute_address, UnwindReservation unwind_reservation) {} std::filesystem::path file_name_; @@ -114,9 +126,13 @@ class X64CodeCache : public CodeCache { // the generated code table that correspond to the PPC functions in guest // space. uint8_t* indirection_table_base_ = nullptr; - // Fixed at kGeneratedCodeBase and holding all generated code, growing as - // needed. - uint8_t* generated_code_base_ = nullptr; + // Fixed at kGeneratedCodeExecuteBase and holding all generated code, growing + // as needed. + uint8_t* generated_code_execute_base_ = nullptr; + // View of the memory that backs generated_code_execute_base_ when + // PageAccess::kExecuteReadWrite is not supported, for writing the generated + // code. Equals to generated_code_execute_base_ when it's supported. + uint8_t* generated_code_write_base_ = nullptr; // Current offset to empty space in generated code. size_t generated_code_offset_ = 0; // Current high water mark of COMMITTED code. 
diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc b/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc index 490ab2ce9..e889eba0c 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache_posix.cc @@ -27,7 +27,7 @@ class PosixX64CodeCache : public X64CodeCache { /* UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override; void PlaceCode(uint32_t guest_address, void* machine_code, size_t code_size, - size_t stack_size, void* code_address, + size_t stack_size, void* code_execute_address, UnwindReservation unwind_reservation) override; void InitializeUnwindEntry(uint8_t* unwind_entry_address, diff --git a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc index aed5d3f60..0aff67034 100644 --- a/src/xenia/cpu/backend/x64/x64_code_cache_win.cc +++ b/src/xenia/cpu/backend/x64/x64_code_cache_win.cc @@ -107,11 +107,12 @@ class Win32X64CodeCache : public X64CodeCache { private: UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override; void PlaceCode(uint32_t guest_address, void* machine_code, - const EmitFunctionInfo& func_info, void* code_address, + const EmitFunctionInfo& func_info, void* code_execute_address, UnwindReservation unwind_reservation) override; void InitializeUnwindEntry(uint8_t* unwind_entry_address, - size_t unwind_table_slot, void* code_address, + size_t unwind_table_slot, + void* code_execute_address, const EmitFunctionInfo& func_info); // Growable function table system handle. 
@@ -140,9 +141,9 @@ Win32X64CodeCache::~Win32X64CodeCache() { delete_growable_table_(unwind_table_handle_); } } else { - if (generated_code_base_) { + if (generated_code_execute_base_) { RtlDeleteFunctionTable(reinterpret_cast( - reinterpret_cast(generated_code_base_) | 0x3)); + reinterpret_cast(generated_code_execute_base_) | 0x3)); } } } @@ -176,11 +177,12 @@ bool Win32X64CodeCache::Initialize() { // Create table and register with the system. It's empty now, but we'll grow // it as functions are added. if (supports_growable_table_) { - if (add_growable_table_(&unwind_table_handle_, unwind_table_.data(), - unwind_table_count_, DWORD(unwind_table_.size()), - reinterpret_cast(generated_code_base_), - reinterpret_cast(generated_code_base_ + - kGeneratedCodeSize))) { + if (add_growable_table_( + &unwind_table_handle_, unwind_table_.data(), unwind_table_count_, + DWORD(unwind_table_.size()), + reinterpret_cast(generated_code_execute_base_), + reinterpret_cast(generated_code_execute_base_ + + kGeneratedCodeSize))) { XELOGE("Unable to create unwind function table"); return false; } @@ -188,8 +190,9 @@ bool Win32X64CodeCache::Initialize() { // Install a callback that the debugger will use to lookup unwind info on // demand. if (!RtlInstallFunctionTableCallback( - reinterpret_cast(generated_code_base_) | 0x3, - reinterpret_cast(generated_code_base_), kGeneratedCodeSize, + reinterpret_cast(generated_code_execute_base_) | 0x3, + reinterpret_cast(generated_code_execute_base_), + kGeneratedCodeSize, [](DWORD64 control_pc, PVOID context) { auto code_cache = reinterpret_cast(context); return reinterpret_cast( @@ -216,11 +219,12 @@ Win32X64CodeCache::RequestUnwindReservation(uint8_t* entry_address) { void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, const EmitFunctionInfo& func_info, - void* code_address, + void* code_execute_address, UnwindReservation unwind_reservation) { // Add unwind info. 
InitializeUnwindEntry(unwind_reservation.entry_address, - unwind_reservation.table_slot, code_address, func_info); + unwind_reservation.table_slot, code_execute_address, + func_info); if (supports_growable_table_) { // Notify that the unwind table has grown. @@ -229,13 +233,15 @@ void Win32X64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code, } // This isn't needed on x64 (probably), but is convention. - FlushInstructionCache(GetCurrentProcess(), code_address, + // On UWP, FlushInstructionCache is available starting from 10.0.16299.0. + // https://docs.microsoft.com/en-us/uwp/win32-and-com/win32-apis + FlushInstructionCache(GetCurrentProcess(), code_execute_address, func_info.code_size.total); } void Win32X64CodeCache::InitializeUnwindEntry( - uint8_t* unwind_entry_address, size_t unwind_table_slot, void* code_address, - const EmitFunctionInfo& func_info) { + uint8_t* unwind_entry_address, size_t unwind_table_slot, + void* code_execute_address, const EmitFunctionInfo& func_info) { auto unwind_info = reinterpret_cast(unwind_entry_address); UNWIND_CODE* unwind_code = nullptr; @@ -299,10 +305,12 @@ void Win32X64CodeCache::InitializeUnwindEntry( // Add entry. 
auto& fn_entry = unwind_table_[unwind_table_slot]; fn_entry.BeginAddress = - (DWORD)(reinterpret_cast(code_address) - generated_code_base_); + DWORD(reinterpret_cast(code_execute_address) - + generated_code_execute_base_); fn_entry.EndAddress = - (DWORD)(fn_entry.BeginAddress + func_info.code_size.total); - fn_entry.UnwindData = (DWORD)(unwind_entry_address - generated_code_base_); + DWORD(fn_entry.BeginAddress + func_info.code_size.total); + fn_entry.UnwindData = + DWORD(unwind_entry_address - generated_code_execute_base_); } void* Win32X64CodeCache::LookupUnwindInfo(uint64_t host_pc) { @@ -310,8 +318,8 @@ void* Win32X64CodeCache::LookupUnwindInfo(uint64_t host_pc) { &host_pc, unwind_table_.data(), unwind_table_count_, sizeof(RUNTIME_FUNCTION), [](const void* key_ptr, const void* element_ptr) { - auto key = - *reinterpret_cast(key_ptr) - kGeneratedCodeBase; + auto key = *reinterpret_cast(key_ptr) - + kGeneratedCodeExecuteBase; auto element = reinterpret_cast(element_ptr); if (key < element->BeginAddress) { return -1; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index e772276e7..37d1cdc77 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -125,20 +125,26 @@ void* X64Emitter::Emplace(const EmitFunctionInfo& func_info, // top_ points to the Xbyak buffer, and since we are in AutoGrow mode // it has pending relocations. We copy the top_ to our buffer, swap the // pointer, relocate, then return the original scratch pointer for use. + // top_ is used by Xbyak's ready() as both write base pointer and the absolute + // address base, which would not work on platforms not supporting writable + // executable memory, but Xenia doesn't use absolute label addresses in the + // generated code. 
uint8_t* old_address = top_; - void* new_address; + void* new_execute_address; + void* new_write_address; assert_true(func_info.code_size.total == size_); if (function) { - new_address = code_cache_->PlaceGuestCode(function->address(), top_, - func_info, function); + code_cache_->PlaceGuestCode(function->address(), top_, func_info, function, + new_execute_address, new_write_address); } else { - new_address = code_cache_->PlaceHostCode(0, top_, func_info); + code_cache_->PlaceHostCode(0, top_, func_info, new_execute_address, + new_write_address); } - top_ = reinterpret_cast(new_address); + top_ = reinterpret_cast(new_write_address); ready(); top_ = old_address; reset(); - return new_address; + return new_execute_address; } bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) { diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_native_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_native_main.cc index c5f835133..074552d36 100644 --- a/src/xenia/cpu/ppc/testing/ppc_testing_native_main.cc +++ b/src/xenia/cpu/ppc/testing/ppc_testing_native_main.cc @@ -177,6 +177,9 @@ class TestRunner { public: TestRunner() { memory_size_ = 64 * 1024 * 1024; + // FIXME(Triang3l): If this is ever compiled for a platform without + // xe::memory::IsWritableExecutableMemorySupported, two memory mappings must + // be used. memory_ = memory::AllocFixed(nullptr, memory_size_, memory::AllocationType::kReserveCommit, memory::PageAccess::kExecuteReadWrite); diff --git a/src/xenia/cpu/stack_walker_win.cc b/src/xenia/cpu/stack_walker_win.cc index 4339e3a8b..cbfa96023 100644 --- a/src/xenia/cpu/stack_walker_win.cc +++ b/src/xenia/cpu/stack_walker_win.cc @@ -9,6 +9,7 @@ #include "xenia/cpu/stack_walker.h" +#include #include #include "xenia/base/logging.h" @@ -120,8 +121,8 @@ class Win32StackWalker : public StackWalker { // They never change, so it's fine even if they are touched from multiple // threads. 
code_cache_ = code_cache; - code_cache_min_ = code_cache_->base_address(); - code_cache_max_ = code_cache_->base_address() + code_cache_->total_size(); + code_cache_min_ = code_cache_->execute_base_address(); + code_cache_max_ = code_cache_min_ + code_cache_->total_size(); } bool Initialize() { @@ -297,13 +298,13 @@ class Win32StackWalker : public StackWalker { std::mutex dbghelp_mutex_; static xe::cpu::backend::CodeCache* code_cache_; - static uint32_t code_cache_min_; - static uint32_t code_cache_max_; + static uintptr_t code_cache_min_; + static uintptr_t code_cache_max_; }; xe::cpu::backend::CodeCache* Win32StackWalker::code_cache_ = nullptr; -uint32_t Win32StackWalker::code_cache_min_ = 0; -uint32_t Win32StackWalker::code_cache_max_ = 0; +uintptr_t Win32StackWalker::code_cache_min_ = 0; +uintptr_t Win32StackWalker::code_cache_max_ = 0; std::unique_ptr StackWalker::Create( backend::CodeCache* code_cache) { diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index 7f62efc7c..aca6bd52a 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -513,7 +513,7 @@ bool Emulator::ExceptionCallbackThunk(Exception* ex, void* data) { bool Emulator::ExceptionCallback(Exception* ex) { // Check to see if the exception occurred in guest code. 
auto code_cache = processor()->backend()->code_cache(); - auto code_base = code_cache->base_address(); + auto code_base = code_cache->execute_base_address(); auto code_end = code_base + code_cache->total_size(); if (!processor()->is_debugger_attached() && debugging::IsDebuggerAttached()) { diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index c6e55c98b..5b631e416 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -621,6 +621,10 @@ uint32_t FromPageAccess(xe::memory::PageAccess protect) { return kMemoryProtectRead; case memory::PageAccess::kReadWrite: return kMemoryProtectRead | kMemoryProtectWrite; + case memory::PageAccess::kExecuteReadOnly: + // Guest memory cannot be executable - this should never happen :) + assert_always(); + return kMemoryProtectRead; case memory::PageAccess::kExecuteReadWrite: // Guest memory cannot be executable - this should never happen :) assert_always();