From bf2e38aed598fbcfaf6cddb1d3a98db1e19642f0 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Mon, 23 Nov 2020 01:06:25 +1000 Subject: [PATCH] CPU/Recompiler: Implement LUT-based fastmem --- android/app/src/main/res/values/arrays.xml | 10 + .../src/main/res/xml/advanced_preferences.xml | 9 +- src/common/page_fault_handler.cpp | 23 +- src/core/CMakeLists.txt | 12 +- src/core/bus.cpp | 300 +++++++---- src/core/bus.h | 18 +- src/core/core.vcxproj | 10 +- src/core/cpu_code_cache.cpp | 87 +++- src/core/cpu_code_cache.h | 8 +- src/core/cpu_core.cpp | 6 +- src/core/cpu_recompiler_code_generator.cpp | 6 +- src/core/cpu_recompiler_code_generator.h | 18 +- .../cpu_recompiler_code_generator_aarch32.cpp | 246 +++++++++ .../cpu_recompiler_code_generator_aarch64.cpp | 193 +++++-- .../cpu_recompiler_code_generator_generic.cpp | 28 +- .../cpu_recompiler_code_generator_x64.cpp | 477 +++++++++++------- src/core/cpu_recompiler_thunks.h | 4 - src/core/host_interface.cpp | 13 +- src/core/settings.cpp | 37 +- src/core/settings.h | 19 +- src/core/types.h | 14 +- .../libretro_host_interface.cpp | 11 +- src/duckstation-qt/advancedsettingswidget.cpp | 42 +- src/duckstation-qt/duckstation-qt.vcxproj | 16 +- .../duckstation-qt.vcxproj.filters | 8 + src/duckstation-sdl/sdl_host_interface.cpp | 16 +- 26 files changed, 1177 insertions(+), 454 deletions(-) diff --git a/android/app/src/main/res/values/arrays.xml b/android/app/src/main/res/values/arrays.xml index ff751334f..efe1d5ae2 100644 --- a/android/app/src/main/res/values/arrays.xml +++ b/android/app/src/main/res/values/arrays.xml @@ -21,6 +21,16 @@ CachedInterpreter Recompiler + + Disabled (Slowest) + MMap (Hardware, Fastest, 64-Bit Only) + LUT (Faster) + + + Disabled + MMap + LUT + Hardware (OpenGL) Hardware (Vulkan) diff --git a/android/app/src/main/res/xml/advanced_preferences.xml b/android/app/src/main/res/xml/advanced_preferences.xml index 6953f5c72..2230c8653 100644 --- a/android/app/src/main/res/xml/advanced_preferences.xml +++ 
b/android/app/src/main/res/xml/advanced_preferences.xml @@ -47,10 +47,13 @@ app:defaultValue="false" app:summary="Determines whether the CPU's instruction cache is simulated in the recompiler. Improves accuracy at a small cost to performance. If games are running too fast, try enabling this option." app:iconSpaceReserved="false" /> - m_handlers; static std::mutex m_handler_lock; static thread_local bool s_in_handler; -#ifdef __aarch64__ +#if defined(CPU_AARCH32) +static bool IsStoreInstruction(const void* ptr) +{ + u32 bits; + std::memcpy(&bits, ptr, sizeof(bits)); + + // TODO + return false; +} + +#elif defined(CPU_AARCH64) static bool IsStoreInstruction(const void* ptr) { u32 bits; @@ -118,10 +128,13 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx) #ifndef __APPLE__ void* const exception_address = reinterpret_cast(info->si_addr); -#if defined(__x86_64__) +#if defined(CPU_X64) void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.gregs[REG_RIP]); const bool is_write = (static_cast(ctx)->uc_mcontext.gregs[REG_ERR] & 2) != 0; -#elif defined(__aarch64__) +#elif defined(CPU_AARCH32) + void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.arm_pc); + const bool is_write = IsStoreInstruction(exception_pc); +#elif defined(CPU_AARCH64) void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext.pc); const bool is_write = IsStoreInstruction(exception_pc); #else @@ -129,12 +142,12 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx) const bool is_write = false; #endif #else // __APPLE__ -#if defined(__x86_64__) +#if defined(CPU_X64) void* const exception_address = reinterpret_cast(static_cast(ctx)->uc_mcontext->__es.__faultvaddr); void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext->__ss.__rip); const bool is_write = (static_cast(ctx)->uc_mcontext->__es.__err & 2) != 0; -#elif defined(__aarch64__) +#elif defined(CPU_AARCH64) void* const exception_address = 
reinterpret_cast(static_cast(ctx)->uc_mcontext->__es.__far); void* const exception_pc = reinterpret_cast(static_cast(ctx)->uc_mcontext->__ss.__pc); const bool is_write = IsStoreInstruction(exception_pc); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6a7bb81ca..f906ba5a9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -119,25 +119,25 @@ if(WIN32) endif() if(${CPU_ARCH} STREQUAL "x64") - target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak") - target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1" "WITH_FASTMEM=1") + target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak") + target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1" "WITH_MMAP_FASTMEM=1") target_sources(core PRIVATE ${RECOMPILER_SRCS} cpu_recompiler_code_generator_x64.cpp ) message("Building x64 recompiler") elseif(${CPU_ARCH} STREQUAL "aarch32") - target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1") + target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1") target_sources(core PRIVATE ${RECOMPILER_SRCS} cpu_recompiler_code_generator_aarch32.cpp ) - target_link_libraries(core PRIVATE vixl) + target_link_libraries(core PUBLIC vixl) message("Building AArch32 recompiler") elseif(${CPU_ARCH} STREQUAL "aarch64") - target_compile_definitions(core PRIVATE "WITH_RECOMPILER=1" "WITH_FASTMEM=1") + target_compile_definitions(core PUBLIC "WITH_RECOMPILER=1" "WITH_MMAP_FASTMEM=1") target_sources(core PRIVATE ${RECOMPILER_SRCS} cpu_recompiler_code_generator_aarch64.cpp ) - target_link_libraries(core PRIVATE vixl) + target_link_libraries(core PUBLIC vixl) message("Building AArch64 recompiler") else() message("Not building recompiler") diff --git a/src/core/bus.cpp b/src/core/bus.cpp index efbe8be8c..2ff3066fa 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -3,6 +3,7 @@ #include "common/align.h" #include "common/assert.h" #include "common/log.h" +#include "common/make_array.h" 
#include "common/state_wrapper.h" #include "cpu_code_cache.h" #include "cpu_core.h" @@ -69,7 +70,7 @@ union MEMCTRL }; }; -std::bitset m_ram_code_bits{}; +std::bitset m_ram_code_bits{}; u8* g_ram = nullptr; // 2MB RAM u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM @@ -88,20 +89,24 @@ static std::string m_tty_line_buffer; static Common::MemoryArena m_memory_arena; -#ifdef WITH_FASTMEM +static CPUFastmemMode m_fastmem_mode = CPUFastmemMode::Disabled; + +#ifdef WITH_MMAP_FASTMEM static u8* m_fastmem_base = nullptr; static std::vector m_fastmem_ram_views; #endif +static u8** m_fastmem_lut = nullptr; +static constexpr auto m_fastmem_ram_mirrors = + make_array(0x00000000u, 0x00200000u, 0x00400000u, 0x00600000u, 0x80000000u, 0x80200000u, 0x80400000u, 0x80600000u, + 0xA0000000u, 0xA0200000u, 0xA0400000u, 0xA0600000u); + static std::tuple CalculateMemoryTiming(MEMDELAY mem_delay, COMDELAY common_delay); static void RecalculateMemoryTimings(); static bool AllocateMemory(); -#ifdef WITH_FASTMEM static void SetCodePageFastmemProtection(u32 page_index, bool writable); -static void UnmapFastmemViews(); -#endif #define FIXUP_WORD_READ_OFFSET(offset) ((offset) & ~u32(3)) #define FIXUP_WORD_READ_VALUE(offset, value) ((value) >> (((offset)&u32(3)) * 8u)) @@ -132,17 +137,22 @@ bool Initialize() void Shutdown() { -#ifdef WITH_FASTMEM - UnmapFastmemViews(); + std::free(m_fastmem_lut); + m_fastmem_lut = nullptr; + +#ifdef WITH_MMAP_FASTMEM + m_fastmem_base = nullptr; + m_fastmem_ram_views.clear(); #endif + CPU::g_state.fastmem_base = nullptr; + m_fastmem_mode = CPUFastmemMode::Disabled; + if (g_ram) { m_memory_arena.ReleaseViewPtr(g_ram, RAM_SIZE); g_ram = nullptr; } - - CPU::g_state.fastmem_base = nullptr; } void Reset() @@ -268,95 +278,185 @@ bool AllocateMemory() return true; } -#ifdef WITH_FASTMEM - -void UnmapFastmemViews() +static ALWAYS_INLINE u32 FastmemAddressToLUTPageIndex(u32 address) { - m_fastmem_ram_views.clear(); + return address >> 12; } -void UpdateFastmemViews(bool enabled, 
bool isolate_cache) +static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable) { - UnmapFastmemViews(); - if (!enabled) + m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] = ptr; + m_fastmem_lut[FASTMEM_LUT_NUM_PAGES + FastmemAddressToLUTPageIndex(address)] = writable ? ptr : nullptr; +} + +CPUFastmemMode GetFastmemMode() +{ + return m_fastmem_mode; +} + +void UpdateFastmemViews(CPUFastmemMode mode, bool isolate_cache) +{ +#ifndef WITH_MMAP_FASTMEM + Assert(mode != CPUFastmemMode::MMap); +#else + m_fastmem_ram_views.clear(); +#endif + + m_fastmem_mode = mode; + if (mode == CPUFastmemMode::Disabled) { +#ifdef WITH_MMAP_FASTMEM m_fastmem_base = nullptr; +#endif + std::free(m_fastmem_lut); + m_fastmem_lut = nullptr; return; } - Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true " : "false"); - if (!m_fastmem_base) + Log_DevPrintf("Remapping fastmem area, isolate cache = %s", isolate_cache ? "true" : "false"); + +#ifdef WITH_MMAP_FASTMEM + if (mode == CPUFastmemMode::MMap) { - m_fastmem_base = static_cast(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE)); + std::free(m_fastmem_lut); + m_fastmem_lut = nullptr; + if (!m_fastmem_base) { - Log_ErrorPrint("Failed to find base address for fastmem"); - return; - } - - Log_InfoPrintf("Fastmem base: %p", m_fastmem_base); - CPU::g_state.fastmem_base = m_fastmem_base; - } - - auto MapRAM = [](u32 base_address, bool writable) { - u8* map_address = m_fastmem_base + base_address; - auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, writable, false, map_address); - if (!view) - { - Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE); - return; - } - - // mark all pages with code as non-writable - for (u32 i = 0; i < CPU_CODE_CACHE_PAGE_COUNT; i++) - { - if (m_ram_code_bits[i]) + m_fastmem_base = static_cast(m_memory_arena.FindBaseAddressForMapping(FASTMEM_REGION_SIZE)); + if (!m_fastmem_base) 
{ - u8* page_address = map_address + (i * CPU_CODE_CACHE_PAGE_SIZE); - if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, false, false)) + Log_ErrorPrint("Failed to find base address for fastmem"); + return; + } + + Log_InfoPrintf("Fastmem base: %p", m_fastmem_base); + CPU::g_state.fastmem_base = m_fastmem_base; + } + + auto MapRAM = [](u32 base_address, bool writable) { + u8* map_address = m_fastmem_base + base_address; + auto view = m_memory_arena.CreateView(MEMORY_ARENA_RAM_OFFSET, RAM_SIZE, writable, false, map_address); + if (!view) + { + Log_ErrorPrintf("Failed to map RAM at fastmem area %p (offset 0x%08X)", map_address, RAM_SIZE); + return; + } + + // mark all pages with code as non-writable + for (u32 i = 0; i < RAM_CODE_PAGE_COUNT; i++) + { + if (m_ram_code_bits[i]) { - Log_ErrorPrintf("Failed to write-protect code page at %p"); - return; + u8* page_address = map_address + (i * HOST_PAGE_SIZE); + if (!m_memory_arena.SetPageProtection(page_address, HOST_PAGE_SIZE, true, false, false)) + { + Log_ErrorPrintf("Failed to write-protect code page at %p"); + return; + } } } + + m_fastmem_ram_views.push_back(std::move(view.value())); + }; + + if (!isolate_cache) + { + // KUSEG - cached + MapRAM(0x00000000, true); + // MapRAM(0x00200000, true); + // MapRAM(0x00400000, true); + // MapRAM(0x00600000, true); + + // KSEG0 - cached + MapRAM(0x80000000, true); + // MapRAM(0x80200000, true); + // MapRAM(0x80400000, true); + // MapRAM(0x80600000, true); } - m_fastmem_ram_views.push_back(std::move(view.value())); - }; + // KSEG1 - uncached + MapRAM(0xA0000000, true); + // MapRAM(0xA0200000, true); + // MapRAM(0xA0400000, true); + // MapRAM(0xA0600000, true); - if (!isolate_cache) + return; + } +#endif + +#ifdef WITH_MMAP_FASTMEM + m_fastmem_base = nullptr; +#endif + + if (!m_fastmem_lut) { - // KUSEG - cached - MapRAM(0x00000000, !isolate_cache); - // MapRAM(0x00200000, !isolate_cache); - // MapRAM(0x00400000, !isolate_cache); - // 
MapRAM(0x00600000, !isolate_cache); + m_fastmem_lut = static_cast(std::calloc(FASTMEM_LUT_NUM_SLOTS, sizeof(u8*))); + Assert(m_fastmem_lut); - // KSEG0 - cached - MapRAM(0x80000000, !isolate_cache); - // MapRAM(0x80200000, !isolate_cache); - // MapRAM(0x80400000, !isolate_cache); - // MapRAM(0x80600000, !isolate_cache); + Log_InfoPrintf("Fastmem base (software): %p", m_fastmem_lut); + CPU::g_state.fastmem_base = reinterpret_cast(m_fastmem_lut); } + auto MapRAM = [](u32 base_address, bool readable, bool writable) { + if (readable) + { + for (u32 address = 0; address < RAM_SIZE; address += HOST_PAGE_SIZE) + { + SetLUTFastmemPage(base_address + address, &g_ram[address], + !m_ram_code_bits[FastmemAddressToLUTPageIndex(address)]); + } + } + else + { + for (u32 address = 0; address < RAM_SIZE; address += HOST_PAGE_SIZE) + SetLUTFastmemPage(base_address + address, nullptr, false); + } + }; + + // KUSEG - cached + MapRAM(0x00000000, !isolate_cache, !isolate_cache); + MapRAM(0x00200000, !isolate_cache, !isolate_cache); + MapRAM(0x00400000, !isolate_cache, !isolate_cache); + MapRAM(0x00600000, !isolate_cache, !isolate_cache); + + // KSEG0 - cached + MapRAM(0x80000000, !isolate_cache, !isolate_cache); + MapRAM(0x80200000, !isolate_cache, !isolate_cache); + MapRAM(0x80400000, !isolate_cache, !isolate_cache); + MapRAM(0x80600000, !isolate_cache, !isolate_cache); + // KSEG1 - uncached - MapRAM(0xA0000000, true); - // MapRAM(0xA0200000, true); - // MapRAM(0xA0400000, true); - // MapRAM(0xA0600000, true); + MapRAM(0xA0000000, true, true); + MapRAM(0xA0200000, true, true); + MapRAM(0xA0400000, true, true); + MapRAM(0xA0600000, true, true); } bool CanUseFastmemForAddress(VirtualMemoryAddress address) { const PhysicalMemoryAddress paddr = address & CPU::PHYSICAL_MEMORY_ADDRESS_MASK; - // Currently since we don't map the mirrors, don't use fastmem for them. - // This is because the swapping of page code bits for SMC is too expensive. 
- return (paddr < RAM_SIZE); -} - + switch (m_fastmem_mode) + { +#ifdef WITH_MMAP_FASTMEM + case CPUFastmemMode::MMap: + { + // Currently since we don't map the mirrors, don't use fastmem for them. + // This is because the swapping of page code bits for SMC is too expensive. + return (paddr < RAM_MIRROR_END); + } #endif + case CPUFastmemMode::LUT: + return (paddr < RAM_SIZE); + + case CPUFastmemMode::Disabled: + default: + return false; + } +} + bool IsRAMCodePage(u32 index) { return m_ram_code_bits[index]; @@ -369,10 +469,7 @@ void SetRAMCodePage(u32 index) // protect fastmem pages m_ram_code_bits[index] = true; - -#ifdef WITH_FASTMEM SetCodePageFastmemProtection(index, false); -#endif } void ClearRAMCodePage(u32 index) @@ -382,49 +479,70 @@ void ClearRAMCodePage(u32 index) // unprotect fastmem pages m_ram_code_bits[index] = false; - -#ifdef WITH_FASTMEM SetCodePageFastmemProtection(index, true); -#endif } -#ifdef WITH_FASTMEM - void SetCodePageFastmemProtection(u32 page_index, bool writable) { - // unprotect fastmem pages - for (const auto& view : m_fastmem_ram_views) +#ifdef WITH_MMAP_FASTMEM + if (m_fastmem_mode == CPUFastmemMode::MMap) { - u8* page_address = static_cast(view.GetBasePointer()) + (page_index * CPU_CODE_CACHE_PAGE_SIZE); - if (!m_memory_arena.SetPageProtection(page_address, CPU_CODE_CACHE_PAGE_SIZE, true, writable, false)) + // unprotect fastmem pages + for (const auto& view : m_fastmem_ram_views) { - Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? "unprotect" : "protect", page_index, - page_index * CPU_CODE_CACHE_PAGE_SIZE, page_address); + u8* page_address = static_cast(view.GetBasePointer()) + (page_index * HOST_PAGE_SIZE); + if (!m_memory_arena.SetPageProtection(page_address, HOST_PAGE_SIZE, true, writable, false)) + { + Log_ErrorPrintf("Failed to %s code page %u (0x%08X) @ %p", writable ? 
"unprotect" : "protect", page_index, + page_index * HOST_PAGE_SIZE, page_address); + } } + + return; + } +#endif + + if (m_fastmem_mode == CPUFastmemMode::LUT) + { + // mirrors... + const u32 ram_address = page_index * HOST_PAGE_SIZE; + for (u32 mirror_start : m_fastmem_ram_mirrors) + SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable); } } -#endif - void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); -#ifdef WITH_FASTMEM - // unprotect fastmem pages - for (const auto& view : m_fastmem_ram_views) +#ifdef WITH_MMAP_FASTMEM + if (m_fastmem_mode == CPUFastmemMode::MMap) { - if (!m_memory_arena.SetPageProtection(view.GetBasePointer(), view.GetMappingSize(), true, true, false)) + // unprotect fastmem pages + for (const auto& view : m_fastmem_ram_views) { - Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer()); + if (!m_memory_arena.SetPageProtection(view.GetBasePointer(), view.GetMappingSize(), true, true, false)) + { + Log_ErrorPrintf("Failed to unprotect code pages for fastmem view @ %p", view.GetBasePointer()); + } } } #endif + + if (m_fastmem_mode == CPUFastmemMode::LUT) + { + for (u32 i = 0; i < RAM_CODE_PAGE_COUNT; i++) + { + const u32 addr = (i * HOST_PAGE_SIZE); + for (u32 mirror_start : m_fastmem_ram_mirrors) + SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true); + } + } } bool IsCodePageAddress(PhysicalMemoryAddress address) { - return IsRAMAddress(address) ? m_ram_code_bits[(address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE] : false; + return IsRAMAddress(address) ? 
m_ram_code_bits[(address & RAM_MASK) / HOST_PAGE_SIZE] : false; } bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size) @@ -437,11 +555,11 @@ bool HasCodePagesInRange(PhysicalMemoryAddress start_address, u32 size) const u32 end_address = start_address + size; while (start_address < end_address) { - const u32 code_page_index = start_address / CPU_CODE_CACHE_PAGE_SIZE; + const u32 code_page_index = start_address / HOST_PAGE_SIZE; if (m_ram_code_bits[code_page_index]) return true; - start_address += CPU_CODE_CACHE_PAGE_SIZE; + start_address += HOST_PAGE_SIZE; } return false; @@ -499,7 +617,7 @@ ALWAYS_INLINE static TickCount DoRAMAccess(u32 offset, u32& value) } else { - const u32 page_index = offset / CPU_CODE_CACHE_PAGE_SIZE; + const u32 page_index = offset / HOST_PAGE_SIZE; if (m_ram_code_bits[page_index]) CPU::CodeCache::InvalidateBlocksWithPageIndex(page_index); diff --git a/src/core/bus.h b/src/core/bus.h index 6f3cf94ea..b6ea51148 100644 --- a/src/core/bus.h +++ b/src/core/bus.h @@ -82,10 +82,15 @@ enum : size_t // Offsets within the memory arena. MEMORY_ARENA_RAM_OFFSET = 0, -#ifdef WITH_FASTMEM +#ifdef WITH_MMAP_FASTMEM // Fastmem region size is 4GB to cover the entire 32-bit address space. 
- FASTMEM_REGION_SIZE = UINT64_C(0x100000000) + FASTMEM_REGION_SIZE = UINT64_C(0x100000000), #endif + + RAM_CODE_PAGE_COUNT = (RAM_SIZE + (HOST_PAGE_SIZE + 1)) / HOST_PAGE_SIZE, + + FASTMEM_LUT_NUM_PAGES = 0x100000, // 0x100000000 >> 12 + FASTMEM_LUT_NUM_SLOTS = FASTMEM_LUT_NUM_PAGES * 2, }; bool Initialize(); @@ -93,15 +98,14 @@ void Shutdown(); void Reset(); bool DoState(StateWrapper& sw); -#ifdef WITH_FASTMEM -void UpdateFastmemViews(bool enabled, bool isolate_cache); +CPUFastmemMode GetFastmemMode(); +void UpdateFastmemViews(CPUFastmemMode mode, bool isolate_cache); bool CanUseFastmemForAddress(VirtualMemoryAddress address); -#endif void SetExpansionROM(std::vector data); void SetBIOS(const std::vector& image); -extern std::bitset m_ram_code_bits; +extern std::bitset m_ram_code_bits; extern u8* g_ram; // 2MB RAM extern u8 g_bios[BIOS_SIZE]; // 512K BIOS ROM @@ -114,7 +118,7 @@ ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address) /// Returns the code page index for a RAM address. ALWAYS_INLINE static u32 GetRAMCodePageIndex(PhysicalMemoryAddress address) { - return (address & RAM_MASK) / CPU_CODE_CACHE_PAGE_SIZE; + return (address & RAM_MASK) / HOST_PAGE_SIZE; } /// Returns true if the specified page contains code. 
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index a0fc019ce..8d79f8cd3 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -483,7 +483,7 @@ Level4 Disabled - WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) @@ -564,7 +564,7 @@ Level4 Disabled - WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) @@ -678,7 +678,7 @@ MaxSpeed true - WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false @@ -732,7 +732,7 @@ 
MaxSpeed true - WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_IMGUI=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) true true @@ -844,4 +844,4 @@ true core - + \ No newline at end of file diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 6db67a18c..b3898348f 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -98,7 +98,7 @@ static void UnlinkBlock(CodeBlock* block); static void ClearState(); static BlockMap s_blocks; -static std::array, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map; +static std::array, Bus::RAM_CODE_PAGE_COUNT> m_ram_block_map; #ifdef WITH_RECOMPILER static HostCodeMap s_host_code_map; @@ -106,11 +106,14 @@ static HostCodeMap s_host_code_map; static void AddBlockToHostCodeMap(CodeBlock* block); static void RemoveBlockFromHostCodeMap(CodeBlock* block); -#ifdef WITH_FASTMEM static bool InitializeFastmem(); static void ShutdownFastmem(); -static Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write); -#endif // WITH_FASTMEM +static Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address, + bool is_write); +#ifdef WITH_MMAP_FASTMEM +static Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address, + bool is_write); +#endif #endif // WITH_RECOMPILER void Initialize() @@ -130,10 +133,8 @@ void Initialize() Panic("Failed to initialize code space"); } -#ifdef WITH_FASTMEM if (g_settings.IsUsingFastmem() && !InitializeFastmem()) Panic("Failed 
to initialize fastmem"); -#endif ResetFastMap(); CompileDispatcher(); @@ -161,10 +162,8 @@ void ClearState() void Shutdown() { ClearState(); -#ifdef WITH_FASTMEM - ShutdownFastmem(); -#endif #ifdef WITH_RECOMPILER + ShutdownFastmem(); s_code_buffer.Destroy(); #endif } @@ -339,10 +338,7 @@ void Reinitialize() #ifdef WITH_RECOMPILER -#ifdef WITH_FASTMEM ShutdownFastmem(); -#endif - s_code_buffer.Destroy(); if (g_settings.IsUsingRecompiler()) @@ -358,10 +354,8 @@ void Reinitialize() Panic("Failed to initialize code space"); } -#ifdef WITH_FASTMEM if (g_settings.IsUsingFastmem() && !InitializeFastmem()) Panic("Failed to initialize fastmem"); -#endif ResetFastMap(); CompileDispatcher(); @@ -620,7 +614,7 @@ void FastCompileBlockFunction() void InvalidateBlocksWithPageIndex(u32 page_index) { - DebugAssert(page_index < CPU_CODE_CACHE_PAGE_COUNT); + DebugAssert(page_index < Bus::RAM_CODE_PAGE_COUNT); auto& blocks = m_ram_block_map[page_index]; for (CodeBlock* block : blocks) { @@ -737,27 +731,37 @@ void RemoveBlockFromHostCodeMap(CodeBlock* block) s_host_code_map.erase(hc_iter); } -#ifdef WITH_FASTMEM - bool InitializeFastmem() { - if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, PageFaultHandler)) + const CPUFastmemMode mode = g_settings.cpu_fastmem_mode; + Assert(mode != CPUFastmemMode::Disabled); + +#ifdef WITH_MMAP_FASTMEM + const auto handler = (mode == CPUFastmemMode::MMap) ? 
MMapPageFaultHandler : LUTPageFaultHandler; +#else + const auto handler = LUTPageFaultHandler; + Assert(mode != CPUFastmemMode::MMap); +#endif + + if (!Common::PageFaultHandler::InstallHandler(&s_host_code_map, handler)) { Log_ErrorPrintf("Failed to install page fault handler"); return false; } - Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc); + Bus::UpdateFastmemViews(mode, g_state.cop0_regs.sr.Isc); return true; } void ShutdownFastmem() { Common::PageFaultHandler::RemoveHandler(&s_host_code_map); - Bus::UpdateFastmemViews(false, false); + Bus::UpdateFastmemViews(CPUFastmemMode::Disabled, false); } -Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, void* fault_address, bool is_write) +#ifdef WITH_MMAP_FASTMEM + +Common::PageFaultHandler::HandlerResult MMapPageFaultHandler(void* exception_pc, void* fault_address, bool is_write) { if (static_cast(fault_address) < g_state.fastmem_base || (static_cast(fault_address) - g_state.fastmem_base) >= Bus::FASTMEM_REGION_SIZE) @@ -827,7 +831,46 @@ Common::PageFaultHandler::HandlerResult PageFaultHandler(void* exception_pc, voi return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler; } -#endif // WITH_FASTMEM +#endif + +Common::PageFaultHandler::HandlerResult LUTPageFaultHandler(void* exception_pc, void* fault_address, bool is_write) +{ + // use upper_bound to find the next block after the pc + HostCodeMap::iterator upper_iter = + s_host_code_map.upper_bound(reinterpret_cast(exception_pc)); + if (upper_iter == s_host_code_map.begin()) + return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler; + + // then decrement it by one to (hopefully) get the block we want + upper_iter--; + + // find the loadstore info in the code block + CodeBlock* block = upper_iter->second; + for (auto bpi_iter = block->loadstore_backpatch_info.begin(); bpi_iter != block->loadstore_backpatch_info.end(); + ++bpi_iter) + { + Recompiler::LoadStoreBackpatchInfo& lbi = *bpi_iter; + if (lbi.host_pc == 
exception_pc) + { + // found it, do fixup + if (Recompiler::CodeGenerator::BackpatchLoadStore(lbi)) + { + // remove the backpatch entry since we won't be coming back to this one + block->loadstore_backpatch_info.erase(bpi_iter); + return Common::PageFaultHandler::HandlerResult::ContinueExecution; + } + else + { + Log_ErrorPrintf("Failed to backpatch %p in block 0x%08X", exception_pc, block->GetPC()); + return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler; + } + } + } + + // we didn't find the pc in our list.. + Log_ErrorPrintf("Loadstore PC not found for %p in block 0x%08X", exception_pc, block->GetPC()); + return Common::PageFaultHandler::HandlerResult::ExecuteNextHandler; +} #endif // WITH_RECOMPILER diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index 5f5540f3a..8e2ea7f98 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -89,10 +89,10 @@ struct CodeBlock const u32 GetPC() const { return key.GetPC(); } const u32 GetSizeInBytes() const { return static_cast(instructions.size()) * sizeof(Instruction); } - const u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / CPU_CODE_CACHE_PAGE_SIZE); } + const u32 GetStartPageIndex() const { return (key.GetPCPhysicalAddress() / HOST_PAGE_SIZE); } const u32 GetEndPageIndex() const { - return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / CPU_CODE_CACHE_PAGE_SIZE); + return ((key.GetPCPhysicalAddress() + GetSizeInBytes()) / HOST_PAGE_SIZE); } bool IsInRAM() const { @@ -131,8 +131,8 @@ void InterpretUncachedBlock(); /// Invalidates any code pages which overlap the specified range. 
ALWAYS_INLINE void InvalidateCodePages(PhysicalMemoryAddress address, u32 word_count) { - const u32 start_page = address / CPU_CODE_CACHE_PAGE_SIZE; - const u32 end_page = (address + word_count * sizeof(u32) - sizeof(u32)) / CPU_CODE_CACHE_PAGE_SIZE; + const u32 start_page = address / HOST_PAGE_SIZE; + const u32 end_page = (address + word_count * sizeof(u32) - sizeof(u32)) / HOST_PAGE_SIZE; for (u32 page = start_page; page <= end_page; page++) { if (Bus::m_ram_code_bits[page]) diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 440b06373..1e75a8f7a 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -1600,15 +1600,11 @@ bool InterpretInstructionPGXP() return g_state.exception_raised; } -#ifdef WITH_FASTMEM - void UpdateFastmemMapping() { - Bus::UpdateFastmemViews(true, g_state.cop0_regs.sr.Isc); + Bus::UpdateFastmemViews(Bus::GetFastmemMode(), g_state.cop0_regs.sr.Isc); } -#endif - } // namespace Recompiler::Thunks } // namespace CPU diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp index 7fda1ed0e..959221d38 100644 --- a/src/core/cpu_recompiler_code_generator.cpp +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -2260,9 +2260,8 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi) value = AndValues(value, Value::FromConstantU32(write_mask)); } -#ifdef WITH_FASTMEM // changing SR[Isc] needs to update fastmem views - if (reg == Cop0Reg::SR && g_settings.cpu_fastmem) + if (reg == Cop0Reg::SR && g_settings.IsUsingFastmem()) { LabelType skip_fastmem_update; Value old_value = m_register_cache.AllocateScratch(RegSize_32); @@ -2279,9 +2278,6 @@ bool CodeGenerator::Compile_cop0(const CodeBlockInstruction& cbi) { EmitStoreCPUStructField(offset, value); } -#else - EmitStoreCPUStructField(offset, value); -#endif } } diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h index 5ddb6e09e..fcd8a4d20 100644 --- 
a/src/core/cpu_recompiler_code_generator.h +++ b/src/core/cpu_recompiler_code_generator.h @@ -52,9 +52,12 @@ public: bool signed_divide); void EmitInc(HostReg to_reg, RegSize size); void EmitDec(HostReg to_reg, RegSize size); - void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true); - void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true); - void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, bool assume_amount_masked = true); + void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, + bool assume_amount_masked = true); + void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, + bool assume_amount_masked = true); + void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, + bool assume_amount_masked = true); void EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value); void EmitOr(HostReg to_reg, HostReg from_reg, const Value& value); void EmitXor(HostReg to_reg, HostReg from_reg, const Value& value); @@ -77,19 +80,17 @@ public: void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr); // Automatically generates an exception handler. 
+ Value GetFastmemLoadBase(); + Value GetFastmemStoreBase(); Value EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec, RegSize size); -#ifdef WITH_FASTMEM void EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result); void EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result); -#endif void EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code); void EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const Value& address, const SpeculativeValue& address_spec, const Value& value); -#ifdef WITH_FASTMEM void EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value); -#endif void EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code); @@ -250,6 +251,9 @@ private: bool m_load_delay_dirty = false; bool m_next_load_delay_dirty = false; + bool m_fastmem_load_base_in_register = false; + bool m_fastmem_store_base_in_register = false; + ////////////////////////////////////////////////////////////////////////// // Speculative Constants ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp index 1e5512edb..7c21a2835 100644 --- a/src/core/cpu_recompiler_code_generator_aarch32.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch32.cpp @@ -839,6 +839,8 @@ void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Cond u32 CodeGenerator::PrepareStackForCall() { + m_fastmem_load_base_in_register = false; + m_fastmem_store_base_in_register = false; m_register_cache.PushCallerSavedRegisters(); return 0; } @@ -1124,6 +1126,146 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) } } +Value 
CodeGenerator::GetFastmemLoadBase() +{ + Value val = Value::FromHostReg(&m_register_cache, RARG4, RegSize_32); + if (!m_fastmem_load_base_in_register) + { + m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base))); + m_fastmem_load_base_in_register = true; + } + + return val; +} + +Value CodeGenerator::GetFastmemStoreBase() +{ + Value val = Value::FromHostReg(&m_register_cache, RARG3, RegSize_32); + if (!m_fastmem_store_base_in_register) + { + m_emit->ldr(GetHostReg32(val), a32::MemOperand(GetCPUPtrReg(), offsetof(CPU::State, fastmem_base))); + m_emit->add(GetHostReg32(val), GetHostReg32(val), sizeof(u32*) * Bus::FASTMEM_LUT_NUM_PAGES); + m_fastmem_store_base_in_register = true; + } + + return val; +} + +void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) +{ + Value fastmem_base = GetFastmemLoadBase(); + + HostReg address_reg; + if (address.IsConstant()) + { + m_emit->Mov(GetHostReg32(RSCRATCH), static_cast(address.constant_value)); + address_reg = RSCRATCH; + } + else + { + address_reg = address.host_reg; + } + + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + m_emit->ldr(GetHostReg32(RARG1), + a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load + + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } +} + +void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, + 
Value& result) +{ + // fastmem + LoadStoreBackpatchInfo bpi; + bpi.address_host_reg = HostReg_Invalid; + bpi.value_host_reg = result.host_reg; + bpi.guest_pc = m_current_instruction->pc; + + Value fastmem_base = GetFastmemLoadBase(); + + HostReg address_reg; + if (address.IsConstant()) + { + m_emit->Mov(GetHostReg32(RSCRATCH), static_cast(address.constant_value)); + address_reg = RSCRATCH; + } + else + { + address_reg = address.host_reg; + } + + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + m_emit->ldr(GetHostReg32(RARG1), + a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load + + m_register_cache.InhibitAllocation(); + bpi.host_pc = GetCurrentNearCodePointer(); + + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } + + EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS)); + + bpi.host_code_size = static_cast( + static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(bpi.host_pc))); + + const bool old_store_fastmem_base = m_fastmem_store_base_in_register; + + // generate slowmem fallback + bpi.host_slowmem_pc = GetCurrentFarCodePointer(); + SwitchToFarCode(); + EmitLoadGuestMemorySlowmem(cbi, address, size, result, true); + + // restore fastmem base state for the next instruction + if (old_store_fastmem_base) + fastmem_base = GetFastmemStoreBase(); + fastmem_base = GetFastmemLoadBase(); + + // return to the block code + 
EmitBranch(GetCurrentNearCodePointer(), false); + + SwitchToNearCode(); + m_register_cache.UninhibitAllocation(); + + m_block->loadstore_backpatch_info.push_back(bpi); +} + void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, RegSize size, Value& result, bool in_far_code) { @@ -1199,6 +1341,81 @@ void CodeGenerator::EmitLoadGuestMemorySlowmem(const CodeBlockInstruction& cbi, } } +void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, const Value& address, + const Value& value) +{ + LoadStoreBackpatchInfo bpi; + bpi.address_host_reg = HostReg_Invalid; + bpi.value_host_reg = value.host_reg; + bpi.guest_pc = m_current_instruction->pc; + + Value fastmem_base = GetFastmemStoreBase(); + Value actual_value = GetValueInHostRegister(value); + + HostReg address_reg; + if (address.IsConstant()) + { + m_emit->Mov(GetHostReg32(RSCRATCH), static_cast(address.constant_value)); + address_reg = RSCRATCH; + } + else + { + address_reg = address.host_reg; + } + + // TODO: if this gets backpatched, these instructions are wasted + + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + m_emit->ldr(GetHostReg32(RARG1), + a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load + + m_register_cache.InhibitAllocation(); + bpi.host_pc = GetCurrentNearCodePointer(); + + switch (value.size) + { + case RegSize_8: + m_emit->strb(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->strh(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } + + bpi.host_code_size = 
static_cast( + static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(bpi.host_pc))); + + const bool old_load_fastmem_base = m_fastmem_load_base_in_register; + + // generate slowmem fallback + bpi.host_slowmem_pc = GetCurrentFarCodePointer(); + SwitchToFarCode(); + EmitStoreGuestMemorySlowmem(cbi, address, actual_value, true); + + // restore fastmem base state for the next instruction + if (old_load_fastmem_base) + fastmem_base = GetFastmemLoadBase(); + fastmem_base = GetFastmemStoreBase(); + + // return to the block code + EmitBranch(GetCurrentNearCodePointer(), false); + + SwitchToNearCode(); + m_register_cache.UninhibitAllocation(); + + m_block->loadstore_backpatch_info.push_back(bpi); +} + void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, const Value& address, const Value& value, bool in_far_code) { @@ -1278,6 +1495,35 @@ void CodeGenerator::EmitStoreGuestMemorySlowmem(const CodeBlockInstruction& cbi, } } +bool CodeGenerator::BackpatchLoadStore(const LoadStoreBackpatchInfo& lbi) +{ + Log_DevPrintf("Backpatching %p (guest PC 0x%08X) to slowmem at %p", lbi.host_pc, lbi.guest_pc, lbi.host_slowmem_pc); + + // turn it into a jump to the slowmem handler + vixl::aarch32::MacroAssembler emit(static_cast(lbi.host_pc), lbi.host_code_size, a32::A32); + + // check jump distance + const s32 displacement = GetPCDisplacement(lbi.host_pc, lbi.host_slowmem_pc); + if (!IsPCDisplacementInImmediateRange(displacement)) + { + emit.Mov(GetHostReg32(RSCRATCH), reinterpret_cast(lbi.host_slowmem_pc)); + emit.bx(GetHostReg32(RSCRATCH)); + } + else + { + a32::Label label(displacement + emit.GetCursorOffset()); + emit.b(&label); + } + + const s32 nops = (static_cast(lbi.host_code_size) - static_cast(emit.GetCursorOffset())) / 4; + Assert(nops >= 0); + for (s32 i = 0; i < nops; i++) + emit.nop(); + + JitCodeBuffer::FlushInstructionCache(lbi.host_pc, lbi.host_code_size); + return true; +} + void CodeGenerator::EmitLoadGlobal(HostReg host_reg, 
RegSize size, const void* ptr) { EmitLoadGlobalAddress(RSCRATCH, ptr); diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp index 6ada14a32..a4f902167 100644 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp @@ -1298,34 +1298,63 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) { + HostReg address_reg; a64::MemOperand actual_address; if (address.IsConstant()) { m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value); - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg)); + address_reg = result.host_reg; } else { - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address)); + address_reg = address.host_reg; } - switch (size) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - case RegSize_8: - m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address); - break; + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - case RegSize_16: - m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address); - break; + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - case RegSize_32: - m_emit->Ldr(GetHostReg32(result.host_reg), actual_address); - break; + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; + default: + UnreachableCode(); + break; + } + } + else + { + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + 
m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); + + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } } } @@ -1334,42 +1363,72 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, { // fastmem LoadStoreBackpatchInfo bpi; - bpi.host_pc = GetCurrentNearCodePointer(); bpi.address_host_reg = HostReg_Invalid; bpi.value_host_reg = result.host_reg; bpi.guest_pc = m_current_instruction->pc; - a64::MemOperand actual_address; + HostReg address_reg; if (address.IsConstant()) { m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value); - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(result.host_reg)); - bpi.host_pc = GetCurrentNearCodePointer(); + address_reg = result.host_reg; } else { - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address)); + address_reg = address.host_reg; } m_register_cache.InhibitAllocation(); - switch (size) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - case RegSize_8: - m_emit->Ldrb(GetHostReg32(result.host_reg), actual_address); - break; + bpi.host_pc = GetCurrentNearCodePointer(); - case RegSize_16: - m_emit->Ldrh(GetHostReg32(result.host_reg), actual_address); - break; + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - case RegSize_32: - m_emit->Ldr(GetHostReg32(result.host_reg), actual_address); - break; + case RegSize_16: + 
m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; + + default: + UnreachableCode(); + break; + } + } + else + { + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3)); + + bpi.host_pc = GetCurrentNearCodePointer(); + + switch (size) + { + case RegSize_8: + m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } } EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(Bus::RAM_READ_TICKS)); @@ -1472,42 +1531,72 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, // fastmem LoadStoreBackpatchInfo bpi; - bpi.host_pc = GetCurrentNearCodePointer(); bpi.address_host_reg = HostReg_Invalid; bpi.value_host_reg = value.host_reg; bpi.guest_pc = m_current_instruction->pc; - a64::MemOperand actual_address; + HostReg address_reg; if (address.IsConstant()) { m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value); - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RSCRATCH)); - bpi.host_pc = GetCurrentNearCodePointer(); + address_reg = RSCRATCH; } else { - actual_address = a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address)); + address_reg = address.host_reg; } 
m_register_cache.InhibitAllocation(); - - switch (value.size) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - case RegSize_8: - m_emit->Strb(GetHostReg8(value_in_hr), actual_address); - break; + bpi.host_pc = GetCurrentNearCodePointer(); - case RegSize_16: - m_emit->Strh(GetHostReg16(value_in_hr), actual_address); - break; + switch (value.size) + { + case RegSize_8: + m_emit->strb(GetHostReg8(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - case RegSize_32: - m_emit->Str(GetHostReg32(value_in_hr), actual_address); - break; + case RegSize_16: + m_emit->strh(GetHostReg16(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; - default: - UnreachableCode(); - break; + case RegSize_32: + m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(address_reg))); + break; + + default: + UnreachableCode(); + break; + } + } + else + { + m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); + m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK); + m_emit->add(GetHostReg64(RARG3), GetFastmemBasePtrReg(), Bus::FASTMEM_LUT_NUM_PAGES * sizeof(u32*)); + m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetHostReg64(RARG3), GetHostReg32(RARG1), a64::LSL, 3)); + + bpi.host_pc = GetCurrentNearCodePointer(); + + switch (value.size) + { + case RegSize_8: + m_emit->strb(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_16: + m_emit->strh(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + case RegSize_32: + m_emit->str(GetHostReg32(value_in_hr.host_reg), a64::MemOperand(GetHostReg64(RARG1), GetHostReg32(RARG2))); + break; + + default: + UnreachableCode(); + break; + } } bpi.host_code_size = static_cast( diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp 
b/src/core/cpu_recompiler_code_generator_generic.cpp index 043ef7055..ddd4e3346 100644 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -41,8 +41,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const { Value result = m_register_cache.AllocateScratch(size); -#ifdef WITH_FASTMEM - if (g_settings.IsUsingFastmem() && Bus::IsRAMAddress(static_cast(address.constant_value))) { // have to mask away the high bits for mirrors, since we don't map them in fastmem @@ -54,12 +52,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const EmitLoadGlobal(result.GetHostRegister(), size, ptr); } -#else - - EmitLoadGlobal(result.GetHostRegister(), size, ptr); - -#endif - m_delayed_cycles_add += read_ticks; return result; } @@ -67,7 +59,7 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const AddPendingCycles(true); -#ifdef WITH_FASTMEM + Value result = m_register_cache.AllocateScratch(HostPointerSize); const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true; if (address_spec) @@ -82,7 +74,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const use_fastmem ? "yes" : "no"); } - Value result = m_register_cache.AllocateScratch(RegSize_64); if (g_settings.IsUsingFastmem() && use_fastmem) { EmitLoadGuestMemoryFastmem(cbi, address, size, result); @@ -93,14 +84,6 @@ Value CodeGenerator::EmitLoadGuestMemory(const CodeBlockInstruction& cbi, const EmitLoadGuestMemorySlowmem(cbi, address, size, result, false); } -#else - - Value result = m_register_cache.AllocateScratch(HostPointerSize); - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitLoadGuestMemorySlowmem(cbi, address, size, result, false); - -#endif - // Downcast to ignore upper 56/48/32 bits. This should be a noop. 
if (result.size != size) { @@ -145,8 +128,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const AddPendingCycles(true); -#ifdef WITH_FASTMEM - const bool use_fastmem = address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true; if (address_spec) { @@ -169,13 +150,6 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const m_register_cache.FlushCallerSavedGuestRegisters(true, true); EmitStoreGuestMemorySlowmem(cbi, address, value, false); } - -#else - - m_register_cache.FlushCallerSavedGuestRegisters(true, true); - EmitStoreGuestMemorySlowmem(cbi, address, value, false); - -#endif } #ifndef CPU_X64 diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp index 4cea1c964..6ecef3c44 100644 --- a/src/core/cpu_recompiler_code_generator_x64.cpp +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -1760,61 +1760,88 @@ void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) { - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); + // can't store displacements > 0x80000000 in-line + const Value* actual_address = &address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + actual_address = &result; + m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); + } + + // TODO: movsx/zx inline here + switch (size) + { + case RegSize_8: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg8(result.host_reg), + m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg8(result.host_reg), + 
m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + + case RegSize_16: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg16(result.host_reg), + m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg16(result.host_reg), + m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + + case RegSize_32: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg32(result.host_reg), + m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg32(result.host_reg), + m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + } } - - // TODO: movsx/zx inline here - switch (size) + else { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; + // TODO: We could mask the LSBs here for unaligned protection. 
+ EmitCopyValue(RARG1, address); + m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); + m_emit->shr(GetHostReg32(RARG1), 12); + m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK); + m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - case RegSize_16: + switch (size) { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; + case RegSize_8: + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; - case RegSize_32: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } + case RegSize_16: + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; + + case RegSize_32: + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; } - break; } } @@ -1828,63 +1855,93 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi, bpi.value_host_reg = result.host_reg; bpi.guest_pc = m_current_instruction->pc; - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); - bpi.host_pc = GetCurrentNearCodePointer(); + // can't store displacements > 0x80000000 in-line + const 
Value* actual_address = &address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + actual_address = &result; + m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); + bpi.host_pc = GetCurrentNearCodePointer(); + } + + m_register_cache.InhibitAllocation(); + + switch (size) + { + case RegSize_8: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg8(result.host_reg), + m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg8(result.host_reg), + m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + + case RegSize_16: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg16(result.host_reg), + m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg16(result.host_reg), + m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + + case RegSize_32: + { + if (actual_address->IsConstant()) + { + m_emit->mov(GetHostReg32(result.host_reg), + m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); + } + else + { + m_emit->mov(GetHostReg32(result.host_reg), + m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); + } + } + break; + } } - - m_register_cache.InhibitAllocation(); - - switch (size) + else { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg8(result.host_reg), - m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; + m_register_cache.InhibitAllocation(); - case RegSize_16: - { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - 
m_emit->mov(GetHostReg16(result.host_reg), - m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } - } - break; + // TODO: We could mask the LSBs here for unaligned protection. + EmitCopyValue(RARG1, address); + m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); + m_emit->shr(GetHostReg32(RARG1), 12); + m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK); + m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); + bpi.host_pc = GetCurrentNearCodePointer(); - case RegSize_32: + switch (size) { - if (actual_address->IsConstant()) - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value]); - } - else - { - m_emit->mov(GetHostReg32(result.host_reg), - m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)]); - } + case RegSize_8: + m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; + + case RegSize_16: + m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; + + case RegSize_32: + m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]); + break; } - break; } // TODO: BIOS reads... 
@@ -1997,110 +2054,156 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi, bpi.value_host_reg = value.host_reg; bpi.guest_pc = m_current_instruction->pc; - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - Value temp_address; - if (address.IsConstant() && address.constant_value >= 0x80000000) + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) { - temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); - actual_address = &temp_address; - m_emit->mov(GetHostReg32(temp_address), address.constant_value); - bpi.host_pc = GetCurrentNearCodePointer(); + // can't store displacements > 0x80000000 in-line + const Value* actual_address = &address; + Value temp_address; + if (address.IsConstant() && address.constant_value >= 0x80000000) + { + temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); + actual_address = &temp_address; + m_emit->mov(GetHostReg32(temp_address), address.constant_value); + bpi.host_pc = GetCurrentNearCodePointer(); + } + + m_register_cache.InhibitAllocation(); + + switch (value.size) + { + case RegSize_8: + { + if (actual_address->IsConstant()) + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); + } + else + { + m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], + GetHostReg8(value.host_reg)); + } + } + else + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + value.constant_value); + } + else + { + m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + GetHostReg8(value.host_reg)); + } + } + } + break; + + case RegSize_16: + { + if (actual_address->IsConstant()) + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); + } + else + { + 
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], + GetHostReg16(value.host_reg)); + } + } + else + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + value.constant_value); + } + else + { + m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + GetHostReg16(value.host_reg)); + } + } + } + break; + + case RegSize_32: + { + if (actual_address->IsConstant()) + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); + } + else + { + m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], + GetHostReg32(value.host_reg)); + } + } + else + { + if (value.IsConstant()) + { + m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + value.constant_value); + } + else + { + m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], + GetHostReg32(value.host_reg)); + } + } + } + break; + } } - - m_register_cache.InhibitAllocation(); - - switch (value.size) + else { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); - } - else - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg8(value.host_reg)); - } - } - else - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value); - } - else - { - m_emit->mov(m_emit->byte[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg8(value.host_reg)); - } - } - } - break; + m_register_cache.InhibitAllocation(); - case RegSize_16: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - { - 
m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); - } - else - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg16(value.host_reg)); - } - } - else - { - if (value.IsConstant()) - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value); - } - else - { - m_emit->mov(m_emit->word[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg16(value.host_reg)); - } - } - } - break; + // RARG1 <- LUT[NUM_PAGES + (address >> 12)], RARG2 <- page offset. TODO: mask LSBs for unaligned protection. + EmitCopyValue(RARG1, address); + m_emit->mov(GetHostReg32(RARG2), GetHostReg32(RARG1)); + m_emit->shr(GetHostReg32(RARG1), 12); + m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK); + m_emit->mov(GetHostReg64(RARG1), + m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8 + (Bus::FASTMEM_LUT_NUM_PAGES * 8)]); + bpi.host_pc = GetCurrentNearCodePointer(); - case RegSize_32: + switch (value.size) { - if (actual_address->IsConstant()) + case RegSize_8: { if (value.IsConstant()) - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], value.constant_value); - } + m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value); else - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + actual_address->constant_value], - GetHostReg32(value.host_reg)); - } + m_emit->mov(m_emit->byte[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg8(value)); } - else + break; + + case RegSize_16: { if (value.IsConstant()) - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - value.constant_value); - } + m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value); else - { - m_emit->mov(m_emit->dword[GetFastmemBasePtrReg() + GetHostReg64(actual_address->host_reg)], - GetHostReg32(value.host_reg)); - } + 
m_emit->mov(m_emit->word[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg16(value)); } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], value.constant_value); + else + m_emit->mov(m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)], GetHostReg32(value)); + } + break; } - break; } // insert nops, we need at least 5 bytes for a relative jump diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h index aa9330e77..400ff9131 100644 --- a/src/core/cpu_recompiler_thunks.h +++ b/src/core/cpu_recompiler_thunks.h @@ -32,12 +32,8 @@ void UncheckedWriteMemoryByte(u32 address, u8 value); void UncheckedWriteMemoryHalfWord(u32 address, u16 value); void UncheckedWriteMemoryWord(u32 address, u32 value); -#ifdef WITH_FASTMEM - void UpdateFastmemMapping(); -#endif - } // namespace Recompiler::Thunks } // namespace CPU diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index b7d8d48a7..39a5fc645 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -425,7 +425,7 @@ void HostInterface::SetDefaultSettings(SettingsInterface& si) si.SetStringValue("CPU", "ExecutionMode", Settings::GetCPUExecutionModeName(Settings::DEFAULT_CPU_EXECUTION_MODE)); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", false); si.SetBoolValue("CPU", "ICache", false); - si.SetBoolValue("CPU", "Fastmem", true); + si.SetStringValue("CPU", "FastmemMode", Settings::GetCPUFastmemModeName(Settings::DEFAULT_CPU_FASTMEM_MODE)); si.SetStringValue("GPU", "Renderer", Settings::GetRendererName(Settings::DEFAULT_GPU_RENDERER)); si.SetIntValue("GPU", "ResolutionScale", 1); @@ -548,6 +548,15 @@ void HostInterface::FixIncompatibleSettings(bool display_osd_messages) g_settings.cpu_execution_mode = CPUExecutionMode::CachedInterpreter; } } + +#ifndef WITH_MMAP_FASTMEM + if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap) + { + AddOSDMessage( + 
TranslateStdString("OSDMessage", "mmap fastmem is not available on this platform, using LUT instead."), 20.0f); + g_settings.cpu_fastmem_mode = CPUFastmemMode::LUT; + } +#endif } void HostInterface::SaveSettings(SettingsInterface& si) @@ -594,7 +603,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) System::UpdateThrottlePeriod(); if (g_settings.cpu_execution_mode != old_settings.cpu_execution_mode || - g_settings.cpu_fastmem != old_settings.cpu_fastmem) + g_settings.cpu_fastmem_mode != old_settings.cpu_fastmem_mode) { AddFormattedOSDMessage( 5.0f, TranslateString("OSDMessage", "Switching to %s CPU execution mode."), diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 15bdbcb48..8dadf8aca 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -130,7 +130,9 @@ void Settings::Load(SettingsInterface& si) UpdateOverclockActive(); cpu_recompiler_memory_exceptions = si.GetBoolValue("CPU", "RecompilerMemoryExceptions", false); cpu_recompiler_icache = si.GetBoolValue("CPU", "RecompilerICache", false); - cpu_fastmem = si.GetBoolValue("CPU", "Fastmem", true); + cpu_fastmem_mode = ParseCPUFastmemMode( + si.GetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(DEFAULT_CPU_FASTMEM_MODE)).c_str()) + .value_or(DEFAULT_CPU_FASTMEM_MODE); gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str()) .value_or(DEFAULT_GPU_RENDERER); @@ -266,7 +268,7 @@ void Settings::Save(SettingsInterface& si) const si.SetIntValue("CPU", "OverclockDenominator", cpu_overclock_denominator); si.SetBoolValue("CPU", "RecompilerMemoryExceptions", cpu_recompiler_memory_exceptions); si.SetBoolValue("CPU", "RecompilerICache", cpu_recompiler_icache); - si.SetBoolValue("CPU", "Fastmem", cpu_fastmem); + si.SetStringValue("CPU", "FastmemMode", GetCPUFastmemModeName(cpu_fastmem_mode)); si.SetStringValue("GPU", "Renderer", GetRendererName(gpu_renderer)); si.SetStringValue("GPU", "Adapter", 
gpu_adapter.c_str()); @@ -484,6 +486,37 @@ const char* Settings::GetCPUExecutionModeDisplayName(CPUExecutionMode mode) return s_cpu_execution_mode_display_names[static_cast(mode)]; } +static std::array(CPUFastmemMode::Count)> s_cpu_fastmem_mode_names = { + {"Disabled", "MMap", "LUT"}}; +static std::array(CPUFastmemMode::Count)> s_cpu_fastmem_mode_display_names = { + {TRANSLATABLE("CPUFastmemMode", "Disabled (Slowest)"), + TRANSLATABLE("CPUFastmemMode", "MMap (Hardware, Fastest, 64-Bit Only)"), + TRANSLATABLE("CPUFastmemMode", "LUT (Faster)")}}; + +std::optional Settings::ParseCPUFastmemMode(const char* str) +{ + u8 index = 0; + for (const char* name : s_cpu_fastmem_mode_names) + { + if (StringUtil::Strcasecmp(name, str) == 0) + return static_cast(index); + + index++; + } + + return std::nullopt; +} + +const char* Settings::GetCPUFastmemModeName(CPUFastmemMode mode) +{ + return s_cpu_fastmem_mode_names[static_cast(mode)]; +} + +const char* Settings::GetCPUFastmemModeDisplayName(CPUFastmemMode mode) +{ + return s_cpu_fastmem_mode_display_names[static_cast(mode)]; +} + static constexpr auto s_gpu_renderer_names = make_array( #ifdef WIN32 "D3D11", diff --git a/src/core/settings.h b/src/core/settings.h index 00214b055..da879b4ff 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -76,7 +76,7 @@ struct Settings bool cpu_overclock_active = false; bool cpu_recompiler_memory_exceptions = false; bool cpu_recompiler_icache = false; - bool cpu_fastmem = true; + CPUFastmemMode cpu_fastmem_mode = CPUFastmemMode::Disabled; float emulation_speed = 1.0f; float fast_forward_speed = 0.0f; @@ -188,7 +188,8 @@ struct Settings ALWAYS_INLINE bool IsUsingFastmem() const { - return (cpu_fastmem && cpu_execution_mode == CPUExecutionMode::Recompiler && !cpu_recompiler_memory_exceptions); + return (cpu_fastmem_mode != CPUFastmemMode::Disabled && cpu_execution_mode == CPUExecutionMode::Recompiler && + !cpu_recompiler_memory_exceptions); } bool HasAnyPerGameMemoryCards() const; @@ 
-227,6 +228,10 @@ struct Settings static const char* GetCPUExecutionModeName(CPUExecutionMode mode); static const char* GetCPUExecutionModeDisplayName(CPUExecutionMode mode); + static std::optional ParseCPUFastmemMode(const char* str); + static const char* GetCPUFastmemModeName(CPUFastmemMode mode); + static const char* GetCPUFastmemModeDisplayName(CPUFastmemMode mode); + static std::optional ParseRendererName(const char* str); static const char* GetRendererName(GPURenderer renderer); static const char* GetRendererDisplayName(GPURenderer renderer); @@ -264,7 +269,17 @@ struct Settings static constexpr GPUTextureFilter DEFAULT_GPU_TEXTURE_FILTER = GPUTextureFilter::Nearest; static constexpr ConsoleRegion DEFAULT_CONSOLE_REGION = ConsoleRegion::Auto; +#ifdef WITH_RECOMPILER static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler; +#ifdef WITH_MMAP_FASTMEM + static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::MMap; +#else + static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::LUT; +#endif +#else + static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter; + static constexpr CPUFastmemMode DEFAULT_CPU_FASTMEM_MODE = CPUFastmemMode::Disabled; +#endif #ifndef ANDROID static constexpr AudioBackend DEFAULT_AUDIO_BACKEND = AudioBackend::Cubeb; diff --git a/src/core/types.h b/src/core/types.h index c447e0293..f70d2eb5f 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -135,8 +135,16 @@ enum : u32 NUM_CONTROLLER_AND_CARD_PORTS = 2 }; -enum : u32 +enum class CPUFastmemMode { - CPU_CODE_CACHE_PAGE_SIZE = 4096, - CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE + Disabled, + MMap, + LUT, + Count +}; + +enum : size_t +{ + HOST_PAGE_SIZE = 4096, + HOST_PAGE_OFFSET_MASK = HOST_PAGE_SIZE - 1, }; diff --git a/src/duckstation-libretro/libretro_host_interface.cpp b/src/duckstation-libretro/libretro_host_interface.cpp index 99d7ae72f..8588ed578 
100644 --- a/src/duckstation-libretro/libretro_host_interface.cpp +++ b/src/duckstation-libretro/libretro_host_interface.cpp @@ -788,12 +788,17 @@ static std::array s_option_definitions = {{ "to performance. If games are running too fast, try enabling this option.", {{"true", "Enabled"}, {"false", "Disabled"}}, "false"}, - {"duckstation_CPU.Fastmem", + {"duckstation_CPU.FastmemMode", "CPU Recompiler Fast Memory Access", "Uses page faults to determine hardware memory accesses at runtime. Can provide a significant performance " "improvement in some games, but make the core more difficult to debug.", - {{"true", "Enabled"}, {"false", "Disabled"}}, - "true"}, + {{"Disabled", "Disabled (Slowest)"}, {"MMap", "MMap (Hardware, Fastest, 64-Bit Only)"}, {"LUT", "LUT (Faster)"}}, +#if defined(CPU_X64) || defined(CPU_AARCH64) + "MMap" +#else + "LUT" +#endif + }, {}, }}; diff --git a/src/duckstation-qt/advancedsettingswidget.cpp b/src/duckstation-qt/advancedsettingswidget.cpp index bef59261f..ae7ce56f1 100644 --- a/src/duckstation-qt/advancedsettingswidget.cpp +++ b/src/duckstation-qt/advancedsettingswidget.cpp @@ -57,6 +57,40 @@ static void setIntRangeTweakOption(QTableWidget* table, int row, int value) cb->setValue(value); } +template +static void addChoiceTweakOption(QtHostInterface* host_interface, QTableWidget* table, QString name, + std::string section, std::string key, std::optional (*parse_callback)(const char*), + const char* (*get_value_callback)(T), const char* (*get_display_callback)(T), + const char* tr_context, u32 num_values, T default_value) +{ + const int row = table->rowCount(); + const std::string current_value = + host_interface->GetStringSettingValue(section.c_str(), key.c_str(), get_value_callback(default_value)); + + table->insertRow(row); + + QTableWidgetItem* name_item = new QTableWidgetItem(name); + name_item->setFlags(name_item->flags() & ~(Qt::ItemIsEditable | Qt::ItemIsSelectable)); + table->setItem(row, 0, name_item); + + QComboBox* cb = new 
QComboBox(table); + for (u32 i = 0; i < num_values; i++) + cb->addItem(qApp->translate(tr_context, get_display_callback(static_cast(i)))); + + SettingWidgetBinder::BindWidgetToEnumSetting(host_interface, cb, std::move(section), std::move(key), parse_callback, + get_value_callback, default_value); + table->setCellWidget(row, 1, cb); +} + +template +static void setChoiceTweakOption(QTableWidget* table, int row, T value) +{ + QWidget* widget = table->cellWidget(row, 1); + QComboBox* cb = qobject_cast(widget); + Assert(cb); + cb->setCurrentIndex(static_cast(value)); +} + AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, QWidget* parent, SettingsDialog* dialog) : QWidget(parent), m_host_interface(host_interface) { @@ -90,8 +124,10 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(QtHostInterface* host_interface, addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Memory Exceptions"), "CPU", "RecompilerMemoryExceptions", false); - addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU", - "Fastmem", true); + addChoiceTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler Fast Memory Access"), "CPU", + "FastmemMode", Settings::ParseCPUFastmemMode, Settings::GetCPUFastmemModeName, + Settings::GetCPUFastmemModeDisplayName, "CPUFastmemMode", + static_cast(CPUFastmemMode::Count), Settings::DEFAULT_CPU_FASTMEM_MODE); addBooleanTweakOption(m_host_interface, m_ui.tweakOptionTable, tr("Enable Recompiler ICache"), "CPU", "RecompilerICache", false); @@ -119,7 +155,7 @@ void AdvancedSettingsWidget::onResetToDefaultClicked() setBooleanTweakOption(m_ui.tweakOptionTable, 1, false); setBooleanTweakOption(m_ui.tweakOptionTable, 2, false); setBooleanTweakOption(m_ui.tweakOptionTable, 3, false); - setBooleanTweakOption(m_ui.tweakOptionTable, 4, true); + setChoiceTweakOption(m_ui.tweakOptionTable, 4, Settings::DEFAULT_CPU_FASTMEM_MODE); 
setBooleanTweakOption(m_ui.tweakOptionTable, 5, false); setIntRangeTweakOption(m_ui.tweakOptionTable, 6, static_cast(Settings::DEFAULT_DMA_MAX_SLICE_TICKS)); setIntRangeTweakOption(m_ui.tweakOptionTable, 7, static_cast(Settings::DEFAULT_DMA_HALT_TICKS)); diff --git a/src/duckstation-qt/duckstation-qt.vcxproj b/src/duckstation-qt/duckstation-qt.vcxproj index 3d7fe4e40..dd3f79285 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj +++ b/src/duckstation-qt/duckstation-qt.vcxproj @@ -516,7 +516,7 @@ Level4 Disabled - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) @@ -538,7 +538,7 @@ Level4 Disabled - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) @@ -584,7 +584,7 @@ Level4 Disabled - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + 
WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) @@ -608,7 +608,7 @@ Level4 Disabled - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) @@ -682,7 +682,7 @@ MaxSpeed true - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) true false @@ -706,7 +706,7 @@ MaxSpeed true - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + 
WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) true false @@ -730,7 +730,7 @@ MaxSpeed true - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) true true @@ -755,7 +755,7 @@ MaxSpeed true - WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + WITH_DISCORD_PRESENCE=1;WITH_SDL2=1;WITH_RECOMPILER=1;WITH_MMAP_FASTMEM=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) $(SolutionDir)dep\glad\Include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\minizip\include;$(SolutionDir)dep\vulkan-loader\include;$(SolutionDir)src;$(SolutionDir)dep\msvc\qt5-x64\include;%(AdditionalIncludeDirectories) true true diff --git a/src/duckstation-qt/duckstation-qt.vcxproj.filters b/src/duckstation-qt/duckstation-qt.vcxproj.filters index 82bc61a72..fddd6d03b 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj.filters +++ b/src/duckstation-qt/duckstation-qt.vcxproj.filters @@ -61,6 +61,10 @@ + + + + @@ 
-105,6 +109,8 @@ + + @@ -124,6 +130,8 @@ + + diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index a1f58bd36..cb86449d9 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -913,7 +913,21 @@ void SDLHostInterface::DrawQuickSettingsMenu() settings_changed |= ImGui::MenuItem("Recompiler Memory Exceptions", nullptr, &m_settings_copy.cpu_recompiler_memory_exceptions); - settings_changed |= ImGui::MenuItem("Recompiler Fastmem", nullptr, &m_settings_copy.cpu_fastmem); + if (ImGui::BeginMenu("Recompiler Fastmem")) + { + for (u32 i = 0; i < static_cast(CPUFastmemMode::Count); i++) + { + if (ImGui::MenuItem(Settings::GetCPUFastmemModeDisplayName(static_cast(i)), nullptr, + m_settings_copy.cpu_fastmem_mode == static_cast(i))) + { + m_settings_copy.cpu_fastmem_mode = static_cast(i); + settings_changed = true; + } + } + + ImGui::EndMenu(); + } + settings_changed |= ImGui::MenuItem("Recompiler ICache", nullptr, &m_settings_copy.cpu_recompiler_icache); ImGui::Separator();