CPU: Default to new recompiler/remove old recompiler

This commit is contained in:
Stenzek 2024-11-21 16:05:39 +10:00
parent d2d06adeeb
commit f67eacc071
No known key found for this signature in database
28 changed files with 1937 additions and 14394 deletions

View File

@ -127,20 +127,12 @@ add_library(core
) )
set(RECOMPILER_SRCS set(RECOMPILER_SRCS
cpu_recompiler_code_generator.cpp cpu_recompiler.cpp
cpu_recompiler_code_generator.h cpu_recompiler.h
cpu_recompiler_code_generator_generic.cpp
cpu_recompiler_register_cache.cpp
cpu_recompiler_register_cache.h
cpu_recompiler_thunks.h cpu_recompiler_thunks.h
cpu_recompiler_types.h cpu_recompiler_types.h
) )
set(NEWREC_SOURCES
cpu_newrec_compiler.cpp
cpu_newrec_compiler.h
)
target_precompile_headers(core PRIVATE "pch.h") target_precompile_headers(core PRIVATE "pch.h")
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
@ -148,11 +140,11 @@ target_link_libraries(core PUBLIC Threads::Threads common util)
target_link_libraries(core PRIVATE xxhash imgui rapidyaml rcheevos cpuinfo::cpuinfo ZLIB::ZLIB Zstd::Zstd libzip::zip) target_link_libraries(core PRIVATE xxhash imgui rapidyaml rcheevos cpuinfo::cpuinfo ZLIB::ZLIB Zstd::Zstd libzip::zip)
if(CPU_ARCH_X64) if(CPU_ARCH_X64)
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} target_sources(core PRIVATE
cpu_recompiler_code_generator_x64.cpp ${RECOMPILER_SRCS}
cpu_newrec_compiler_x64.cpp cpu_recompiler_x64.cpp
cpu_newrec_compiler_x64.h cpu_recompiler_x64.h
) )
target_link_libraries(core PRIVATE xbyak) target_link_libraries(core PRIVATE xbyak)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
@ -161,33 +153,34 @@ if(CPU_ARCH_X64)
message(STATUS "Building x64 recompiler.") message(STATUS "Building x64 recompiler.")
endif() endif()
if(CPU_ARCH_ARM32) if(CPU_ARCH_ARM32)
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} target_sources(core PRIVATE
cpu_recompiler_code_generator_aarch32.cpp ${RECOMPILER_SRCS}
cpu_newrec_compiler_aarch32.cpp cpu_recompiler_arm32.cpp
cpu_newrec_compiler_aarch32.h cpu_recompiler_arm32.h
) )
target_link_libraries(core PUBLIC vixl) target_link_libraries(core PUBLIC vixl)
message(STATUS "Building AArch32 recompiler.") message(STATUS "Building ARM32 recompiler.")
endif() endif()
if(CPU_ARCH_ARM64) if(CPU_ARCH_ARM64)
target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} target_sources(core PRIVATE
cpu_recompiler_code_generator_aarch64.cpp ${RECOMPILER_SRCS}
cpu_newrec_compiler_aarch64.cpp cpu_recompiler_arm64.cpp
cpu_newrec_compiler_aarch64.h cpu_recompiler_arm64.h
) )
target_link_libraries(core PUBLIC vixl) target_link_libraries(core PUBLIC vixl)
message(STATUS "Building AArch64 recompiler.") message(STATUS "Building ARM64 recompiler.")
endif() endif()
if(CPU_ARCH_RISCV64) if(CPU_ARCH_RISCV64)
target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1")
target_sources(core PRIVATE ${NEWREC_SOURCES} target_sources(core PRIVATE
cpu_newrec_compiler_riscv64.cpp ${RECOMPILER_SRCS}
cpu_newrec_compiler_riscv64.h cpu_recompiler_riscv64.cpp
cpu_recompiler_riscv64.h
) )
target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas) target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas)
message(STATUS "Building RISC-V 64-bit recompiler.") message(STATUS "Building RISC-V-64 recompiler.")
endif() endif()
# Copy the provided data directory to the output directory. Borrowed from PCSX2. # Copy the provided data directory to the output directory. Borrowed from PCSX2.

View File

@ -7,7 +7,6 @@
<PreprocessorDefinitions Condition="('$(Platform)'!='ARM64')">ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="('$(Platform)'!='ARM64')">ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM' Or '$(Platform)'=='ARM64')">ENABLE_RECOMPILER=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="('$(Platform)'=='x64' Or '$(Platform)'=='ARM64')">ENABLE_NEWREC=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include</AdditionalIncludeDirectories>

View File

@ -14,31 +14,19 @@
<ClCompile Include="cpu_core.cpp" /> <ClCompile Include="cpu_core.cpp" />
<ClCompile Include="cpu_disasm.cpp" /> <ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="cpu_code_cache.cpp" /> <ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" /> <ClCompile Include="cpu_recompiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch32.cpp"> <ClCompile Include="cpu_recompiler_arm32.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp"> <ClCompile Include="cpu_recompiler_arm64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="cpu_newrec_compiler_riscv64.cpp"> <ClCompile Include="cpu_recompiler_riscv64.cpp">
<ExcludedFromBuild>true</ExcludedFromBuild> <ExcludedFromBuild>true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="cpu_newrec_compiler_x64.cpp"> <ClCompile Include="cpu_recompiler_x64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClCompile> </ClCompile>
<ClCompile Include="cpu_recompiler_code_generator.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_aarch32.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_recompiler_code_generator_aarch64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_recompiler_code_generator_generic.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_x64.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_types.cpp" /> <ClCompile Include="cpu_types.cpp" />
<ClCompile Include="digital_controller.cpp" /> <ClCompile Include="digital_controller.cpp" />
<ClCompile Include="fullscreen_ui.cpp" /> <ClCompile Include="fullscreen_ui.cpp" />
@ -105,21 +93,19 @@
<ClInclude Include="cpu_core_private.h" /> <ClInclude Include="cpu_core_private.h" />
<ClInclude Include="cpu_disasm.h" /> <ClInclude Include="cpu_disasm.h" />
<ClInclude Include="cpu_code_cache.h" /> <ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_newrec_compiler.h" /> <ClInclude Include="cpu_recompiler.h" />
<ClInclude Include="cpu_newrec_compiler_aarch32.h"> <ClInclude Include="cpu_recompiler_arm32.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM'">true</ExcludedFromBuild>
</ClInclude> </ClInclude>
<ClInclude Include="cpu_newrec_compiler_aarch64.h"> <ClInclude Include="cpu_recompiler_arm64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClInclude> </ClInclude>
<ClInclude Include="cpu_newrec_compiler_riscv64.h"> <ClInclude Include="cpu_recompiler_riscv64.h">
<ExcludedFromBuild>true</ExcludedFromBuild> <ExcludedFromBuild>true</ExcludedFromBuild>
</ClInclude> </ClInclude>
<ClInclude Include="cpu_newrec_compiler_x64.h"> <ClInclude Include="cpu_recompiler_x64.h">
<ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Platform)'!='x64'">true</ExcludedFromBuild>
</ClInclude> </ClInclude>
<ClInclude Include="cpu_recompiler_code_generator.h" />
<ClInclude Include="cpu_recompiler_register_cache.h" />
<ClInclude Include="cpu_recompiler_thunks.h" /> <ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_types.h" /> <ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="digital_controller.h" /> <ClInclude Include="digital_controller.h" />

View File

@ -23,12 +23,7 @@
<ClCompile Include="gpu_hw_shadergen.cpp" /> <ClCompile Include="gpu_hw_shadergen.cpp" />
<ClCompile Include="bios.cpp" /> <ClCompile Include="bios.cpp" />
<ClCompile Include="cpu_code_cache.cpp" /> <ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_x64.cpp" />
<ClCompile Include="cpu_recompiler_code_generator.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_generic.cpp" />
<ClCompile Include="cpu_types.cpp" /> <ClCompile Include="cpu_types.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_aarch64.cpp" />
<ClCompile Include="sio.cpp" /> <ClCompile Include="sio.cpp" />
<ClCompile Include="controller.cpp" /> <ClCompile Include="controller.cpp" />
<ClCompile Include="analog_controller.cpp" /> <ClCompile Include="analog_controller.cpp" />
@ -43,7 +38,6 @@
<ClCompile Include="cheats.cpp" /> <ClCompile Include="cheats.cpp" />
<ClCompile Include="memory_card_image.cpp" /> <ClCompile Include="memory_card_image.cpp" />
<ClCompile Include="analog_joystick.cpp" /> <ClCompile Include="analog_joystick.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_aarch32.cpp" />
<ClCompile Include="gpu_backend.cpp" /> <ClCompile Include="gpu_backend.cpp" />
<ClCompile Include="gpu_sw_backend.cpp" /> <ClCompile Include="gpu_sw_backend.cpp" />
<ClCompile Include="multitap.cpp" /> <ClCompile Include="multitap.cpp" />
@ -58,11 +52,11 @@
<ClCompile Include="hotkeys.cpp" /> <ClCompile Include="hotkeys.cpp" />
<ClCompile Include="gpu_shadergen.cpp" /> <ClCompile Include="gpu_shadergen.cpp" />
<ClCompile Include="pch.cpp" /> <ClCompile Include="pch.cpp" />
<ClCompile Include="cpu_newrec_compiler.cpp" /> <ClCompile Include="cpu_recompiler.cpp" />
<ClCompile Include="cpu_newrec_compiler_x64.cpp" /> <ClCompile Include="cpu_recompiler_x64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch64.cpp" /> <ClCompile Include="cpu_recompiler_arm64.cpp" />
<ClCompile Include="cpu_newrec_compiler_riscv64.cpp" /> <ClCompile Include="cpu_recompiler_riscv64.cpp" />
<ClCompile Include="cpu_newrec_compiler_aarch32.cpp" /> <ClCompile Include="cpu_recompiler_arm32.cpp" />
<ClCompile Include="justifier.cpp" /> <ClCompile Include="justifier.cpp" />
<ClCompile Include="gdb_server.cpp" /> <ClCompile Include="gdb_server.cpp" />
<ClCompile Include="gpu_sw_rasterizer.cpp" /> <ClCompile Include="gpu_sw_rasterizer.cpp" />
@ -98,9 +92,7 @@
<ClInclude Include="bios.h" /> <ClInclude Include="bios.h" />
<ClInclude Include="cpu_recompiler_types.h" /> <ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="cpu_code_cache.h" /> <ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_recompiler_register_cache.h" />
<ClInclude Include="cpu_recompiler_thunks.h" /> <ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_code_generator.h" />
<ClInclude Include="sio.h" /> <ClInclude Include="sio.h" />
<ClInclude Include="controller.h" /> <ClInclude Include="controller.h" />
<ClInclude Include="analog_controller.h" /> <ClInclude Include="analog_controller.h" />
@ -134,11 +126,11 @@
<ClInclude Include="gpu_shadergen.h" /> <ClInclude Include="gpu_shadergen.h" />
<ClInclude Include="pch.h" /> <ClInclude Include="pch.h" />
<ClInclude Include="cpu_code_cache_private.h" /> <ClInclude Include="cpu_code_cache_private.h" />
<ClInclude Include="cpu_newrec_compiler.h" /> <ClInclude Include="cpu_recompiler.h" />
<ClInclude Include="cpu_newrec_compiler_x64.h" /> <ClInclude Include="cpu_recompiler_x64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch64.h" /> <ClInclude Include="cpu_recompiler_arm64.h" />
<ClInclude Include="cpu_newrec_compiler_riscv64.h" /> <ClInclude Include="cpu_recompiler_riscv64.h" />
<ClInclude Include="cpu_newrec_compiler_aarch32.h" /> <ClInclude Include="cpu_recompiler_arm32.h" />
<ClInclude Include="achievements_private.h" /> <ClInclude Include="achievements_private.h" />
<ClInclude Include="justifier.h" /> <ClInclude Include="justifier.h" />
<ClInclude Include="gdb_server.h" /> <ClInclude Include="gdb_server.h" />

View File

@ -30,11 +30,7 @@ LOG_CHANNEL(CodeCache);
// #define ENABLE_RECOMPILER_PROFILING 1 // #define ENABLE_RECOMPILER_PROFILING 1
#ifdef ENABLE_RECOMPILER #ifdef ENABLE_RECOMPILER
#include "cpu_recompiler_code_generator.h" #include "cpu_recompiler.h"
#endif
#ifdef ENABLE_NEWREC
#include "cpu_newrec_compiler.h"
#endif #endif
#include <map> #include <map>
@ -165,15 +161,14 @@ static u32 s_total_host_instructions_emitted = 0;
#endif #endif
} // namespace CPU::CodeCache } // namespace CPU::CodeCache
bool CPU::CodeCache::IsUsingAnyRecompiler() bool CPU::CodeCache::IsUsingRecompiler()
{ {
return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler || return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
} }
bool CPU::CodeCache::IsUsingFastmem() bool CPU::CodeCache::IsUsingFastmem()
{ {
return g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled; return (g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled);
} }
bool CPU::CodeCache::ProcessStartup(Error* error) bool CPU::CodeCache::ProcessStartup(Error* error)
@ -217,7 +212,7 @@ void CPU::CodeCache::Reset()
{ {
ClearBlocks(); ClearBlocks();
if (IsUsingAnyRecompiler()) if (IsUsingRecompiler())
{ {
ResetCodeBuffer(); ResetCodeBuffer();
CompileASMFunctions(); CompileASMFunctions();
@ -232,7 +227,7 @@ void CPU::CodeCache::Shutdown()
void CPU::CodeCache::Execute() void CPU::CodeCache::Execute()
{ {
if (IsUsingAnyRecompiler()) if (IsUsingRecompiler())
{ {
g_enter_recompiler(); g_enter_recompiler();
UnreachableCode(); UnreachableCode();
@ -500,9 +495,8 @@ CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructio
return block; return block;
} }
// Old rec doesn't use backprop info, don't waste time filling it. // populate backpropagation information for liveness queries
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) FillBlockRegInfo(block);
FillBlockRegInfo(block);
// add it to the tracking list for its page // add it to the tracking list for its page
AddBlockToPageList(block); AddBlockToPageList(block);
@ -1316,7 +1310,7 @@ void CPU::CodeCache::FillBlockRegInfo(Block* block)
void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc) void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
{ {
// TODO: this doesn't currently handle when the cache overflows... // TODO: this doesn't currently handle when the cache overflows...
DebugAssert(IsUsingAnyRecompiler()); DebugAssert(IsUsingRecompiler());
MemMap::BeginCodeWrite(); MemMap::BeginCodeWrite();
Block* block = LookupBlock(start_pc); Block* block = LookupBlock(start_pc);
@ -1450,11 +1444,9 @@ void CPU::CodeCache::ResetCodeBuffer()
s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE; s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE;
s_code_used = 0; s_code_used = 0;
// Use half the far code size when using newrec and memory exceptions aren't enabled. It's only used for backpatching. // Use half the far code size when memory exceptions aren't enabled. It's only used for backpatching.
const u32 far_code_size = const u32 far_code_size = (!g_settings.cpu_recompiler_memory_exceptions) ? (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) :
(g_settings.cpu_execution_mode == CPUExecutionMode::NewRec && !g_settings.cpu_recompiler_memory_exceptions) ? RECOMPILER_FAR_CODE_CACHE_SIZE;
(RECOMPILER_FAR_CODE_CACHE_SIZE / 2) :
RECOMPILER_FAR_CODE_CACHE_SIZE;
s_far_code_size = far_code_size; s_far_code_size = far_code_size;
s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr; s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr;
s_free_far_code_ptr = s_far_code_ptr; s_free_far_code_ptr = s_far_code_ptr;
@ -1572,14 +1564,7 @@ bool CPU::CodeCache::CompileBlock(Block* block)
#ifdef ENABLE_RECOMPILER #ifdef ENABLE_RECOMPILER
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler) if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
{ host_code = Recompiler::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
Recompiler::CodeGenerator codegen;
host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
}
#endif
#ifdef ENABLE_NEWREC
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
host_code = NewRec::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
#endif #endif
block->host_code = host_code; block->host_code = host_code;
@ -1715,20 +1700,17 @@ PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exc
BackpatchLoadStore(exception_pc, info); BackpatchLoadStore(exception_pc, info);
// queue block for recompilation later // queue block for recompilation later
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) Block* block = LookupBlock(info.guest_block);
if (block)
{ {
Block* block = LookupBlock(info.guest_block); // This is a bit annoying, we have to remove it from the page list if it's a RAM block.
if (block) DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc);
{ RemoveBlockFromPageList(block);
// This is a bit annoying, we have to remove it from the page list if it's a RAM block. InvalidateBlock(block, BlockState::NeedsRecompile);
DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc);
RemoveBlockFromPageList(block);
InvalidateBlock(block, BlockState::NeedsRecompile);
// Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback. // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback.
block->compile_frame = System::GetFrameNumber(); block->compile_frame = System::GetFrameNumber();
block->compile_count = 1; block->compile_count = 1;
}
} }
MemMap::EndCodeWrite(); MemMap::EndCodeWrite();
@ -1748,11 +1730,7 @@ void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchI
{ {
#ifdef ENABLE_RECOMPILER #ifdef ENABLE_RECOMPILER
if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler) if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info); Recompiler::BackpatchLoadStore(host_pc, info);
#endif
#ifdef ENABLE_NEWREC
if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
NewRec::BackpatchLoadStore(host_pc, info);
#endif #endif
} }

View File

@ -11,7 +11,7 @@ class Error;
namespace CPU::CodeCache { namespace CPU::CodeCache {
/// Returns true if any recompiler is in use. /// Returns true if any recompiler is in use.
bool IsUsingAnyRecompiler(); bool IsUsingRecompiler();
/// Returns true if any recompiler and fastmem is in use. /// Returns true if any recompiler and fastmem is in use.
bool IsUsingFastmem(); bool IsUsingFastmem();

View File

@ -13,7 +13,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
namespace CPU::NewRec { namespace CPU::Recompiler {
// Global options // Global options
static constexpr bool EMULATE_LOAD_DELAYS = true; static constexpr bool EMULATE_LOAD_DELAYS = true;
@ -35,11 +35,11 @@ static constexpr bool HAS_MEMORY_OPERANDS = false;
#endif #endif
// TODO: Get rid of the virtuals... somehow. // TODO: Get rid of the virtuals... somehow.
class Compiler class Recompiler
{ {
public: public:
Compiler(); Recompiler();
virtual ~Compiler(); virtual ~Recompiler();
const void* CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size); const void* CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size);
@ -271,9 +271,9 @@ protected:
void CompileInstruction(); void CompileInstruction();
void CompileBranchDelaySlot(bool dirty_pc = true); void CompileBranchDelaySlot(bool dirty_pc = true);
void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags), void CompileTemplate(void (Recompiler::*const_func)(CompileFlags), void (Recompiler::*func)(CompileFlags),
const void* pgxp_cpu_func, u32 tflags); const void* pgxp_cpu_func, u32 tflags);
void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, void CompileLoadStoreTemplate(void (Recompiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
const std::optional<VirtualMemoryAddress>&), const std::optional<VirtualMemoryAddress>&),
MemoryAccessSize size, bool store, bool sign, u32 tflags); MemoryAccessSize size, bool store, bool sign, u32 tflags);
void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem); void FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store, bool use_fastmem);
@ -539,5 +539,5 @@ u32 CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address,
TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register, TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register,
MemoryAccessSize size, bool is_signed, bool is_load); MemoryAccessSize size, bool is_signed, bool is_load);
extern Compiler* g_compiler; extern Recompiler* g_compiler;
} // namespace CPU::NewRec } // namespace CPU::Recompiler

View File

@ -3,7 +3,7 @@
#pragma once #pragma once
#include "cpu_newrec_compiler.h" #include "cpu_recompiler.h"
#include <memory> #include <memory>
@ -12,13 +12,13 @@
#include "vixl/aarch32/assembler-aarch32.h" #include "vixl/aarch32/assembler-aarch32.h"
#include "vixl/aarch32/operands-aarch32.h" #include "vixl/aarch32/operands-aarch32.h"
namespace CPU::NewRec { namespace CPU::Recompiler {
class AArch32Compiler final : public Compiler class ARM32Recompiler final : public Recompiler
{ {
public: public:
AArch32Compiler(); ARM32Recompiler();
~AArch32Compiler() override; ~ARM32Recompiler() override;
protected: protected:
const char* GetHostRegName(u32 reg) const override; const char* GetHostRegName(u32 reg) const override;
@ -165,6 +165,6 @@ private:
#endif #endif
}; };
} // namespace CPU::NewRec } // namespace CPU::Recompiler
#endif // CPU_ARCH_ARM32 #endif // CPU_ARCH_ARM32

View File

@ -3,7 +3,7 @@
#pragma once #pragma once
#include "cpu_newrec_compiler.h" #include "cpu_recompiler.h"
#include <memory> #include <memory>
@ -11,13 +11,13 @@
#include "vixl/aarch64/assembler-aarch64.h" #include "vixl/aarch64/assembler-aarch64.h"
namespace CPU::NewRec { namespace CPU::Recompiler {
class AArch64Compiler final : public Compiler class ARM64Recompiler final : public Recompiler
{ {
public: public:
AArch64Compiler(); ARM64Recompiler();
~AArch64Compiler() override; ~ARM64Recompiler() override;
protected: protected:
const char* GetHostRegName(u32 reg) const override; const char* GetHostRegName(u32 reg) const override;
@ -166,6 +166,6 @@ private:
#endif #endif
}; };
} // namespace CPU::NewRec } // namespace CPU::Recompiler
#endif // CPU_ARCH_ARM64 #endif // CPU_ARCH_ARM64

File diff suppressed because it is too large Load Diff

View File

@ -1,314 +0,0 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "cpu_code_cache_private.h"
#include "cpu_recompiler_register_cache.h"
#include "cpu_recompiler_thunks.h"
#include "cpu_recompiler_types.h"
#include "cpu_types.h"
#include <array>
#include <utility>
namespace CPU::Recompiler {
// Condition codes accepted by the code generator's conditional emitters
// (e.g. EmitSetConditionResult, EmitConditionalBranch, GenerateExceptionExit).
// Values are spelled out explicitly; they match the implicit declaration order.
enum class Condition : u8
{
  Always = 0,
  NotEqual = 1,
  Equal = 2,
  Overflow = 3,
  Greater = 4,
  GreaterEqual = 5,
  LessEqual = 6,
  Less = 7,
  Negative = 8,
  PositiveOrZero = 9,
  Above = 10,      // unsigned counterpart of Greater
  AboveEqual = 11, // unsigned counterpart of GreaterEqual
  Below = 12,      // unsigned counterpart of Less
  BelowEqual = 13, // unsigned counterpart of LessEqual
  NotZero = 14,
  Zero = 15
};
class CodeGenerator
{
public:
using SpeculativeValue = std::optional<u32>;
struct CodeBlockInstruction
{
const Instruction* instruction;
const CodeCache::InstructionInfo* info;
};
CodeGenerator();
~CodeGenerator();
static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
static void BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi);
const void* CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size);
//////////////////////////////////////////////////////////////////////////
// Code Generation
//////////////////////////////////////////////////////////////////////////
void EmitBeginBlock(bool allocate_registers = true);
void EmitEndBlock(bool free_registers, const void* jump_to);
void EmitExceptionExit();
void EmitExceptionExitOnBool(const Value& value);
const void* FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size);
void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
void EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
void EmitCopyValue(HostReg to_reg, const Value& value);
void EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags);
void EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags);
void EmitCmp(HostReg to_reg, const Value& value);
void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply);
void EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size,
bool signed_divide);
void EmitInc(HostReg to_reg, RegSize size);
void EmitDec(HostReg to_reg, RegSize size);
void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
bool assume_amount_masked = true);
void EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value);
void EmitOr(HostReg to_reg, HostReg from_reg, const Value& value);
void EmitXor(HostReg to_reg, HostReg from_reg, const Value& value);
void EmitTest(HostReg to_reg, const Value& value);
void EmitNot(HostReg to_reg, RegSize size);
void EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition);
void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg);
void EmitStoreGuestRegister(Reg guest_reg, const Value& value);
void EmitStoreInterpreterLoadDelay(Reg reg, const Value& value);
void EmitFlushInterpreterLoadDelay();
void EmitMoveNextInterpreterLoadDelay();
void EmitCancelInterpreterLoadDelayForReg(Reg reg);
void EmitICacheCheckAndUpdate();
void EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size);
void EmitStallUntilGTEComplete();
void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
void EmitStoreCPUStructField(u32 offset, const Value& value);
void EmitAddCPUStructField(u32 offset, const Value& value);
void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr);
void EmitStoreGlobal(void* ptr, const Value& value);
void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr);
// Automatically generates an exception handler.
Value EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
const SpeculativeValue& address_spec, RegSize size);
void EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result);
void EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
RegSize size, Value& result);
void EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
RegSize size, Value& result, bool in_far_code);
void EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
const SpeculativeValue& address_spec, RegSize size, const Value& value);
void EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value);
void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, RegSize size, const Value& value, bool in_far_code);
void EnsureMembaseLoaded();
void EmitUpdateFastmemBase();
// Unconditional branch to pointer. May allocate a scratch register.
void EmitBranch(const void* address, bool allow_scratch = true);
void EmitBranch(LabelType* label);
// Branching, generates two paths.
void EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, LabelType* label);
void EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, LabelType* label);
void EmitConditionalBranch(Condition condition, bool invert, LabelType* label);
void EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label);
void EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label);
void EmitBindLabel(LabelType* label);
u32 PrepareStackForCall();
void RestoreStackAfterCall(u32 adjust_size);
void EmitCall(const void* ptr);
void EmitFunctionCallPtr(Value* return_value, const void* ptr);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
const Value& arg3);
void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
const Value& arg3, const Value& arg4);
// Typed convenience wrappers around EmitFunctionCallPtr() for zero to four arguments.
//
// Each overload erases the callee's type and forwards to the matching
// EmitFunctionCallPtr() overload, passing `return_value` and the arguments
// through untouched.
//
// The callee is cast straight to `const void*`, which is the parameter type
// EmitFunctionCallPtr() declares. The previous cast went through `const void**`
// and relied on an implicit conversion back to `const void*`; that intermediate
// type wrongly described the callee as a pointer-to-pointer.
template<typename FunctionType>
void EmitFunctionCall(Value* return_value, const FunctionType ptr)
{
  EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr));
}
template<typename FunctionType>
void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1)
{
  EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1);
}
template<typename FunctionType>
void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2)
{
  EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2);
}
template<typename FunctionType>
void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
                      const Value& arg3)
{
  EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3);
}
template<typename FunctionType>
void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
                      const Value& arg3, const Value& arg4)
{
  EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3, arg4);
}
// Host register saving.
void EmitPushHostReg(HostReg reg, u32 position);
void EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position);
void EmitPopHostReg(HostReg reg, u32 position);
void EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position);
// Value ops
Value AddValues(const Value& lhs, const Value& rhs, bool set_flags);
Value SubValues(const Value& lhs, const Value& rhs, bool set_flags);
std::pair<Value, Value> MulValues(const Value& lhs, const Value& rhs, bool signed_multiply);
Value ShlValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
Value ShrValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
Value SarValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
Value OrValues(const Value& lhs, const Value& rhs);
void OrValueInPlace(Value& lhs, const Value& rhs);
Value AndValues(const Value& lhs, const Value& rhs);
void AndValueInPlace(Value& lhs, const Value& rhs);
Value XorValues(const Value& lhs, const Value& rhs);
Value NotValue(const Value& val);
const TickCount* GetFetchMemoryAccessTimePtr() const;
// Raising exception if condition is true.
void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
Condition condition = Condition::Always);
private:
// Host register setup
void InitHostRegs();
Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);
Value GetValueInHostRegister(const Value& value, bool allow_zero_register = true);
Value GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register = true);
void SwitchToFarCode();
void SwitchToNearCode();
void* GetStartNearCodePointer() const;
void* GetCurrentCodePointer() const;
void* GetCurrentNearCodePointer() const;
void* GetCurrentFarCodePointer() const;
//////////////////////////////////////////////////////////////////////////
// Code Generation Helpers
//////////////////////////////////////////////////////////////////////////
// branch target, memory address, etc
void BlockPrologue();
void BlockEpilogue();
void InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info, TickCount cycles,
bool force_sync = false);
void InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info);
void TruncateBlockAtCurrentInstruction();
void AddPendingCycles(bool commit);
void AddGTETicks(TickCount ticks);
void StallUntilGTEComplete();
Value CalculatePC(u32 offset = 0);
Value GetCurrentInstructionPC(u32 offset = 0);
void WriteNewPC(const Value& value, bool commit);
Value DoGTERegisterRead(u32 index);
void DoGTERegisterWrite(u32 index, const Value& value);
//////////////////////////////////////////////////////////////////////////
// Instruction Code Generators
//////////////////////////////////////////////////////////////////////////
bool CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Nop(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_SignedDivide(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info);
bool Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info);
CodeCache::Block* m_block = nullptr;
CodeBlockInstruction m_block_start = {};
CodeBlockInstruction m_block_end = {};
CodeBlockInstruction m_current_instruction = {};
RegisterCache m_register_cache;
CodeEmitter m_near_emitter;
CodeEmitter m_far_emitter;
CodeEmitter* m_emit;
TickCount m_delayed_cycles_add = 0;
TickCount m_gte_done_cycle = 0;
u32 m_pc = 0;
bool m_pc_valid = false;
bool m_block_linked = false;
// whether various flags need to be reset.
bool m_current_instruction_in_branch_delay_slot_dirty = false;
bool m_branch_was_taken_dirty = false;
bool m_current_instruction_was_branch_taken_dirty = false;
bool m_load_delay_dirty = false;
bool m_next_load_delay_dirty = false;
bool m_gte_busy_cycles_dirty = false;
bool m_membase_loaded = false;
//////////////////////////////////////////////////////////////////////////
// Speculative Constants
//////////////////////////////////////////////////////////////////////////
// Storage for the speculative values held by this code generator (see m_speculative_constants).
struct SpeculativeConstants
{
// One speculative value per guest register; the array has Reg::count entries.
std::array<SpeculativeValue, static_cast<u8>(Reg::count)> regs;
// Speculative values for memory, keyed by physical address.
std::unordered_map<PhysicalMemoryAddress, SpeculativeValue> memory;
// Speculative value for cop0_sr -- presumably the COP0 status register; TODO confirm.
SpeculativeValue cop0_sr;
};
void InitSpeculativeRegs();
void InvalidateSpeculativeValues();
SpeculativeValue SpeculativeReadReg(Reg reg);
void SpeculativeWriteReg(Reg reg, SpeculativeValue value);
SpeculativeValue SpeculativeReadMemory(u32 address);
void SpeculativeWriteMemory(VirtualMemoryAddress address, SpeculativeValue value);
bool SpeculativeIsCacheIsolated();
SpeculativeConstants m_speculative_constants;
};
} // namespace CPU::Recompiler

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,254 +0,0 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_recompiler_code_generator.h"
#include "settings.h"
#include "common/log.h"
LOG_CHANNEL(Recompiler);
namespace CPU::Recompiler {
// Emits a 32-bit load of guest register `guest_reg` from the CPU state structure into `host_reg`.
void CodeGenerator::EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg)
{
  const u32 gpr_index = static_cast<u32>(guest_reg);
  EmitLoadCPUStructField(host_reg, RegSize_32, State::GPRRegisterOffset(gpr_index));
}
// Emits a store of `value` into the CPU state structure slot of guest register `guest_reg`.
// Only 32-bit values are accepted (checked in debug builds).
void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value)
{
  DebugAssert(value.size == RegSize_32);

  const u32 gpr_index = static_cast<u32>(guest_reg);
  EmitStoreCPUStructField(State::GPRRegisterOffset(gpr_index), value);
}
// Emits stores that write the State::load_delay_reg / State::load_delay_value fields:
// the register index goes in as an 8-bit constant, followed by the 32-bit `value`,
// which must already live in a host register. Marks the load delay state as dirty.
void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value)
{
  DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());

  const u8 delay_reg_index = static_cast<u8>(reg);
  EmitStoreCPUStructField(OFFSETOF(State, load_delay_reg), Value::FromConstantU8(delay_reg_index));
  EmitStoreCPUStructField(OFFSETOF(State, load_delay_value), value);

  m_load_delay_dirty = true;
}
// Emits code that loads `size` bits of guest memory from `address` and returns the Value
// holding the loaded data.
//
// instruction, info: forwarded to the fastmem/slowmem emitters; info.pc is used for logging.
// address:           guest address, either constant or held in a register.
// address_spec:      optional speculative address, used to decide whether fastmem is usable.
// size:              access width (RegSize_8 / RegSize_16 / RegSize_32).
//
// Three paths exist: a direct host-pointer load for constant addresses, the fastmem path,
// and the slowmem path.
Value CodeGenerator::EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, const SpeculativeValue& address_spec, RegSize size)
{
// Constant address: ask for a direct host pointer; a null pointer falls through to the
// general paths below.
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
{
TickCount read_ticks;
void* ptr = GetDirectReadMemoryPointer(
static_cast<u32>(address.constant_value),
(size == RegSize_8) ? MemoryAccessSize::Byte :
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
&read_ticks);
if (ptr)
{
Value result = m_register_cache.AllocateScratch(size);
// TODO: mask off...
if (CodeCache::IsUsingFastmem() && Bus::IsRAMAddress(static_cast<u32>(address.constant_value)))
{
// have to mask away the high bits for mirrors, since we don't map them in fastmem
EmitLoadGuestRAMFastmem(Value::FromConstantU32(static_cast<u32>(address.constant_value) & Bus::g_ram_mask),
size, result);
}
else
{
EmitLoadGlobal(result.GetHostRegister(), size, ptr);
}
// The access cost reported by GetDirectReadMemoryPointer() is accumulated, not emitted here.
m_delayed_cycles_add += read_ticks;
return result;
}
}
// General path: the result register is allocated pointer-sized and narrowed at the end.
Value result = m_register_cache.AllocateScratch(HostPointerSize);
// Fastmem is ruled out when memory exceptions are enabled, when the speculative address
// is known and not fastmem-capable, or when the cache is speculated to be isolated.
const bool use_fastmem = !g_settings.cpu_recompiler_memory_exceptions &&
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) &&
!SpeculativeIsCacheIsolated();
if (address_spec)
{
// Only logged when fastmem was rejected, so the last argument is always "no" here.
if (!use_fastmem)
{
DEBUG_LOG("Non-constant load at 0x{:08X}, speculative address 0x{:08X}, using fastmem = {}", info.pc,
*address_spec, use_fastmem ? "yes" : "no");
}
}
else
{
DEBUG_LOG("Non-constant load at 0x{:08X}, speculative address UNKNOWN, using fastmem = {}", info.pc,
use_fastmem ? "yes" : "no");
}
if (CodeCache::IsUsingFastmem() && use_fastmem)
{
EmitLoadGuestMemoryFastmem(instruction, info, address, size, result);
}
else
{
// Slowmem: pending cycles are added and caller-saved guest registers flushed first.
AddPendingCycles(true);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, false);
}
// Downcast to ignore upper 56/48/32 bits. This should be a noop.
if (result.size != size)
{
// Every supported size performs the same non-sign-extending conversion to `size`.
switch (size)
{
case RegSize_8:
ConvertValueSizeInPlace(&result, RegSize_8, false);
break;
case RegSize_16:
ConvertValueSizeInPlace(&result, RegSize_16, false);
break;
case RegSize_32:
ConvertValueSizeInPlace(&result, RegSize_32, false);
break;
default:
UnreachableCode();
break;
}
}
return result;
}
// Emits code that stores `value` (`size` bits wide) to guest memory at `address`.
//
// instruction, info: forwarded to the fastmem/slowmem emitters; info.pc is used for logging.
// address:           guest address, either constant or held in a register.
// address_spec:      optional speculative address, used to decide whether fastmem is usable.
// size:              access width (RegSize_8 / RegSize_16 / RegSize_32).
// value:             data to store; it is viewed as `size` when its own size differs.
void CodeGenerator::EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info,
const Value& address, const SpeculativeValue& address_spec, RegSize size,
const Value& value)
{
// Constant address: ask for a direct host pointer; a null pointer falls through to the
// general paths below.
if (address.IsConstant() && !SpeculativeIsCacheIsolated())
{
void* ptr = GetDirectWriteMemoryPointer(
static_cast<u32>(address.constant_value),
(size == RegSize_8) ? MemoryAccessSize::Byte :
((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
if (ptr)
{
if (value.size != size)
EmitStoreGlobal(ptr, value.ViewAsSize(size));
else
EmitStoreGlobal(ptr, value);
return;
}
}
// Fastmem is ruled out when memory exceptions are enabled, when the speculative address
// is known and not fastmem-capable, or when the cache is speculated to be isolated.
const bool use_fastmem = !g_settings.cpu_recompiler_memory_exceptions &&
(address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) &&
!SpeculativeIsCacheIsolated();
if (address_spec)
{
// Only logged when fastmem was rejected, so the last argument is always "no" here.
if (!use_fastmem)
{
DEBUG_LOG("Non-constant store at 0x{:08X}, speculative address 0x{:08X}, using fastmem = {}", info.pc,
*address_spec, use_fastmem ? "yes" : "no");
}
}
else
{
DEBUG_LOG("Non-constant store at 0x{:08X}, speculative address UNKNOWN, using fastmem = {}", info.pc,
use_fastmem ? "yes" : "no");
}
if (CodeCache::IsUsingFastmem() && use_fastmem)
{
EmitStoreGuestMemoryFastmem(instruction, info, address, size, value);
}
else
{
// Slowmem: pending cycles are added and caller-saved guest registers flushed first.
AddPendingCycles(true);
m_register_cache.FlushCallerSavedGuestRegisters(true, true);
EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, false);
}
}
#if 0 // Not used
// Emits the instruction-fetch timing update for the current block.
// This function sits inside an `#if 0` region and is therefore not compiled.
void CodeGenerator::EmitICacheCheckAndUpdate()
{
Value temp = m_register_cache.AllocateScratch(RegSize_32);
if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
{
// Segment is KSEG1 or above: add the block's uncached fetch ticks to pending_ticks.
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)), false);
EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
}
else
{
// cached path
Value temp2 = m_register_cache.AllocateScratch(RegSize_32);
// No further allocations are allowed until UninhibitAllocation() below.
m_register_cache.InhibitAllocation();
VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
{
// NOTE(review): `tag` is computed but never used; the comparison below uses current_pc.
const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
// Lines with no fill cost emit no check.
if (fill_ticks <= 0)
continue;
const u32 line = GetICacheLine(current_pc);
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
LabelType cache_hit;
// Compare the stored tag with current_pc; when equal, branch past the miss handling.
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
EmitCopyValue(temp2.GetHostRegister(), Value::FromConstantU32(current_pc));
EmitCmp(temp2.GetHostRegister(), temp);
EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), temp2, &cache_hit);
// Miss: store the new tag and add the fill ticks to pending_ticks.
EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
EmitStoreCPUStructField(offset, temp2);
EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(static_cast<u32>(fill_ticks)),
false);
EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
EmitBindLabel(&cache_hit);
}
m_register_cache.UninhibitAllocation();
}
}
#endif
#if 0 // Not Used
// Emits code that raises pending_ticks to gte_completion_tick when the latter is further ahead.
// This function sits inside an `#if 0` region and is therefore not compiled.
void CodeGenerator::EmitStallUntilGTEComplete()
{
Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
Value gte_completion_tick = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
EmitLoadCPUStructField(gte_completion_tick.GetHostRegister(), RegSize_32, OFFSETOF(State, gte_completion_tick));
// commit cycles here, should always be nonzero
if (m_delayed_cycles_add > 0)
{
EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(),
Value::FromConstantU32(m_delayed_cycles_add), false);
m_delayed_cycles_add = 0;
}
LabelType gte_done;
// gte_completion_tick -= pending_ticks with flags set; the branch skips the stall when the
// Below condition holds.
EmitSub(gte_completion_tick.GetHostRegister(), gte_completion_tick.GetHostRegister(), pending_ticks, true);
EmitConditionalBranch(Condition::Below, false, &gte_done);
// add stall ticks
EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(), gte_completion_tick, false);
// store new ticks
EmitBindLabel(&gte_done);
EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), pending_ticks);
}
#endif
} // namespace CPU::Recompiler

File diff suppressed because it is too large Load Diff

View File

@ -1,945 +0,0 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "cpu_recompiler_register_cache.h"
#include "cpu_recompiler_code_generator.h"
#include "common/log.h"
#include <cinttypes>
LOG_CHANNEL(Recompiler);
namespace CPU::Recompiler {
// Defaulted default constructor.
Value::Value() = default;
// Constructs a value carrying the constant `constant_` with the given size and flags.
// host_reg is not part of the initializer list.
Value::Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_)
: regcache(regcache_), constant_value(constant_), size(size_), flags(flags_)
{
}
// Copy constructor: duplicates every field of `other`.
// Scratch values must not be copied; Release() frees the host register of a scratch value,
// so a copy would free the same register a second time.
Value::Value(const Value& other)
: regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
flags(other.flags)
{
AssertMsg(!other.IsScratch(), "Can't copy a temporary register");
}
// Move constructor: takes over every field of `other`, then clears `other` so that its
// destructor no longer sees a scratch register to free.
Value::Value(Value&& other)
: regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
flags(other.flags)
{
other.Clear();
}
// Constructs a value that refers to host register `reg_` with the given size and flags.
// constant_value is not part of the initializer list.
Value::Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_)
: regcache(regcache_), host_reg(reg_), size(size_), flags(flags_)
{
}
// Destructor: hands a scratch host register back to the register cache via Release().
Value::~Value()
{
Release();
}
// Copy assignment: gives up whatever this value currently holds, then mirrors `other`.
// Scratch (temporary) values cannot be copied.
Value& Value::operator=(const Value& other)
{
  AssertMsg(!other.IsScratch(), "Can't copy a temporary register");

  // Free our own scratch register (if any) before the fields are overwritten.
  Release();

  flags = other.flags;
  size = other.size;
  host_reg = other.host_reg;
  constant_value = other.constant_value;
  regcache = other.regcache;

  return *this;
}
// Move assignment: gives up whatever this value currently holds, takes over the fields of
// `other`, and leaves `other` cleared.
Value& Value::operator=(Value&& other)
{
  // Free our own scratch register (if any) before the fields are overwritten.
  Release();

  flags = other.flags;
  size = other.size;
  host_reg = other.host_reg;
  constant_value = other.constant_value;
  regcache = other.regcache;

  // The source no longer owns anything.
  other.Clear();

  return *this;
}
// Resets every field to its empty state without freeing any host register.
void Value::Clear()
{
  flags = ValueFlags::None;
  size = RegSize_8;
  host_reg = {};
  constant_value = 0;
  regcache = nullptr;
}
// Returns the host register to the register cache when this value is a scratch value.
// Non-scratch values are left untouched.
void Value::Release()
{
  if (!IsScratch())
    return;

  DebugAssert(IsInHostRegister() && regcache);
  regcache->FreeHostReg(host_reg);
}
// Frees the scratch register (if any) and then resets all fields.
void Value::ReleaseAndClear()
{
Release();
Clear();
}
// Marks this value's host register as discarded in the register cache.
// The value must be in a host register (checked in debug builds); regcache is dereferenced
// without a null check.
void Value::Discard()
{
DebugAssert(IsInHostRegister());
regcache->DiscardHostReg(host_reg);
}
// Clears the discarded mark on this value's host register in the register cache.
// The value must be in a host register (checked in debug builds); regcache is dereferenced
// without a null check.
void Value::Undiscard()
{
DebugAssert(IsInHostRegister());
regcache->UndiscardHostReg(host_reg);
}
// Binds the cache to its code generator and fills guest_reg_order with Reg::count
// (presumably the "empty slot" marker -- TODO confirm).
RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator)
{
m_state.guest_reg_order.fill(Reg::count);
}
// Asserts that no saved state is left on the state stack, i.e. every PushState() was
// matched by a PopState().
RegisterCache::~RegisterCache()
{
Assert(m_state_stack.empty());
}
void RegisterCache::SetHostRegAllocationOrder(std::initializer_list<HostReg> regs)
{
size_t index = 0;
for (HostReg reg : regs)
{
m_state.host_reg_state[reg] = HostRegState::Usable;
m_host_register_allocation_order[index++] = reg;
}
m_state.available_count = static_cast<u32>(index);
}
void RegisterCache::SetCallerSavedHostRegs(std::initializer_list<HostReg> regs)
{
for (HostReg reg : regs)
m_state.host_reg_state[reg] |= HostRegState::CallerSaved;
}
void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs)
{
for (HostReg reg : regs)
m_state.host_reg_state[reg] |= HostRegState::CalleeSaved;
}
// Records which host register GetCPUPtr() hands out.
void RegisterCache::SetCPUPtrHostReg(HostReg reg)
{
m_cpu_ptr_host_register = reg;
}
// Returns true when the Usable flag is set for `reg`.
bool RegisterCache::IsUsableHostReg(HostReg reg) const
{
  const auto usable_bits = m_state.host_reg_state[reg] & HostRegState::Usable;
  return usable_bits != HostRegState::None;
}
// Returns true when the InUse flag is set for `reg`.
bool RegisterCache::IsHostRegInUse(HostReg reg) const
{
  const auto in_use_bits = m_state.host_reg_state[reg] & HostRegState::InUse;
  return in_use_bits != HostRegState::None;
}
bool RegisterCache::HasFreeHostRegister() const
{
for (const HostRegState state : m_state.host_reg_state)
{
if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
return true;
}
return false;
}
// Counts the host registers that are both Usable and InUse.
u32 RegisterCache::GetUsedHostRegisters() const
{
  const auto usable_and_in_use = HostRegState::Usable | HostRegState::InUse;

  u32 used = 0;
  for (const HostRegState reg_state : m_state.host_reg_state)
  {
    if ((reg_state & usable_and_in_use) == usable_and_in_use)
      ++used;
  }

  return used;
}
// Counts the host registers that are Usable and not InUse.
u32 RegisterCache::GetFreeHostRegisters() const
{
  const auto usable_or_in_use = HostRegState::Usable | HostRegState::InUse;

  u32 free_regs = 0;
  for (const HostRegState reg_state : m_state.host_reg_state)
  {
    if ((reg_state & usable_or_in_use) == HostRegState::Usable)
      ++free_regs;
  }

  return free_regs;
}
// Allocates any host register, tagging it with `state`, and returns it.
// Registers are tried in allocation order; when none is free, one cached guest register is
// evicted and the allocation is retried. Panics when allocation is inhibited or when
// nothing can be evicted.
HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */)
{
  if (m_state.allocator_inhibit_count > 0)
    Panic("Allocating when inhibited");

  // First choice: a register that is usable and not currently in use.
  const auto usable_or_in_use = HostRegState::Usable | HostRegState::InUse;
  for (u32 order_index = 0; order_index < m_state.available_count; order_index++)
  {
    const HostReg candidate = m_host_register_allocation_order[order_index];
    const bool is_free = ((m_state.host_reg_state[candidate] & usable_or_in_use) == HostRegState::Usable);
    if (is_free && AllocateHostReg(candidate, state))
      return candidate;
  }

  // Nothing free: make room by evicting a cached guest register, then try again.
  if (!EvictOneGuestRegister())
    Panic("Failed to evict guest register for new allocation");

  return AllocateHostReg(state);
}
// Tries to allocate the specific host register `reg`, tagging it with `state`.
//
// Returns false when the register is already in use, true when it was allocated.
// The first time a callee-saved register is allocated, a push is emitted for it and it is
// appended to the callee-saved order so that it can be restored later.
//
// The function previously ended with `return reg;`, which converted the register index to
// bool and reported a successful allocation of register index 0 as a failure.
bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/)
{
  if ((m_state.host_reg_state[reg] & HostRegState::InUse) == HostRegState::InUse)
    return false;

  m_state.host_reg_state[reg] |= state;

  if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
      HostRegState::CalleeSaved)
  {
    // new register we need to save..
    DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
    m_code_generator.EmitPushHostReg(reg, GetActiveCalleeSavedRegisterCount());
    m_state.callee_saved_order[m_state.callee_saved_order_count++] = reg;
    m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
  }

  return true;
}
// Sets the Discarded flag on `reg`, which must currently be in use (checked in debug builds).
void RegisterCache::DiscardHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  DEBUG_LOG("Discarding host register {}", m_code_generator.GetHostRegName(reg));

  HostRegState& reg_state = m_state.host_reg_state[reg];
  reg_state |= HostRegState::Discarded;
}
// Clears the Discarded flag on `reg`, which must currently be in use (checked in debug builds).
void RegisterCache::UndiscardHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  DEBUG_LOG("Undiscarding host register {}", m_code_generator.GetHostRegName(reg));

  HostRegState& reg_state = m_state.host_reg_state[reg];
  reg_state &= ~HostRegState::Discarded;
}
// Clears the InUse flag on `reg`, which must currently be in use (checked in debug builds).
void RegisterCache::FreeHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  DEBUG_LOG("Freeing host register {}", m_code_generator.GetHostRegName(reg));

  HostRegState& reg_state = m_state.host_reg_state[reg];
  reg_state &= ~HostRegState::InUse;
}
// When `reg` is in use, flushes every cached guest register that currently lives in it.
// Does nothing when `reg` is not in use.
void RegisterCache::EnsureHostRegFree(HostReg reg)
{
  if (!IsHostRegInUse(reg))
    return;

  for (u8 guest_index = 0; guest_index < static_cast<u8>(Reg::count); guest_index++)
  {
    Value& cached = m_state.guest_reg_state[guest_index];
    if (!cached.IsInHostRegister())
      continue;

    if (cached.GetHostRegister() == reg)
      FlushGuestRegister(static_cast<Reg>(guest_index), true, true);
  }
}
// Returns a pointer-sized Value that refers to the host register recorded by
// SetCPUPtrHostReg().
Value RegisterCache::GetCPUPtr()
{
return Value::FromHostReg(this, m_cpu_ptr_host_register, HostPointerSize);
}
// Allocates a scratch value of the given size.
// With the default HostReg_Invalid any register is taken; otherwise exactly `reg` is
// claimed, asserting that it is not in use and panicking when it cannot be allocated.
Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Invalid */)
{
  const bool specific_register_requested = !(reg == HostReg_Invalid);
  if (specific_register_requested)
  {
    Assert(!IsHostRegInUse(reg));
    if (!AllocateHostReg(reg))
      Panic("Failed to allocate specific host register");
  }
  else
  {
    reg = AllocateHostReg();
  }

  DEBUG_LOG("Allocating host register {} as scratch", m_code_generator.GetHostRegName(reg));
  return Value::FromScratch(this, reg, size);
}
// Emits a push for every register flagged CalleeSaved but not yet CalleeSavedAllocated,
// appends it to the callee-saved order, and marks it CalleeSavedAllocated.
// NOTE(review): despite the "CallerSaved" name, the body only tests the callee-saved flags
// and matches the single-register path of ReserveCalleeSavedRegisters() -- confirm which
// behavior is intended.
void RegisterCache::ReserveCallerSavedRegisters()
{
for (u32 reg = 0; reg < HostReg_Count; reg++)
{
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
}
}
}
// Emits a push for every host register that is CallerSaved, InUse and not Discarded.
// Stack positions start after the active callee-saved registers and ascend by one per push.
// Returns the number of registers pushed.
u32 RegisterCache::PushCallerSavedRegisters() const
{
  const auto inspected_bits = HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded;
  const auto live_caller_saved = HostRegState::CallerSaved | HostRegState::InUse;
  const u32 first_position = GetActiveCalleeSavedRegisterCount();

  u32 pushed = 0;
  for (u32 reg = 0; reg < HostReg_Count; reg++)
  {
    if ((m_state.host_reg_state[reg] & inspected_bits) != live_caller_saved)
      continue;

    m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), first_position + pushed);
    pushed++;
  }

  return pushed;
}
// Emits pops for the host registers that are CallerSaved, InUse and not Discarded,
// walking from the highest register index downwards and popping two at a time when a
// second matching register is found. Returns the number of matching registers counted.
// NOTE(review): neither the outer loop (`while (i > 0)`) nor the pair search
// (`reg_pair > 0`) ever examines index 0, so a matching register at index 0 is counted
// here (and pushed by PushCallerSavedRegisters()) but no pop is emitted for it -- confirm
// whether index 0 can be caller-saved and in use.
u32 RegisterCache::PopCallerSavedRegisters() const
{
// First pass: count the matching registers to find the topmost stack position.
u32 count = 0;
for (u32 i = 0; i < HostReg_Count; i++)
{
if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse))
{
count++;
}
}
if (count == 0)
return 0;
u32 position = GetActiveCalleeSavedRegisterCount() + count - 1;
u32 i = (HostReg_Count - 1);
do
{
if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse))
{
// Look for a lower-indexed matching register to pop together with register i.
u32 reg_pair;
for (reg_pair = (i - 1); reg_pair > 0 && reg_pair < HostReg_Count; reg_pair--)
{
if ((m_state.host_reg_state[reg_pair] &
(HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
(HostRegState::CallerSaved | HostRegState::InUse))
{
m_code_generator.EmitPopHostRegPair(static_cast<HostReg>(reg_pair), static_cast<HostReg>(i), position);
position -= 2;
// Continue the outer scan below the partner register.
i = reg_pair;
break;
}
}
// The search ran down to 0 without a partner: pop register i on its own.
if (reg_pair == 0)
{
m_code_generator.EmitPopHostReg(static_cast<HostReg>(i), position);
position--;
}
}
i--;
} while (i > 0);
return count;
}
// Emits pops for the callee-saved registers recorded in callee_saved_order, last saved
// first, popping two at a time while at least two remain.
// When `commit` is true the CalleeSavedAllocated flags are cleared and the order count is
// reset to zero; when false only the pops are emitted and the bookkeeping is kept.
// Returns the number of registers popped.
u32 RegisterCache::PopCalleeSavedRegisters(bool commit)
{
if (m_state.callee_saved_order_count == 0)
return 0;
u32 count = 0;
// `i` is one past the index of the next register to pop.
u32 i = m_state.callee_saved_order_count;
do
{
const HostReg reg = m_state.callee_saved_order[i - 1];
DebugAssert((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
(HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
if (i > 1)
{
// At least two entries remain: pop them as a pair.
const HostReg reg2 = m_state.callee_saved_order[i - 2];
DebugAssert((m_state.host_reg_state[reg2] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
(HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
m_code_generator.EmitPopHostRegPair(reg2, reg, i - 1);
i -= 2;
count += 2;
if (commit)
{
m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated;
m_state.host_reg_state[reg2] &= ~HostRegState::CalleeSavedAllocated;
}
}
else
{
// Only one entry remains: pop it on its own.
m_code_generator.EmitPopHostReg(reg, i - 1);
if (commit)
m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated;
count++;
i--;
}
} while (i > 0);
if (commit)
m_state.callee_saved_order_count = 0;
return count;
}
// Emits pushes for every register flagged CalleeSaved but not yet CalleeSavedAllocated,
// pairing each with the next such register when one exists. Every pushed register is
// appended to callee_saved_order and marked CalleeSavedAllocated.
void RegisterCache::ReserveCalleeSavedRegisters()
{
for (u32 reg = 0; reg < HostReg_Count; reg++)
{
if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
DebugAssert(m_state.callee_saved_order_count < HostReg_Count);
// can we find a paired register? (mainly for ARM)
u32 reg_pair;
for (reg_pair = reg + 1; reg_pair < HostReg_Count; reg_pair++)
{
if ((m_state.host_reg_state[reg_pair] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
m_code_generator.EmitPushHostRegPair(static_cast<HostReg>(reg), static_cast<HostReg>(reg_pair),
GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg_pair);
m_state.host_reg_state[reg_pair] |= HostRegState::CalleeSavedAllocated;
// Resume the outer scan after the partner register.
reg = reg_pair;
break;
}
}
// No partner found: push this register on its own.
if (reg_pair == HostReg_Count)
{
m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg), GetActiveCalleeSavedRegisterCount());
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast<HostReg>(reg);
m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated;
}
}
}
}
void RegisterCache::AssumeCalleeSavedRegistersAreSaved()
{
for (u32 i = 0; i < HostReg_Count; i++)
{
if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
HostRegState::CalleeSaved)
{
m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved;
}
}
}
// Saves a snapshot of the allocator state on the state stack.
// The two load delay Values are copied field by field (regcache, host_reg, size, flags);
// their constant_value is not copied.
void RegisterCache::PushState()
{
// need to copy this manually because of the load delay values
RegAllocState save_state;
save_state.host_reg_state = m_state.host_reg_state;
save_state.callee_saved_order = m_state.callee_saved_order;
save_state.guest_reg_state = m_state.guest_reg_state;
save_state.guest_reg_order = m_state.guest_reg_order;
save_state.available_count = m_state.available_count;
save_state.callee_saved_order_count = m_state.callee_saved_order_count;
save_state.guest_reg_order_count = m_state.guest_reg_order_count;
save_state.allocator_inhibit_count = m_state.allocator_inhibit_count;
save_state.load_delay_register = m_state.load_delay_register;
save_state.load_delay_value.regcache = m_state.load_delay_value.regcache;
save_state.load_delay_value.host_reg = m_state.load_delay_value.host_reg;
save_state.load_delay_value.size = m_state.load_delay_value.size;
save_state.load_delay_value.flags = m_state.load_delay_value.flags;
save_state.next_load_delay_register = m_state.next_load_delay_register;
save_state.next_load_delay_value.regcache = m_state.next_load_delay_value.regcache;
save_state.next_load_delay_value.host_reg = m_state.next_load_delay_value.host_reg;
save_state.next_load_delay_value.size = m_state.next_load_delay_value.size;
save_state.next_load_delay_value.flags = m_state.next_load_delay_value.flags;
m_state_stack.push(std::move(save_state));
}
// Restores the most recently pushed allocator state. The state stack must not be empty.
void RegisterCache::PopState()
{
Assert(!m_state_stack.empty());
// prevent destructor -> freeing of host reg
m_state.load_delay_value.Clear();
m_state.next_load_delay_value.Clear();
m_state = std::move(m_state_stack.top());
m_state_stack.pop();
}
// Returns a Value for guest register `guest_reg`.
//
// cache:               when the register has to be loaded, keep it in the cache (true) or
//                      return it as a scratch value (false).
// force_host_register: a constant-only result is materialized into a host register.
// forced_host_reg:     when not HostReg_Invalid, the specific host register to use.
Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */,
HostReg forced_host_reg /* = HostReg_Invalid */)
{
// register zero is always zero
if (guest_reg == Reg::zero)
{
// return a scratch value of zero if it's forced
if (force_host_register)
{
Value temp = AllocateScratch(RegSize_32, forced_host_reg);
// XOR the register with itself to zero it.
m_code_generator.EmitXor(temp.host_reg, temp.host_reg, temp);
return temp;
}
return Value::FromConstantU32(0);
}
Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsValid())
{
if (cache_value.IsInHostRegister())
{
PushRegisterToOrder(guest_reg);
// if it's in the wrong register, return it as scratch
if (forced_host_reg == HostReg_Invalid || cache_value.GetHostRegister() == forced_host_reg)
return cache_value;
Value temp = AllocateScratch(RegSize_32, forced_host_reg);
m_code_generator.EmitCopyValue(forced_host_reg, cache_value);
return temp;
}
else if (force_host_register)
{
// if it's not in a register, it should be constant
DebugAssert(cache_value.IsConstant());
HostReg host_reg;
if (forced_host_reg == HostReg_Invalid)
{
host_reg = AllocateHostReg();
}
else
{
Assert(!IsHostRegInUse(forced_host_reg));
if (!AllocateHostReg(forced_host_reg))
Panic("Failed to allocate specific host register");
host_reg = forced_host_reg;
}
DEBUG_LOG("Allocated host register {} for constant guest register {} (0x{:X})",
m_code_generator.GetHostRegName(host_reg), GetRegName(guest_reg), cache_value.constant_value);
m_code_generator.EmitCopyValue(host_reg, cache_value);
cache_value.AddHostReg(this, host_reg);
AppendRegisterToOrder(guest_reg);
// if we're forcing a host register, we're probably going to be changing the value,
// in which case the constant won't be correct anyway. so just drop it.
cache_value.ClearConstant();
return cache_value;
}
else
{
// constant
return cache_value;
}
}
// Not cached: allocate a host register and emit a load from the CPU state.
HostReg host_reg;
if (forced_host_reg == HostReg_Invalid)
{
host_reg = AllocateHostReg();
}
else
{
Assert(!IsHostRegInUse(forced_host_reg));
if (!AllocateHostReg(forced_host_reg))
Panic("Failed to allocate specific host register");
host_reg = forced_host_reg;
}
m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg);
DEBUG_LOG("Loading guest register {} to host register {}{}", GetRegName(guest_reg),
m_code_generator.GetHostRegName(host_reg, RegSize_32), cache ? " (cached)" : "");
if (cache)
{
// Now in cache.
cache_value.SetHostReg(this, host_reg, RegSize_32);
AppendRegisterToOrder(guest_reg);
return cache_value;
}
else
{
// Skip caching, return the register as a value.
return Value::FromScratch(this, host_reg, RegSize_32);
}
}
// Copies guest register `guest_reg` into a newly allocated scratch host register and
// returns it as a 32-bit scratch Value. The cached entry itself is not modified.
Value RegisterCache::ReadGuestRegisterToScratch(Reg guest_reg)
{
HostReg host_reg = AllocateHostReg();
Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsValid())
{
// Cached (constant or in a host register): copy from the cached value.
m_code_generator.EmitCopyValue(host_reg, cache_value);
if (cache_value.IsConstant())
{
DEBUG_LOG("Copying guest register {} from constant 0x{:08X} to scratch host register {}", GetRegName(guest_reg),
static_cast<u32>(cache_value.constant_value), m_code_generator.GetHostRegName(host_reg, RegSize_32));
}
else
{
DEBUG_LOG("Copying guest register {} from {} to scratch host register {}", GetRegName(guest_reg),
m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32),
m_code_generator.GetHostRegName(host_reg, RegSize_32));
}
}
else
{
// Not cached: emit a load from the CPU state.
m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg);
DEBUG_LOG("Loading guest register {} to scratch host register {}", GetRegName(guest_reg),
m_code_generator.GetHostRegName(host_reg, RegSize_32));
}
return Value::FromScratch(this, host_reg, RegSize_32);
}
// Records `value` (which must be 32-bit) as the new contents of guest register `guest_reg`
// in the cache and marks the cache entry dirty. Returns a Value referring to the cached
// data; writes to register zero are ignored and hand `value` straight back.
Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value)
{
// ignore writes to register zero
DebugAssert(value.size == RegSize_32);
if (guest_reg == Reg::zero)
return std::move(value);
// cancel any pending load delay for this register
if (m_state.load_delay_register == guest_reg)
{
DEBUG_LOG("Cancelling load delay of register {} because of non-delayed write", GetRegName(guest_reg));
m_state.load_delay_register = Reg::count;
m_state.load_delay_value.ReleaseAndClear();
}
Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg)
{
// updating the register value.
DEBUG_LOG("Updating guest register {} (in host register {})", GetRegName(guest_reg),
m_code_generator.GetHostRegName(value.host_reg, RegSize_32));
cache_value = std::move(value);
cache_value.SetDirty();
return cache_value;
}
// Any previously cached contents are dropped before the new value is recorded.
InvalidateGuestRegister(guest_reg);
DebugAssert(!cache_value.IsValid());
if (value.IsConstant())
{
// No need to allocate a host register, and we can defer the store.
cache_value = value;
cache_value.SetDirty();
return cache_value;
}
AppendRegisterToOrder(guest_reg);
// If it's a temporary, we can bind that to the guest register.
if (value.IsScratch())
{
DEBUG_LOG("Binding scratch register {} to guest register {}",
m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg));
cache_value = std::move(value);
// The cache entry is no longer a scratch value once bound to the guest register.
cache_value.flags &= ~ValueFlags::Scratch;
cache_value.SetDirty();
return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
}
// Allocate host register, and copy value to it.
HostReg host_reg = AllocateHostReg();
m_code_generator.EmitCopyValue(host_reg, value);
cache_value.SetHostReg(this, host_reg, RegSize_32);
cache_value.SetDirty();
DEBUG_LOG("Copying non-scratch register {} to {} to guest register {}",
m_code_generator.GetHostRegName(value.host_reg, RegSize_32),
m_code_generator.GetHostRegName(host_reg, RegSize_32), GetRegName(guest_reg));
return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
}
/// Queues `value` as the "next" load delay for `guest_reg`; UpdateLoadDelay() later promotes it to the pending
/// load delay and eventually commits it to the guest register.
/// Writes to Reg::zero are dropped. A scratch value is adopted as-is, anything else is copied into a newly
/// allocated scratch register.
void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value)
{
  // only 32-bit values are expected here
  DebugAssert(value.size == RegSize_32);

  // ignore writes to register zero
  if (guest_reg == Reg::zero)
    return;

  // two load delays in a row? cancel the first one.
  if (guest_reg == m_state.load_delay_register)
  {
    DEBUG_LOG("Cancelling load delay of register {} due to new load delay", GetRegName(guest_reg));
    m_state.load_delay_register = Reg::count;
    m_state.load_delay_value.ReleaseAndClear();
  }

  // two load delay case with interpreter load delay
  m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg);

  // set up the load delay at the end of this instruction
  Value& cache_value = m_state.next_load_delay_value;
  Assert(m_state.next_load_delay_register == Reg::count);
  m_state.next_load_delay_register = guest_reg;

  // If it's a temporary, we can bind that to the guest register.
  if (value.IsScratch())
  {
    DEBUG_LOG("Binding scratch register {} to load-delayed guest register {}",
              m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg));
    cache_value = std::move(value);
    return;
  }

  // Allocate host register, and copy value to it.
  cache_value = AllocateScratch(RegSize_32);
  m_code_generator.EmitCopyValue(cache_value.host_reg, value);

  // A constant has no meaningful host_reg, so naming one in the log would point at an unrelated register.
  if (value.IsConstant())
  {
    DEBUG_LOG("Copying constant 0x{:08X} to {} to load-delayed guest register {}",
              static_cast<u32>(value.constant_value),
              m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32), GetRegName(guest_reg));
  }
  else
  {
    DEBUG_LOG("Copying non-scratch register {} to {} to load-delayed guest register {}",
              m_code_generator.GetHostRegName(value.host_reg, RegSize_32),
              m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32), GetRegName(guest_reg));
  }
}
void RegisterCache::UpdateLoadDelay()
{
// flush current load delay
if (m_state.load_delay_register != Reg::count)
{
// have to clear first because otherwise it'll release the value
Reg reg = m_state.load_delay_register;
Value value = std::move(m_state.load_delay_value);
m_state.load_delay_register = Reg::count;
WriteGuestRegister(reg, std::move(value));
}
// next load delay -> load delay
if (m_state.next_load_delay_register != Reg::count)
{
m_state.load_delay_register = m_state.next_load_delay_register;
m_state.load_delay_value = std::move(m_state.next_load_delay_value);
m_state.next_load_delay_register = Reg::count;
}
}
/// Discards the pending load delay, if there is one, releasing the value it was holding.
void RegisterCache::CancelLoadDelay()
{
  const bool has_pending = (m_state.load_delay_register != Reg::count);
  if (has_pending)
  {
    DEBUG_LOG("Cancelling load delay of register {}", GetRegName(m_state.load_delay_register));
    m_state.load_delay_register = Reg::count;
    m_state.load_delay_value.ReleaseAndClear();
  }
}
/// Emits a store of the pending load delay into the interpreter's load delay state.
/// When `clear` is set, the load delay is also dropped from the register cache and its value released.
void RegisterCache::WriteLoadDelayToCPU(bool clear)
{
  // There shouldn't be a flush at the same time as there's a new load delay.
  Assert(m_state.next_load_delay_register == Reg::count);

  // No pending load delay, nothing to write.
  if (m_state.load_delay_register == Reg::count)
    return;

  DEBUG_LOG("Flushing pending load delay of {}", GetRegName(m_state.load_delay_register));
  m_code_generator.EmitStoreInterpreterLoadDelay(m_state.load_delay_register, m_state.load_delay_value);

  if (!clear)
    return;

  m_state.load_delay_register = Reg::count;
  m_state.load_delay_value.ReleaseAndClear();
}
/// Emits a store of the pending load delay value directly into its destination guest register.
/// When `clear` is false the load delay stays tracked in the cache, otherwise it is dropped and released.
void RegisterCache::FlushLoadDelay(bool clear)
{
  Assert(m_state.next_load_delay_register == Reg::count);

  // No pending load delay, nothing to flush.
  if (m_state.load_delay_register == Reg::count)
    return;

  // if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
  // non-exception-raised path. TODO: push/pop whole state would avoid this issue
  m_code_generator.EmitStoreGuestRegister(m_state.load_delay_register, m_state.load_delay_value);

  if (!clear)
    return;

  m_state.load_delay_register = Reg::count;
  m_state.load_delay_value.ReleaseAndClear();
}
/// Writes a cached guest register back to the CPU state if it is dirty.
/// `clear_dirty` controls whether the dirty flag is reset after the store, and `invalidate` whether the cache
/// entry is dropped afterwards (regardless of whether a store was needed).
void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty)
{
  Value& entry = m_state.guest_reg_state[static_cast<u8>(guest_reg)];

  const bool needs_store = entry.IsDirty();
  if (needs_store)
  {
    // Only the log message depends on where the value lives, the store handles both cases.
    if (entry.IsInHostRegister())
    {
      DEBUG_LOG("Flushing guest register {} from host register {}", GetRegName(guest_reg),
                m_code_generator.GetHostRegName(entry.host_reg, RegSize_32));
    }
    else if (entry.IsConstant())
    {
      DEBUG_LOG("Flushing guest register {} from constant 0x{:X}", GetRegName(guest_reg), entry.constant_value);
    }

    m_code_generator.EmitStoreGuestRegister(guest_reg, entry);

    if (clear_dirty)
      entry.ClearDirty();
  }

  if (invalidate)
    InvalidateGuestRegister(guest_reg);
}
/// Drops the cache entry for a guest register without writing it back. If the entry occupied a host register,
/// that register is freed and the guest register is removed from the eviction order.
void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
{
  Value& entry = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
  if (entry.IsValid())
  {
    // Constants never occupy a host register, so only register-backed entries need releasing.
    if (entry.IsInHostRegister())
    {
      FreeHostReg(entry.host_reg);
      ClearRegisterFromOrder(guest_reg);
    }

    DEBUG_LOG("Invalidating guest register {}", GetRegName(guest_reg));
    entry.Clear();
  }
}
void RegisterCache::InvalidateAllNonDirtyGuestRegisters()
{
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
{
Value& cache_value = m_state.guest_reg_state[reg];
if (cache_value.IsValid() && !cache_value.IsDirty())
InvalidateGuestRegister(static_cast<Reg>(reg));
}
}
void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
{
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
}
void RegisterCache::FlushCallerSavedGuestRegisters(bool invalidate, bool clear_dirty)
{
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
{
const Value& gr = m_state.guest_reg_state[reg];
if (!gr.IsInHostRegister() ||
(m_state.host_reg_state[gr.GetHostRegister()] & HostRegState::CallerSaved) != HostRegState::CallerSaved)
{
continue;
}
FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
}
}
bool RegisterCache::EvictOneGuestRegister()
{
if (m_state.guest_reg_order_count == 0)
return false;
// evict the register used the longest time ago
Reg evict_reg = m_state.guest_reg_order[m_state.guest_reg_order_count - 1];
DEBUG_LOG("Evicting guest register {}", GetRegName(evict_reg));
FlushGuestRegister(evict_reg, true, true);
return HasFreeHostRegister();
}
/// Removes a guest register from the usage order, closing the gap by shifting the later entries down one slot.
/// Panics if the register is not currently in the order.
void RegisterCache::ClearRegisterFromOrder(Reg reg)
{
  const u32 total = m_state.guest_reg_order_count;

  // Locate the first slot holding this register.
  u32 slot = 0;
  while (slot < total && m_state.guest_reg_order[slot] != reg)
    slot++;

  if (slot < total)
  {
    const u32 trailing = total - slot - 1;
    if (trailing == 0)
    {
      // It was the last entry, so just blank the slot.
      m_state.guest_reg_order[slot] = Reg::count;
    }
    else
    {
      // Slide everything after the slot down by one to fill the hole.
      std::memmove(&m_state.guest_reg_order[slot], &m_state.guest_reg_order[slot + 1], sizeof(Reg) * trailing);
    }

    m_state.guest_reg_order_count--;
    return;
  }

  Panic("Clearing register from order not in order");
}
/// Moves a guest register that is already in the usage order to the front of it.
/// Panics if the register is not currently in the order.
void RegisterCache::PushRegisterToOrder(Reg reg)
{
  const u32 total = m_state.guest_reg_order_count;

  // Locate the first slot holding this register.
  u32 slot = 0;
  while (slot < total && m_state.guest_reg_order[slot] != reg)
    slot++;

  if (slot < total)
  {
    // Shift the entries which were ahead of it up by one slot (overwriting its old position), then place it
    // at the front. Nothing needs to move when it is already first.
    if (slot > 0)
      std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * slot);

    m_state.guest_reg_order[0] = reg;
    return;
  }

  Panic("Attempt to push register which is not ordered");
}
/// Inserts a guest register at the front of the usage order, shifting all existing entries up by one slot.
void RegisterCache::AppendRegisterToOrder(Reg reg)
{
  DebugAssert(m_state.guest_reg_order_count < HostReg_Count);

  // Walk from the back so each entry is copied before its slot is overwritten.
  for (u32 slot = m_state.guest_reg_order_count; slot > 0; slot--)
    m_state.guest_reg_order[slot] = m_state.guest_reg_order[slot - 1];

  m_state.guest_reg_order[0] = reg;
  m_state.guest_reg_order_count++;
}
/// Increments the allocation inhibit counter. Each call is expected to be paired with UninhibitAllocation().
void RegisterCache::InhibitAllocation()
{
  m_state.allocator_inhibit_count += 1;
}
/// Decrements the allocation inhibit counter. Asserts that a matching InhibitAllocation() call preceded it.
void RegisterCache::UninhibitAllocation()
{
  Assert(m_state.allocator_inhibit_count > 0);
  m_state.allocator_inhibit_count -= 1;
}
} // namespace CPU::Recompiler

View File

@ -1,449 +0,0 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "cpu_recompiler_types.h"
#include "cpu_types.h"
#include "common/assert.h"
#if defined(CPU_ARCH_ARM32)
#include "vixl/aarch32/macro-assembler-aarch32.h"
#elif defined(CPU_ARCH_ARM64)
#include "vixl/aarch64/macro-assembler-aarch64.h"
#endif
#include <array>
#include <optional>
#include <stack>
#include <tuple>
namespace CPU::Recompiler {
/// Width of a value, or of the view onto a host register, in bits.
/// This is an unscoped enum, so the enumerators are used unqualified throughout the recompiler.
enum RegSize : u8
{
RegSize_8,
RegSize_16,
RegSize_32,
RegSize_64,
};
// Per-architecture host definitions. Every branch provides the same set of names:
//   HostReg         - index of a host register
//   CodeEmitter     - assembler class used to emit host code
//   LabelType       - label type of that assembler
//   HostReg_Count   - number of host registers, also used to size the register cache arrays
//   HostReg_Invalid - sentinel, one past the last valid register index
//   HostPointerSize - RegSize matching the width of a host pointer
#if defined(CPU_ARCH_X64)
// NOTE(review): no Xbyak header is included by this file directly (only the vixl ones are); presumably it
// arrives through cpu_recompiler_types.h - confirm.
using HostReg = unsigned;
using CodeEmitter = Xbyak::CodeGenerator;
using LabelType = Xbyak::Label;
enum : u32
{
HostReg_Count = 16
};
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_64;
#elif defined(CPU_ARCH_ARM32)
using HostReg = unsigned;
using CodeEmitter = vixl::aarch32::MacroAssembler;
using LabelType = vixl::aarch32::Label;
enum : u32
{
HostReg_Count = vixl::aarch32::kNumberOfRegisters
};
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_32;
#elif defined(CPU_ARCH_ARM64)
using HostReg = unsigned;
using CodeEmitter = vixl::aarch64::MacroAssembler;
using LabelType = vixl::aarch64::Label;
enum : u32
{
HostReg_Count = vixl::aarch64::kNumberOfRegisters
};
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_64;
#else
// Any other architecture has no backend here, so fail the build early.
#error Unknown architecture.
#endif
class CodeGenerator;
class RegisterCache;
/// Bit flags describing the allocation state of a single host register.
/// The flags are combined with the bitwise operators generated by the macro below. Note that bit 0 is not
/// assigned to any flag.
enum class HostRegState : u8
{
None = 0,
Usable = (1 << 1), // Can be allocated
CallerSaved = (1 << 2), // Register is caller-saved, and should be saved/restored after calling a function.
CalleeSaved = (1 << 3), // Register is callee-saved, and should be restored after leaving the block.
InUse = (1 << 4), // In-use, must be saved/restored across function call.
CalleeSavedAllocated = (1 << 5), // Register was callee-saved and allocated, so should be restored before returning.
Discarded = (1 << 6), // Register contents is not used, so do not preserve across function calls.
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(HostRegState);
/// Bit flags describing what a Value currently holds and how it must be treated.
/// The flags are combined with the bitwise operators generated by the macro below.
enum class ValueFlags : u8
{
None = 0,
Valid = (1 << 0), // The value holds something; cleared for empty/invalid values.
Constant = (1 << 1), // The value itself is constant, and not in a register.
InHostRegister = (1 << 2), // The value itself is located in a host register.
Scratch = (1 << 3), // The value is temporary, and will be released after the Value is destroyed.
Dirty = (1 << 4), // For register cache values, the value needs to be written back to the CPU struct.
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(ValueFlags);
/// A value tracked by the recompiler: either a compile-time constant, or the contents of a host register.
/// `flags` records which of the two it is, whether the register is a scratch (released when the Value is
/// destroyed), and whether a cached guest register still has to be written back to the CPU struct.
struct Value
{
  RegisterCache* regcache = nullptr;
  u64 constant_value = 0;
  HostReg host_reg = {};
  RegSize size = RegSize_8;
  ValueFlags flags = ValueFlags::None;

  Value();
  Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_);
  Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_);
  Value(const Value& other);
  Value(Value&& other);
  ~Value();

  Value& operator=(const Value& other);
  Value& operator=(Value&& other);

  // Single-bit flag queries.
  bool IsConstant() const { return (flags & ValueFlags::Constant) == ValueFlags::Constant; }
  bool IsValid() const { return (flags & ValueFlags::Valid) == ValueFlags::Valid; }
  bool IsInHostRegister() const { return (flags & ValueFlags::InHostRegister) == ValueFlags::InHostRegister; }
  bool IsScratch() const { return (flags & ValueFlags::Scratch) == ValueFlags::Scratch; }

  /// Returns the host register this value is bound to.
  HostReg GetHostRegister() const
  {
    DebugAssert(IsInHostRegister());
    return host_reg;
  }

  /// Returns true if this value is constant and has the specified value.
  bool HasConstantValue(u64 cv) const { return IsConstant() && constant_value == cv; }

  /// Removes the contents of this value. Use with care, as scratch/temporaries are not released.
  void Clear();

  /// Releases the host register if needed, and clears the contents.
  void ReleaseAndClear();

  /// Flags the value is being discarded. Call Undiscard() to track again.
  void Discard();
  void Undiscard();

  /// Attaches a host register to an already-valid value, keeping all of its other flags.
  void AddHostReg(RegisterCache* regcache_, HostReg hr)
  {
    DebugAssert(IsValid());
    flags |= ValueFlags::InHostRegister;
    host_reg = hr;
    regcache = regcache_;
  }

  /// Rebinds the value to a host register, replacing its flags and dropping any constant it held.
  void SetHostReg(RegisterCache* regcache_, HostReg hr, RegSize size_)
  {
    flags = ValueFlags::InHostRegister | ValueFlags::Valid;
    size = size_;
    host_reg = hr;
    constant_value = 0;
    regcache = regcache_;
  }

  void ClearConstant()
  {
    // By clearing the constant bit, we should already be in a host register.
    DebugAssert(IsInHostRegister());
    flags &= ~ValueFlags::Constant;
  }

  bool IsDirty() const { return (flags & ValueFlags::Dirty) == ValueFlags::Dirty; }
  void SetDirty() { flags |= ValueFlags::Dirty; }
  void ClearDirty() { flags &= ~ValueFlags::Dirty; }

  /// Returns the same register viewed as a different size.
  Value ViewAsSize(RegSize view_size) const
  {
    if (view_size == size)
      return *this;

    if (IsConstant())
    {
      // Truncate the constant to the requested width; a 64-bit view keeps every bit.
      u64 truncated = constant_value;
      if (view_size == RegSize_8)
        truncated &= UINT64_C(0xFF);
      else if (view_size == RegSize_16)
        truncated &= UINT64_C(0xFFFF);
      else if (view_size == RegSize_32)
        truncated &= UINT64_C(0xFFFFFFFF);

      return Value::FromConstant(truncated, view_size);
    }

    // A register view refers to the same host register, without the scratch or dirty flags.
    if (IsInHostRegister())
      return Value::FromHostReg(regcache, host_reg, view_size);

    // invalid?
    return Value();
  }

  /// Returns the constant value as a signed 32-bit integer, suitable as an immediate.
  s32 GetS32ConstantValue() const
  {
    if (size == RegSize_8)
      return static_cast<s32>(SignExtend32(Truncate8(constant_value)));
    if (size == RegSize_16)
      return static_cast<s32>(SignExtend32(Truncate16(constant_value)));

    // 32-bit and wider constants are converted directly.
    return static_cast<s32>(constant_value);
  }

  /// Returns the constant value as a signed 64-bit integer, suitable as an immediate.
  s64 GetS64ConstantValue() const
  {
    if (size == RegSize_8)
      return static_cast<s64>(SignExtend64(Truncate8(constant_value)));
    if (size == RegSize_16)
      return static_cast<s64>(SignExtend64(Truncate16(constant_value)));
    if (size == RegSize_32)
      return static_cast<s64>(SignExtend64(Truncate32(constant_value)));

    return static_cast<s64>(constant_value);
  }

  /// Wraps a host register that is owned elsewhere; it is not flagged as a scratch.
  static Value FromHostReg(RegisterCache* regcache, HostReg reg, RegSize size)
  {
    const ValueFlags reg_flags = ValueFlags::Valid | ValueFlags::InHostRegister;
    return Value(regcache, reg, size, reg_flags);
  }

  /// Wraps a host register as a scratch value.
  static Value FromScratch(RegisterCache* regcache, HostReg reg, RegSize size)
  {
    const ValueFlags scratch_flags = ValueFlags::Valid | ValueFlags::InHostRegister | ValueFlags::Scratch;
    return Value(regcache, reg, size, scratch_flags);
  }

  /// Creates a constant value, which is not associated with any register cache.
  static Value FromConstant(u64 cv, RegSize size)
  {
    const ValueFlags constant_flags = ValueFlags::Valid | ValueFlags::Constant;
    return Value(nullptr, cv, size, constant_flags);
  }

  // Typed constant helpers; the constant is stored zero-extended to 64 bits.
  static Value FromConstantU8(u8 value)
  {
    return FromConstant(ZeroExtend64(value), RegSize_8);
  }
  static Value FromConstantU16(u16 value)
  {
    return FromConstant(ZeroExtend64(value), RegSize_16);
  }
  static Value FromConstantU32(u32 value)
  {
    return FromConstant(ZeroExtend64(value), RegSize_32);
  }
  static Value FromConstantS32(s32 value)
  {
    return FromConstant(ZeroExtend64(static_cast<u32>(value)), RegSize_32);
  }
  static Value FromConstantU64(u64 value)
  {
    return FromConstant(value, RegSize_64);
  }

  /// Creates a constant holding a host pointer, sized to the host pointer width.
  static Value FromConstantPtr(const void* pointer)
  {
#if defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_X64)
    return FromConstant(static_cast<u64>(reinterpret_cast<uintptr_t>(pointer)), RegSize_64);
#elif defined(CPU_ARCH_ARM32)
    return FromConstant(static_cast<u32>(reinterpret_cast<uintptr_t>(pointer)), RegSize_32);
#else
    return FromConstant(0, RegSize_32);
#endif
  }

private:
  void Release();
};
/// Tracks which host registers are allocated, which guest registers are currently cached (as constants or in
/// host registers), and the pending load delay state, on behalf of a CodeGenerator.
class RegisterCache
{
public:
RegisterCache(CodeGenerator& code_generator);
~RegisterCache();
u32 GetActiveCalleeSavedRegisterCount() const { return m_state.callee_saved_order_count; }
//////////////////////////////////////////////////////////////////////////
// Register Allocation
//////////////////////////////////////////////////////////////////////////
void SetHostRegAllocationOrder(std::initializer_list<HostReg> regs);
void SetCallerSavedHostRegs(std::initializer_list<HostReg> regs);
void SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs);
void SetCPUPtrHostReg(HostReg reg);
/// Returns true if the register is permitted to be used in the register cache.
bool IsUsableHostReg(HostReg reg) const;
bool IsHostRegInUse(HostReg reg) const;
bool HasFreeHostRegister() const;
u32 GetUsedHostRegisters() const;
u32 GetFreeHostRegisters() const;
/// Allocates a new host register. If there are no free registers, the guest register which was accessed the longest
/// time ago will be evicted.
HostReg AllocateHostReg(HostRegState state = HostRegState::InUse);
/// Allocates a specific host register. If this register is not free, returns false.
bool AllocateHostReg(HostReg reg, HostRegState state = HostRegState::InUse);
/// Flags the host register as discard-able. This means that the contents is no longer required, and will not be
/// pushed when saving caller-saved registers.
void DiscardHostReg(HostReg reg);
/// Clears the discard-able flag on a host register, so that the contents will be preserved across function calls.
void UndiscardHostReg(HostReg reg);
/// Frees a host register, making it usable in future allocations.
void FreeHostReg(HostReg reg);
/// Ensures a host register is free, removing any value cached.
void EnsureHostRegFree(HostReg reg);
/// Preallocates caller saved registers, enabling later use without stack pushes.
void ReserveCallerSavedRegisters();
/// Push/pop volatile host registers. Returns the number of registers pushed/popped.
u32 PushCallerSavedRegisters() const;
u32 PopCallerSavedRegisters() const;
/// Restore callee-saved registers. Call at the end of the function.
u32 PopCalleeSavedRegisters(bool commit);
/// Preallocates callee saved registers. The comment here previously said "caller saved", duplicated from
/// ReserveCallerSavedRegisters(). NOTE(review): the implementation is not in view; confirm the exact behaviour.
void ReserveCalleeSavedRegisters();
/// Removes the callee-saved register flag from all registers. Call when compiling code blocks.
void AssumeCalleeSavedRegistersAreSaved();
/// Pushes the register allocator state, use when entering branched code.
void PushState();
/// Pops the register allocator state, use when leaving branched code.
void PopState();
//////////////////////////////////////////////////////////////////////////
// Scratch Register Allocation
//////////////////////////////////////////////////////////////////////////
Value GetCPUPtr();
Value AllocateScratch(RegSize size, HostReg reg = HostReg_Invalid);
//////////////////////////////////////////////////////////////////////////
// Guest Register Caching
//////////////////////////////////////////////////////////////////////////
/// Returns true if the specified guest register is cached.
bool IsGuestRegisterCached(Reg guest_reg) const
{
const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
return cache_value.IsConstant() || cache_value.IsInHostRegister();
}
/// Returns true if the specified guest register is cached and in a host register.
bool IsGuestRegisterInHostRegister(Reg guest_reg) const
{
const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
return cache_value.IsInHostRegister();
}
/// Returns the host register if the guest register is cached in one, otherwise std::nullopt (this includes
/// guest registers cached as constants).
std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
{
if (!m_state.guest_reg_state[static_cast<u8>(guest_reg)].IsInHostRegister())
return std::nullopt;
return m_state.guest_reg_state[static_cast<u8>(guest_reg)].GetHostRegister();
}
/// Returns true if there is a load delay which will be stored at the end of the instruction.
bool HasLoadDelay() const { return m_state.load_delay_register != Reg::count; }
Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false,
HostReg forced_host_reg = HostReg_Invalid);
/// Reads the guest register to a caller-owned scratch register. This will ensure the cache won't invalidate the value
/// from some other write.
Value ReadGuestRegisterToScratch(Reg guest_reg);
/// Stores the value to guest_reg in the cache and marks it dirty. Scratch values are adopted by the cache,
/// other register values are copied into a newly allocated host register.
Value WriteGuestRegister(Reg guest_reg, Value&& value);
/// Stores the specified value to the guest register after the next instruction (load delay).
void WriteGuestRegisterDelayed(Reg guest_reg, Value&& value);
/// Returns the current target for a load delay, or Reg::count.
Reg GetLoadDelayRegister() const { return m_state.load_delay_register; }
const Value& GetLoadDelayValue() const { return m_state.load_delay_value; }
/// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
void UpdateLoadDelay();
/// Cancels any present load delay.
void CancelLoadDelay();
/// Writes the load delay to the CPU structure, so it is synced up with the interpreter.
void WriteLoadDelayToCPU(bool clear);
/// Flushes the load delay, i.e. writes it to the destination register.
void FlushLoadDelay(bool clear);
/// Writes the guest register back to the CPU state if it is dirty, optionally clearing the dirty flag and/or
/// dropping the cache entry afterwards.
void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
/// Drops the cache entry for the guest register without writing it back, freeing its host register if it had one.
void InvalidateGuestRegister(Reg guest_reg);
/// Drops every cache entry which is valid and not dirty.
void InvalidateAllNonDirtyGuestRegisters();
/// Calls FlushGuestRegister() for every guest register.
void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
/// Calls FlushGuestRegister() for every guest register cached in a caller-saved host register.
void FlushCallerSavedGuestRegisters(bool invalidate, bool clear_dirty);
/// Flushes and invalidates the guest register used the longest time ago. Returns true if a host register is
/// free afterwards, and false if there was nothing to evict.
bool EvictOneGuestRegister();
/// Temporarily prevents register allocation.
void InhibitAllocation();
void UninhibitAllocation();
private:
// Maintenance of guest_reg_order, which is kept with the most recently added/pushed register at index 0.
void ClearRegisterFromOrder(Reg reg);
void PushRegisterToOrder(Reg reg);
void AppendRegisterToOrder(Reg reg);
CodeGenerator& m_code_generator;
std::array<HostReg, HostReg_Count> m_host_register_allocation_order{};
HostReg m_cpu_ptr_host_register = {};
// Everything that PushState()/PopState() are presumably expected to save and restore - TODO confirm.
struct RegAllocState
{
std::array<HostRegState, HostReg_Count> host_reg_state{};
std::array<HostReg, HostReg_Count> callee_saved_order{};
std::array<Value, static_cast<u8>(Reg::count)> guest_reg_state{};
std::array<Reg, HostReg_Count> guest_reg_order{};
u32 available_count = 0;
u32 callee_saved_order_count = 0;
u32 guest_reg_order_count = 0;
u32 allocator_inhibit_count = 0;
// Reg::count means "no load delay" for both of the register fields below.
Reg load_delay_register = Reg::count;
Value load_delay_value{};
Reg next_load_delay_register = Reg::count;
Value next_load_delay_value{};
} m_state;
std::stack<RegAllocState> m_state_stack;
};
} // namespace CPU::Recompiler

View File

@ -1,7 +1,7 @@
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0 // SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "cpu_newrec_compiler_riscv64.h" #include "cpu_recompiler_riscv64.h"
#include "cpu_code_cache_private.h" #include "cpu_code_cache_private.h"
#include "cpu_core_private.h" #include "cpu_core_private.h"
#include "cpu_pgxp.h" #include "cpu_pgxp.h"
@ -33,7 +33,7 @@ extern "C" {
static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr
namespace CPU::NewRec { namespace CPU::Recompiler {
using namespace biscuit; using namespace biscuit;
@ -53,10 +53,10 @@ using CPU::Recompiler::rvIsCallerSavedRegister;
using CPU::Recompiler::rvIsValidSExtITypeImm; using CPU::Recompiler::rvIsValidSExtITypeImm;
using CPU::Recompiler::rvMoveAddressToReg; using CPU::Recompiler::rvMoveAddressToReg;
RISCV64Compiler s_instance; RISCV64Recompiler s_instance;
Compiler* g_compiler = &s_instance; Recompiler* g_compiler = &s_instance;
} // namespace CPU::NewRec } // namespace CPU::Recompiler
bool CPU::Recompiler::rvIsCallerSavedRegister(u32 id) bool CPU::Recompiler::rvIsCallerSavedRegister(u32 id)
{ {
@ -332,19 +332,19 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
return BLOCK_LINK_SIZE; return BLOCK_LINK_SIZE;
} }
CPU::NewRec::RISCV64Compiler::RISCV64Compiler() = default; CPU::Recompiler::RISCV64Recompiler::RISCV64Recompiler() = default;
CPU::NewRec::RISCV64Compiler::~RISCV64Compiler() = default; CPU::Recompiler::RISCV64Recompiler::~RISCV64Recompiler() = default;
const void* CPU::NewRec::RISCV64Compiler::GetCurrentCodePointer() const void* CPU::Recompiler::RISCV64Recompiler::GetCurrentCodePointer()
{ {
return rvAsm->GetCursorPointer(); return rvAsm->GetCursorPointer();
} }
void CPU::NewRec::RISCV64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, void CPU::Recompiler::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
u8* far_code_buffer, u32 far_code_space) u8* far_code_buffer, u32 far_code_space)
{ {
Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
// TODO: don't recreate this every time.. // TODO: don't recreate this every time..
DebugAssert(!m_emitter && !m_far_emitter && !rvAsm); DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
@ -370,7 +370,7 @@ void CPU::NewRec::RISCV64Compiler::Reset(CodeCache::Block* block, u8* code_buffe
} }
} }
void CPU::NewRec::RISCV64Compiler::SwitchToFarCode( void CPU::Recompiler::RISCV64Recompiler::SwitchToFarCode(
bool emit_jump, bool emit_jump,
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) /* = nullptr */, void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) /* = nullptr */,
const biscuit::GPR& rs1 /* = biscuit::zero */, const biscuit::GPR& rs2 /* = biscuit::zero */) const biscuit::GPR& rs1 /* = biscuit::zero */, const biscuit::GPR& rs2 /* = biscuit::zero */)
@ -394,7 +394,7 @@ void CPU::NewRec::RISCV64Compiler::SwitchToFarCode(
rvAsm = m_far_emitter.get(); rvAsm = m_far_emitter.get();
} }
void CPU::NewRec::RISCV64Compiler::SwitchToNearCode(bool emit_jump) void CPU::Recompiler::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)
{ {
DebugAssert(rvAsm == m_far_emitter.get()); DebugAssert(rvAsm == m_far_emitter.get());
if (emit_jump) if (emit_jump)
@ -402,19 +402,19 @@ void CPU::NewRec::RISCV64Compiler::SwitchToNearCode(bool emit_jump)
rvAsm = m_emitter.get(); rvAsm = m_emitter.get();
} }
void CPU::NewRec::RISCV64Compiler::EmitMov(const biscuit::GPR& dst, u32 val) void CPU::Recompiler::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)
{ {
rvEmitMov(rvAsm, dst, val); rvEmitMov(rvAsm, dst, val);
} }
void CPU::NewRec::RISCV64Compiler::EmitCall(const void* ptr) void CPU::Recompiler::RISCV64Recompiler::EmitCall(const void* ptr)
{ {
rvEmitCall(rvAsm, ptr); rvEmitCall(rvAsm, ptr);
} }
void CPU::NewRec::RISCV64Compiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm, void CPU::Recompiler::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
void (biscuit::Assembler::*iop)(GPR, GPR, u32), void (biscuit::Assembler::*iop)(GPR, GPR, u32),
void (biscuit::Assembler::*rop)(GPR, GPR, GPR)) void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
{ {
DebugAssert(rd != RSCRATCH && rs != RSCRATCH); DebugAssert(rd != RSCRATCH && rs != RSCRATCH);
@ -428,83 +428,83 @@ void CPU::NewRec::RISCV64Compiler::SafeImmSExtIType(const biscuit::GPR& rd, cons
(rvAsm->*rop)(rd, rs, RSCRATCH); (rvAsm->*rop)(rd, rs, RSCRATCH);
} }
void CPU::NewRec::RISCV64Compiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI), SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
&Assembler::ADD); &Assembler::ADD);
} }
void CPU::NewRec::RISCV64Compiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW), SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
&Assembler::ADDW); &Assembler::ADDW);
} }
void CPU::NewRec::RISCV64Compiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
const u32 nimm = static_cast<u32>(-static_cast<s32>(imm)); const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW), SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
&Assembler::ADDW); &Assembler::ADDW);
} }
void CPU::NewRec::RISCV64Compiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND); SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
} }
void CPU::NewRec::RISCV64Compiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR); SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
} }
void CPU::NewRec::RISCV64Compiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR); SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
} }
void CPU::NewRec::RISCV64Compiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI), SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
&Assembler::SLT); &Assembler::SLT);
} }
void CPU::NewRec::RISCV64Compiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm) void CPU::Recompiler::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{ {
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU), SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
&Assembler::SLTU); &Assembler::SLTU);
} }
void CPU::NewRec::RISCV64Compiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitSExtB(rvAsm, rd, rs); rvEmitSExtB(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitUExtB(rvAsm, rd, rs); rvEmitUExtB(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitSExtH(rvAsm, rd, rs); rvEmitSExtH(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitUExtH(rvAsm, rd, rs); rvEmitUExtH(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitDSExtW(rvAsm, rd, rs); rvEmitDSExtW(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs) void CPU::Recompiler::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{ {
rvEmitDUExtW(rvAsm, rd, rs); rvEmitDUExtW(rvAsm, rd, rs);
} }
void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) void CPU::Recompiler::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
{ {
// store it first to reduce code size, because we can offset // store it first to reduce code size, because we can offset
// TODO: 64-bit displacement is needed :/ // TODO: 64-bit displacement is needed :/
@ -543,7 +543,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
rvAsm->Bind(&block_unchanged); rvAsm->Bind(&block_unchanged);
} }
void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate() void CPU::Recompiler::RISCV64Recompiler::GenerateICacheCheckAndUpdate()
{ {
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
{ {
@ -599,8 +599,8 @@ void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
} }
} }
void CPU::NewRec::RISCV64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, void CPU::Recompiler::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
s32 arg3reg /*= -1*/) s32 arg3reg /*= -1*/)
{ {
if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index())) if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index()))
rvAsm->MV(RARG1, GPR(arg1reg)); rvAsm->MV(RARG1, GPR(arg1reg));
@ -611,7 +611,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateCall(const void* func, s32 arg1reg /*
EmitCall(func); EmitCall(func);
} }
void CPU::NewRec::RISCV64Compiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test) void CPU::Recompiler::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
{ {
if (newpc.has_value()) if (newpc.has_value())
{ {
@ -628,7 +628,7 @@ void CPU::NewRec::RISCV64Compiler::EndBlock(const std::optional<u32>& newpc, boo
EndAndLinkBlock(newpc, do_event_test, false); EndAndLinkBlock(newpc, do_event_test, false);
} }
void CPU::NewRec::RISCV64Compiler::EndBlockWithException(Exception excode) void CPU::Recompiler::RISCV64Recompiler::EndBlockWithException(Exception excode)
{ {
// flush regs, but not pc, it's going to get overwritten // flush regs, but not pc, it's going to get overwritten
// flush cycles because of the GTE instruction stuff... // flush cycles because of the GTE instruction stuff...
@ -646,8 +646,8 @@ void CPU::NewRec::RISCV64Compiler::EndBlockWithException(Exception excode)
EndAndLinkBlock(std::nullopt, true, false); EndAndLinkBlock(std::nullopt, true, false);
} }
void CPU::NewRec::RISCV64Compiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, void CPU::Recompiler::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test,
bool force_run_events) bool force_run_events)
{ {
// event test // event test
// pc should've been flushed // pc should've been flushed
@ -711,7 +711,7 @@ void CPU::NewRec::RISCV64Compiler::EndAndLinkBlock(const std::optional<u32>& new
} }
} }
const void* CPU::NewRec::RISCV64Compiler::EndCompile(u32* code_size, u32* far_code_size) const void* CPU::Recompiler::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)
{ {
u8* const code = m_emitter->GetBufferPointer(0); u8* const code = m_emitter->GetBufferPointer(0);
*code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes()); *code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());
@ -722,7 +722,7 @@ const void* CPU::NewRec::RISCV64Compiler::EndCompile(u32* code_size, u32* far_co
return code; return code;
} }
const char* CPU::NewRec::RISCV64Compiler::GetHostRegName(u32 reg) const const char* CPU::Recompiler::RISCV64Recompiler::GetHostRegName(u32 reg) const
{ {
static constexpr std::array<const char*, 32> reg64_names = { static constexpr std::array<const char*, 32> reg64_names = {
{"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", {"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
@ -730,22 +730,22 @@ const char* CPU::NewRec::RISCV64Compiler::GetHostRegName(u32 reg) const
return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
} }
void CPU::NewRec::RISCV64Compiler::LoadHostRegWithConstant(u32 reg, u32 val) void CPU::Recompiler::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
{ {
EmitMov(GPR(reg), val); EmitMov(GPR(reg), val);
} }
void CPU::NewRec::RISCV64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) void CPU::Recompiler::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
{ {
rvAsm->LW(GPR(reg), PTR(ptr)); rvAsm->LW(GPR(reg), PTR(ptr));
} }
void CPU::NewRec::RISCV64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) void CPU::Recompiler::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
{ {
rvAsm->SW(GPR(reg), PTR(ptr)); rvAsm->SW(GPR(reg), PTR(ptr));
} }
void CPU::NewRec::RISCV64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) void CPU::Recompiler::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
{ {
if (val == 0) if (val == 0)
{ {
@ -757,23 +757,23 @@ void CPU::NewRec::RISCV64Compiler::StoreConstantToCPUPointer(u32 val, const void
rvAsm->SW(RSCRATCH, PTR(ptr)); rvAsm->SW(RSCRATCH, PTR(ptr));
} }
void CPU::NewRec::RISCV64Compiler::CopyHostReg(u32 dst, u32 src) void CPU::Recompiler::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)
{ {
if (src != dst) if (src != dst)
rvAsm->MV(GPR(dst), GPR(src)); rvAsm->MV(GPR(dst), GPR(src));
} }
void CPU::NewRec::RISCV64Compiler::AssertRegOrConstS(CompileFlags cf) const void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_s || cf.const_s); DebugAssert(cf.valid_host_s || cf.const_s);
} }
void CPU::NewRec::RISCV64Compiler::AssertRegOrConstT(CompileFlags cf) const void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_t || cf.const_t); DebugAssert(cf.valid_host_t || cf.const_t);
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg) biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
{ {
if (cf.valid_host_s) if (cf.valid_host_s)
{ {
@ -795,7 +795,7 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegS(CompileFlags cf, const
} }
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg) biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
{ {
if (cf.valid_host_t) if (cf.valid_host_t)
{ {
@ -817,37 +817,37 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegT(CompileFlags cf, const
} }
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegD(CompileFlags cf) const biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_d); DebugAssert(cf.valid_host_d);
return GPR(cf.host_d); return GPR(cf.host_d);
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegS(CompileFlags cf) const biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_s); DebugAssert(cf.valid_host_s);
return GPR(cf.host_s); return GPR(cf.host_s);
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegT(CompileFlags cf) const biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_t); DebugAssert(cf.valid_host_t);
return GPR(cf.host_t); return GPR(cf.host_t);
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegLO(CompileFlags cf) const biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_lo); DebugAssert(cf.valid_host_lo);
return GPR(cf.host_lo); return GPR(cf.host_lo);
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegHI(CompileFlags cf) const biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const
{ {
DebugAssert(cf.valid_host_hi); DebugAssert(cf.valid_host_hi);
return GPR(cf.host_hi); return GPR(cf.host_hi);
} }
void CPU::NewRec::RISCV64Compiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
{ {
if (cf.valid_host_s) if (cf.valid_host_s)
{ {
@ -865,7 +865,7 @@ void CPU::NewRec::RISCV64Compiler::MoveSToReg(const biscuit::GPR& dst, CompileFl
} }
} }
void CPU::NewRec::RISCV64Compiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
{ {
if (cf.valid_host_t) if (cf.valid_host_t)
{ {
@ -883,10 +883,10 @@ void CPU::NewRec::RISCV64Compiler::MoveTToReg(const biscuit::GPR& dst, CompileFl
} }
} }
void CPU::NewRec::RISCV64Compiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg) void CPU::Recompiler::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
{ {
DebugAssert(reg < Reg::count); DebugAssert(reg < Reg::count);
if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
rvAsm->MV(dst, GPR(hreg.value())); rvAsm->MV(dst, GPR(hreg.value()));
else if (HasConstantReg(reg)) else if (HasConstantReg(reg))
EmitMov(dst, GetConstantRegU32(reg)); EmitMov(dst, GetConstantRegU32(reg));
@ -894,9 +894,9 @@ void CPU::NewRec::RISCV64Compiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg
rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)])); rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
} }
void CPU::NewRec::RISCV64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, void CPU::Recompiler::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val,
Reg arg2reg /* = Reg::count */, Reg arg2reg /* = Reg::count */,
Reg arg3reg /* = Reg::count */) Reg arg3reg /* = Reg::count */)
{ {
DebugAssert(g_settings.gpu_pgxp_enable); DebugAssert(g_settings.gpu_pgxp_enable);
@ -911,9 +911,9 @@ void CPU::NewRec::RISCV64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func
EmitCall(func); EmitCall(func);
} }
void CPU::NewRec::RISCV64Compiler::Flush(u32 flags) void CPU::Recompiler::RISCV64Recompiler::Flush(u32 flags)
{ {
Compiler::Flush(flags); Recompiler::Flush(flags);
if (flags & FLUSH_PC && m_dirty_pc) if (flags & FLUSH_PC && m_dirty_pc)
{ {
@ -1000,7 +1000,7 @@ void CPU::NewRec::RISCV64Compiler::Flush(u32 flags)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_Fallback() void CPU::Recompiler::RISCV64Recompiler::Compile_Fallback()
{ {
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
@ -1028,7 +1028,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_Fallback()
#endif #endif
} }
void CPU::NewRec::RISCV64Compiler::CheckBranchTarget(const biscuit::GPR& pcreg) void CPU::Recompiler::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
{ {
if (!g_settings.cpu_recompiler_memory_exceptions) if (!g_settings.cpu_recompiler_memory_exceptions)
return; return;
@ -1044,7 +1044,7 @@ void CPU::NewRec::RISCV64Compiler::CheckBranchTarget(const biscuit::GPR& pcreg)
SwitchToNearCode(false); SwitchToNearCode(false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_jr(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_jr(CompileFlags cf)
{ {
const GPR pcreg = CFGetRegS(cf); const GPR pcreg = CFGetRegS(cf);
CheckBranchTarget(pcreg); CheckBranchTarget(pcreg);
@ -1055,7 +1055,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_jr(CompileFlags cf)
EndBlock(std::nullopt, true); EndBlock(std::nullopt, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_jalr(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
{ {
const GPR pcreg = CFGetRegS(cf); const GPR pcreg = CFGetRegS(cf);
if (MipsD() != Reg::zero) if (MipsD() != Reg::zero)
@ -1068,7 +1068,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_jalr(CompileFlags cf)
EndBlock(std::nullopt, true); EndBlock(std::nullopt, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) void CPU::Recompiler::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
@ -1098,7 +1098,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1; const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
if (!cf.valid_host_t) if (!cf.valid_host_t)
MoveTToReg(RARG1, cf); MoveTToReg(RARG1, cf);
if (cond == Compiler::BranchCondition::Equal) if (cond == Recompiler::BranchCondition::Equal)
rvAsm->BEQ(rs, rt, &taken); rvAsm->BEQ(rs, rt, &taken);
else else
rvAsm->BNE(rs, rt, &taken); rvAsm->BNE(rs, rt, &taken);
@ -1146,7 +1146,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
EndBlock(taken_pc, true); EndBlock(taken_pc, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf, bool overflow) void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
{ {
const GPR rs = CFGetRegS(cf); const GPR rs = CFGetRegS(cf);
const GPR rt = CFGetRegT(cf); const GPR rt = CFGetRegT(cf);
@ -1169,27 +1169,27 @@ void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf, bool overflow)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf)
{ {
Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
} }
void CPU::NewRec::RISCV64Compiler::Compile_addiu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
{ {
Compile_addi(cf, false); Compile_addi(cf, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf)
{ {
Compile_slti(cf, true); Compile_slti(cf, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_sltiu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
{ {
Compile_slti(cf, false); Compile_slti(cf, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf, bool sign) void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign)
{ {
if (sign) if (sign)
SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32()); SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
@ -1197,7 +1197,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf, bool sign)
SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32()); SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
} }
void CPU::NewRec::RISCV64Compiler::Compile_andi(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_andi(CompileFlags cf)
{ {
const GPR rt = CFGetRegT(cf); const GPR rt = CFGetRegT(cf);
if (const u32 imm = inst->i.imm_zext32(); imm != 0) if (const u32 imm = inst->i.imm_zext32(); imm != 0)
@ -1206,7 +1206,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_andi(CompileFlags cf)
EmitMov(rt, 0); EmitMov(rt, 0);
} }
void CPU::NewRec::RISCV64Compiler::Compile_ori(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_ori(CompileFlags cf)
{ {
const GPR rt = CFGetRegT(cf); const GPR rt = CFGetRegT(cf);
const GPR rs = CFGetRegS(cf); const GPR rs = CFGetRegS(cf);
@ -1216,7 +1216,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_ori(CompileFlags cf)
rvAsm->MV(rt, rs); rvAsm->MV(rt, rs);
} }
void CPU::NewRec::RISCV64Compiler::Compile_xori(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_xori(CompileFlags cf)
{ {
const GPR rt = CFGetRegT(cf); const GPR rt = CFGetRegT(cf);
const GPR rs = CFGetRegS(cf); const GPR rs = CFGetRegS(cf);
@ -1226,7 +1226,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_xori(CompileFlags cf)
rvAsm->MV(rt, rs); rvAsm->MV(rt, rs);
} }
void CPU::NewRec::RISCV64Compiler::Compile_shift( void CPU::Recompiler::RISCV64Recompiler::Compile_shift(
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned)) void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
{ {
@ -1238,22 +1238,22 @@ void CPU::NewRec::RISCV64Compiler::Compile_shift(
rvAsm->MV(rd, rt); rvAsm->MV(rd, rt);
} }
void CPU::NewRec::RISCV64Compiler::Compile_sll(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sll(CompileFlags cf)
{ {
Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW); Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_srl(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_srl(CompileFlags cf)
{ {
Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW); Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_sra(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sra(CompileFlags cf)
{ {
Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW); Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_variable_shift( void CPU::Recompiler::RISCV64Recompiler::Compile_variable_shift(
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned)) void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
{ {
@ -1279,22 +1279,22 @@ void CPU::NewRec::RISCV64Compiler::Compile_variable_shift(
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_sllv(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sllv(CompileFlags cf)
{ {
Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW); Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_srlv(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_srlv(CompileFlags cf)
{ {
Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW); Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_srav(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_srav(CompileFlags cf)
{ {
Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW); Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
} }
void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf, bool sign) void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign)
{ {
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
if (!cf.valid_host_s) if (!cf.valid_host_s)
@ -1325,17 +1325,17 @@ void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf, bool sign)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf)
{ {
Compile_mult(cf, true); Compile_mult(cf, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_multu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_multu(CompileFlags cf)
{ {
Compile_mult(cf, false); Compile_mult(cf, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_div(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_div(CompileFlags cf)
{ {
// 36 Volume I: RISC-V User-Level ISA V2.2 // 36 Volume I: RISC-V User-Level ISA V2.2
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
@ -1375,7 +1375,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_div(CompileFlags cf)
rvAsm->Bind(&done); rvAsm->Bind(&done);
} }
void CPU::NewRec::RISCV64Compiler::Compile_divu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_divu(CompileFlags cf)
{ {
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
if (!cf.valid_host_s) if (!cf.valid_host_s)
@ -1393,8 +1393,8 @@ void CPU::NewRec::RISCV64Compiler::Compile_divu(CompileFlags cf)
rvAsm->REMUW(rhi, rs, rt); rvAsm->REMUW(rhi, rs, rt);
} }
void CPU::NewRec::RISCV64Compiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, void CPU::Recompiler::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
const biscuit::GPR& reg_to_discard) const biscuit::GPR& reg_to_discard)
{ {
SwitchToFarCode(true, &Assembler::BEQ, long_res, res); SwitchToFarCode(true, &Assembler::BEQ, long_res, res);
@ -1410,9 +1410,9 @@ void CPU::NewRec::RISCV64Compiler::TestOverflow(const biscuit::GPR& long_res, co
SwitchToNearCode(false); SwitchToNearCode(false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_dst_op( void CPU::Recompiler::RISCV64Recompiler::Compile_dst_op(
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm), void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow) void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
@ -1476,29 +1476,29 @@ void CPU::NewRec::RISCV64Compiler::Compile_dst_op(
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_add(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_add(CompileFlags cf)
{ {
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true, Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,
g_settings.cpu_recompiler_memory_exceptions); g_settings.cpu_recompiler_memory_exceptions);
} }
void CPU::NewRec::RISCV64Compiler::Compile_addu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_addu(CompileFlags cf)
{ {
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true, false); Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_sub(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sub(CompileFlags cf)
{ {
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false, Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,
g_settings.cpu_recompiler_memory_exceptions); g_settings.cpu_recompiler_memory_exceptions);
} }
void CPU::NewRec::RISCV64Compiler::Compile_subu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_subu(CompileFlags cf)
{ {
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false, false); Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_and(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_and(CompileFlags cf)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -1516,10 +1516,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_and(CompileFlags cf)
return; return;
} }
Compile_dst_op(cf, &Assembler::AND, &RISCV64Compiler::SafeANDI, &Assembler::AND, true, false); Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_or(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_or(CompileFlags cf)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -1532,10 +1532,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_or(CompileFlags cf)
return; return;
} }
Compile_dst_op(cf, &Assembler::OR, &RISCV64Compiler::SafeORI, &Assembler::OR, true, false); Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_xor(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_xor(CompileFlags cf)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -1554,26 +1554,26 @@ void CPU::NewRec::RISCV64Compiler::Compile_xor(CompileFlags cf)
return; return;
} }
Compile_dst_op(cf, &Assembler::XOR, &RISCV64Compiler::SafeXORI, &Assembler::XOR, true, false); Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_nor(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_nor(CompileFlags cf)
{ {
Compile_or(cf); Compile_or(cf);
rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf)); rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));
} }
void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf)
{ {
Compile_slt(cf, true); Compile_slt(cf, true);
} }
void CPU::NewRec::RISCV64Compiler::Compile_sltu(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_sltu(CompileFlags cf)
{ {
Compile_slt(cf, false); Compile_slt(cf, false);
} }
void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf, bool sign) void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -1598,7 +1598,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf, bool sign)
} }
} }
biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg( biscuit::GPR CPU::Recompiler::RISCV64Recompiler::ComputeLoadStoreAddressArg(
CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, const std::optional<const biscuit::GPR>& reg) CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, const std::optional<const biscuit::GPR>& reg)
{ {
const u32 imm = inst->i.imm_sext32(); const u32 imm = inst->i.imm_sext32();
@ -1639,8 +1639,9 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
} }
template<typename RegAllocFn> template<typename RegAllocFn>
biscuit::GPR CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign, biscuit::GPR CPU::Recompiler::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size,
bool use_fastmem, const RegAllocFn& dst_reg_alloc) bool sign, bool use_fastmem,
const RegAllocFn& dst_reg_alloc)
{ {
if (use_fastmem) if (use_fastmem)
{ {
@ -1769,8 +1770,8 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr
return dst_reg; return dst_reg;
} }
void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg, void CPU::Recompiler::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
MemoryAccessSize size, bool use_fastmem) MemoryAccessSize size, bool use_fastmem)
{ {
if (use_fastmem) if (use_fastmem)
{ {
@ -1869,8 +1870,9 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ? const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
@ -1897,8 +1899,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
@ -1991,8 +1994,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, true); const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -2076,8 +2080,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
AssertRegOrConstS(cf); AssertRegOrConstS(cf);
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -2103,8 +2108,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
DebugAssert(size == MemoryAccessSize::Word && !sign); DebugAssert(size == MemoryAccessSize::Word && !sign);
@ -2177,8 +2183,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, void CPU::Recompiler::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
const std::optional<VirtualMemoryAddress>& address) bool use_fastmem,
const std::optional<VirtualMemoryAddress>& address)
{ {
const u32 index = static_cast<u32>(inst->r.rt.GetValue()); const u32 index = static_cast<u32>(inst->r.rt.GetValue());
const auto [ptr, action] = GetGTERegisterPointer(index, false); const auto [ptr, action] = GetGTERegisterPointer(index, false);
@ -2234,7 +2241,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
{ {
// TODO: we need better constant setting here.. which will need backprop // TODO: we need better constant setting here.. which will need backprop
AssertRegOrConstT(cf); AssertRegOrConstT(cf);
@ -2314,7 +2321,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_rfe(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_rfe(CompileFlags cf)
{ {
// shift mode bits right two, preserving upper bits // shift mode bits right two, preserving upper bits
rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits)); rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
@ -2327,7 +2334,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_rfe(CompileFlags cf)
TestInterrupts(RARG1); TestInterrupts(RARG1);
} }
void CPU::NewRec::RISCV64Compiler::TestInterrupts(const biscuit::GPR& sr) void CPU::Recompiler::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)
{ {
DebugAssert(sr != RSCRATCH); DebugAssert(sr != RSCRATCH);
@ -2380,7 +2387,7 @@ void CPU::NewRec::RISCV64Compiler::TestInterrupts(const biscuit::GPR& sr)
rvAsm->Bind(&no_interrupt); rvAsm->Bind(&no_interrupt);
} }
void CPU::NewRec::RISCV64Compiler::Compile_mfc2(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)
{ {
const u32 index = inst->cop.Cop2Index(); const u32 index = inst->cop.Cop2Index();
const Reg rt = inst->r.rt; const Reg rt = inst->r.rt;
@ -2420,7 +2427,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mfc2(CompileFlags cf)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_mtc2(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)
{ {
const u32 index = inst->cop.Cop2Index(); const u32 index = inst->cop.Cop2Index();
const auto [ptr, action] = GetGTERegisterPointer(index, true); const auto [ptr, action] = GetGTERegisterPointer(index, true);
@ -2482,7 +2489,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mtc2(CompileFlags cf)
} }
} }
void CPU::NewRec::RISCV64Compiler::Compile_cop2(CompileFlags cf) void CPU::Recompiler::RISCV64Recompiler::Compile_cop2(CompileFlags cf)
{ {
TickCount func_ticks; TickCount func_ticks;
GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);
@ -2494,10 +2501,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_cop2(CompileFlags cf)
AddGTETicks(func_ticks); AddGTETicks(func_ticks);
} }
u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
bool is_load) bool is_load)
{ {
Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space); Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);
Assembler* rvAsm = &arm_asm; Assembler* rvAsm = &arm_asm;

View File

@ -3,19 +3,19 @@
#pragma once #pragma once
#include "cpu_newrec_compiler.h" #include "cpu_recompiler.h"
#include <memory> #include <memory>
#ifdef CPU_ARCH_RISCV64 #ifdef CPU_ARCH_RISCV64
namespace CPU::NewRec { namespace CPU::Recompiler {
class RISCV64Compiler final : public Compiler class RISCV64Recompiler final : public Recompiler
{ {
public: public:
RISCV64Compiler(); RISCV64Recompiler();
~RISCV64Compiler() override; ~RISCV64Recompiler() override;
protected: protected:
const char* GetHostRegName(u32 reg) const override; const char* GetHostRegName(u32 reg) const override;
@ -74,7 +74,7 @@ protected:
void Compile_divu(CompileFlags cf) override; void Compile_divu(CompileFlags cf) override;
void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard); void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard);
void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR), void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm), void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative,
bool overflow); bool overflow);
void Compile_add(CompileFlags cf) override; void Compile_add(CompileFlags cf) override;
@ -171,6 +171,6 @@ private:
biscuit::Assembler* rvAsm; biscuit::Assembler* rvAsm;
}; };
} // namespace CPU::NewRec } // namespace CPU::Recompiler
#endif // CPU_ARCH_RISCV64 #endif // CPU_ARCH_RISCV64

View File

@ -3,19 +3,19 @@
#pragma once #pragma once
#include "cpu_newrec_compiler.h" #include "cpu_recompiler.h"
#include <memory> #include <memory>
#ifdef CPU_ARCH_X64 #ifdef CPU_ARCH_X64
namespace CPU::NewRec { namespace CPU::Recompiler {
class X64Compiler final : public Compiler class X64Recompiler final : public Recompiler
{ {
public: public:
X64Compiler(); X64Recompiler();
~X64Compiler() override; ~X64Recompiler() override;
protected: protected:
const char* GetHostRegName(u32 reg) const override; const char* GetHostRegName(u32 reg) const override;
@ -141,6 +141,6 @@ private:
Xbyak::CodeGenerator* cg; Xbyak::CodeGenerator* cg;
}; };
} // namespace CPU::NewRec } // namespace CPU::Recompiler
#endif // CPU_ARCH_X64 #endif // CPU_ARCH_X64

View File

@ -430,11 +430,6 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float
text.append_format("{}{}", first ? "" : "/", "CI"); text.append_format("{}{}", first ? "" : "/", "CI");
first = false; first = false;
} }
else if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
{
text.append_format("{}{}", first ? "" : "/", "NR");
first = false;
}
else else
{ {
if (g_settings.cpu_recompiler_icache) if (g_settings.cpu_recompiler_icache)

View File

@ -1207,13 +1207,11 @@ static constexpr const std::array s_cpu_execution_mode_names = {
"Interpreter", "Interpreter",
"CachedInterpreter", "CachedInterpreter",
"Recompiler", "Recompiler",
"NewRec",
}; };
static constexpr const std::array s_cpu_execution_mode_display_names = { static constexpr const std::array s_cpu_execution_mode_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Interpreter (Slowest)", "CPUExecutionMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Interpreter (Slowest)", "CPUExecutionMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Cached Interpreter (Faster)", "CPUExecutionMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Cached Interpreter (Faster)", "CPUExecutionMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Recompiler (Fastest)", "CPUExecutionMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Recompiler (Fastest)", "CPUExecutionMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "New Recompiler (Experimental)", "CPUExecutionMode"),
}; };
std::optional<CPUExecutionMode> Settings::ParseCPUExecutionMode(const char* str) std::optional<CPUExecutionMode> Settings::ParseCPUExecutionMode(const char* str)

View File

@ -486,11 +486,11 @@ struct Settings
static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f;
static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f;
// Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec. // Prefer recompiler when supported.
#if defined(CPU_ARCH_RISCV64) #ifdef ENABLE_RECOMPILER
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler; static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler;
#else
static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter;
#endif #endif
// LUT still ends up faster on Apple Silicon for now, because of 16K pages. // LUT still ends up faster on Apple Silicon for now, because of 16K pages.

View File

@ -48,7 +48,6 @@ enum class CPUExecutionMode : u8
Interpreter, Interpreter,
CachedInterpreter, CachedInterpreter,
Recompiler, Recompiler,
NewRec,
Count Count
}; };