diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f6cc9dcc8..9b54df8d7 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -127,20 +127,12 @@ add_library(core ) set(RECOMPILER_SRCS - cpu_recompiler_code_generator.cpp - cpu_recompiler_code_generator.h - cpu_recompiler_code_generator_generic.cpp - cpu_recompiler_register_cache.cpp - cpu_recompiler_register_cache.h + cpu_recompiler.cpp + cpu_recompiler.h cpu_recompiler_thunks.h cpu_recompiler_types.h ) -set(NEWREC_SOURCES - cpu_newrec_compiler.cpp - cpu_newrec_compiler.h -) - target_precompile_headers(core PRIVATE "pch.h") target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(core PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") @@ -148,11 +140,11 @@ target_link_libraries(core PUBLIC Threads::Threads common util) target_link_libraries(core PRIVATE xxhash imgui rapidyaml rcheevos cpuinfo::cpuinfo ZLIB::ZLIB Zstd::Zstd libzip::zip) if(CPU_ARCH_X64) - target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") - target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} - cpu_recompiler_code_generator_x64.cpp - cpu_newrec_compiler_x64.cpp - cpu_newrec_compiler_x64.h + target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1") + target_sources(core PRIVATE + ${RECOMPILER_SRCS} + cpu_recompiler_x64.cpp + cpu_recompiler_x64.h ) target_link_libraries(core PRIVATE xbyak) if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") @@ -161,33 +153,34 @@ if(CPU_ARCH_X64) message(STATUS "Building x64 recompiler.") endif() if(CPU_ARCH_ARM32) - target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1") - target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} - cpu_recompiler_code_generator_aarch32.cpp - cpu_newrec_compiler_aarch32.cpp - cpu_newrec_compiler_aarch32.h + target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1") + target_sources(core PRIVATE + ${RECOMPILER_SRCS} + cpu_recompiler_arm32.cpp + cpu_recompiler_arm32.h ) target_link_libraries(core PUBLIC vixl) - message(STATUS "Building AArch32 recompiler.") + message(STATUS "Building ARM32 recompiler.") endif() if(CPU_ARCH_ARM64) - target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") - target_sources(core PRIVATE ${RECOMPILER_SRCS} ${NEWREC_SOURCES} - cpu_recompiler_code_generator_aarch64.cpp - cpu_newrec_compiler_aarch64.cpp - cpu_newrec_compiler_aarch64.h + target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1") + target_sources(core PRIVATE + ${RECOMPILER_SRCS} + cpu_recompiler_arm64.cpp + cpu_recompiler_arm64.h ) target_link_libraries(core PUBLIC vixl) - message(STATUS "Building AArch64 recompiler.") + message(STATUS "Building ARM64 recompiler.") endif() if(CPU_ARCH_RISCV64) - target_compile_definitions(core PUBLIC "ENABLE_NEWREC=1" "ENABLE_MMAP_FASTMEM=1") - target_sources(core PRIVATE ${NEWREC_SOURCES} - cpu_newrec_compiler_riscv64.cpp - cpu_newrec_compiler_riscv64.h + target_compile_definitions(core PUBLIC "ENABLE_RECOMPILER=1" "ENABLE_MMAP_FASTMEM=1") + target_sources(core PRIVATE + ${RECOMPILER_SRCS} + cpu_recompiler_riscv64.cpp + cpu_recompiler_riscv64.h ) target_link_libraries(core PUBLIC biscuit::biscuit riscv-disas) - message(STATUS "Building RISC-V 64-bit recompiler.") + message(STATUS "Building RISC-V-64 recompiler.") endif() # Copy the provided data directory to the output directory. Borrowed from PCSX2. 
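The CMake change above collapses the former ENABLE_RECOMPILER/ENABLE_NEWREC pair into a single ENABLE_RECOMPILER define shared by x64, ARM32, ARM64, and RISC-V 64, so consumers only have one guard to keep in sync. Below is a minimal standalone sketch of the consumer side of that define; ExecutionMode and the stub function are hypothetical illustration, only the ENABLE_RECOMPILER define itself comes from this diff:

// Sketch: one define now gates the single JIT backend (assumed names).
#include <cstdio>

#define ENABLE_RECOMPILER 1 // normally injected by CMake/MSBuild per the diff

enum class ExecutionMode { Interpreter, Recompiler };

static const void* CompileBlockStub(ExecutionMode mode)
{
  const void* host_code = nullptr;
#ifdef ENABLE_RECOMPILER
  // Single guard: no separate ENABLE_NEWREC branch to maintain anymore.
  if (mode == ExecutionMode::Recompiler)
    host_code = reinterpret_cast<const void*>(&CompileBlockStub);
#endif
  if (!host_code)
    std::puts("falling back to the interpreter");
  return host_code;
}

int main()
{
  return CompileBlockStub(ExecutionMode::Recompiler) ? 0 : 1;
}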
diff --git a/src/core/core.props b/src/core/core.props index f841d9719..68bafc777 100644 --- a/src/core/core.props +++ b/src/core/core.props @@ -7,7 +7,6 @@ ENABLE_RAINTEGRATION=1;%(PreprocessorDefinitions) ENABLE_RECOMPILER=1;%(PreprocessorDefinitions) ENABLE_MMAP_FASTMEM=1;%(PreprocessorDefinitions) - ENABLE_NEWREC=1;%(PreprocessorDefinitions) %(AdditionalIncludeDirectories);$(SolutionDir)dep\zlib\include;$(SolutionDir)dep\rcheevos\include diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index f3798eb86..7c4a168b9 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -14,31 +14,19 @@ - - + + true - + true - + true - + true - - - true - - - true - - - - true - - @@ -105,21 +93,19 @@ - - + + true - + true - + true - + true - - diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 5a8d278a2..0e497ecc8 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -23,12 +23,7 @@ - - - - - @@ -43,7 +38,6 @@ - @@ -58,11 +52,11 @@ - - - - - + + + + + @@ -98,9 +92,7 @@ - - @@ -134,11 +126,11 @@ - - - - - + + + + + diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 1bdd6aef6..5dec2de6e 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -30,11 +30,7 @@ LOG_CHANNEL(CodeCache); // #define ENABLE_RECOMPILER_PROFILING 1 #ifdef ENABLE_RECOMPILER -#include "cpu_recompiler_code_generator.h" -#endif - -#ifdef ENABLE_NEWREC -#include "cpu_newrec_compiler.h" +#include "cpu_recompiler.h" #endif #include @@ -165,15 +161,14 @@ static u32 s_total_host_instructions_emitted = 0; #endif } // namespace CPU::CodeCache -bool CPU::CodeCache::IsUsingAnyRecompiler() +bool CPU::CodeCache::IsUsingRecompiler() { - return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler || - g_settings.cpu_execution_mode == CPUExecutionMode::NewRec); + return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler); } bool CPU::CodeCache::IsUsingFastmem() { - return g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled; + return (g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled); } bool CPU::CodeCache::ProcessStartup(Error* error) @@ -217,7 +212,7 @@ void CPU::CodeCache::Reset() { ClearBlocks(); - if (IsUsingAnyRecompiler()) + if (IsUsingRecompiler()) { ResetCodeBuffer(); CompileASMFunctions(); @@ -232,7 +227,7 @@ void CPU::CodeCache::Shutdown() void CPU::CodeCache::Execute() { - if (IsUsingAnyRecompiler()) + if (IsUsingRecompiler()) { g_enter_recompiler(); UnreachableCode(); @@ -500,9 +495,8 @@ CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructio return block; } - // Old rec doesn't use backprop info, don't waste time filling it. - if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) - FillBlockRegInfo(block); + // populate backpropagation information for liveness queries + FillBlockRegInfo(block); // add it to the tracking list for its page AddBlockToPageList(block); @@ -1316,7 +1310,7 @@ void CPU::CodeCache::FillBlockRegInfo(Block* block) void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc) { // TODO: this doesn't currently handle when the cache overflows... - DebugAssert(IsUsingAnyRecompiler()); + DebugAssert(IsUsingRecompiler()); MemMap::BeginCodeWrite(); Block* block = LookupBlock(start_pc); @@ -1450,11 +1444,9 @@ void CPU::CodeCache::ResetCodeBuffer() s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE; s_code_used = 0; - // Use half the far code size when using newrec and memory exceptions aren't enabled.
It's only used for backpatching. - const u32 far_code_size = - (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec && !g_settings.cpu_recompiler_memory_exceptions) ? - (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) : - RECOMPILER_FAR_CODE_CACHE_SIZE; + // Use half the far code size when memory exceptions aren't enabled. It's only used for backpatching. + const u32 far_code_size = (!g_settings.cpu_recompiler_memory_exceptions) ? (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) : + RECOMPILER_FAR_CODE_CACHE_SIZE; s_far_code_size = far_code_size; s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr; s_free_far_code_ptr = s_far_code_ptr; @@ -1572,14 +1564,7 @@ bool CPU::CodeCache::CompileBlock(Block* block) #ifdef ENABLE_RECOMPILER if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler) - { - Recompiler::CodeGenerator codegen; - host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size); - } -#endif -#ifdef ENABLE_NEWREC - if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) - host_code = NewRec::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size); + host_code = Recompiler::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size); #endif block->host_code = host_code; @@ -1715,20 +1700,17 @@ PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exc BackpatchLoadStore(exception_pc, info); // queue block for recompilation later - if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) + Block* block = LookupBlock(info.guest_block); + if (block) { - Block* block = LookupBlock(info.guest_block); - if (block) - { - // This is a bit annoying, we have to remove it from the page list if it's a RAM block. - DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc); - RemoveBlockFromPageList(block); - InvalidateBlock(block, BlockState::NeedsRecompile); + // This is a bit annoying, we have to remove it from the page list if it's a RAM block. + DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc); + RemoveBlockFromPageList(block); + InvalidateBlock(block, BlockState::NeedsRecompile); - // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback. - block->compile_frame = System::GetFrameNumber(); - block->compile_count = 1; - } + // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback. + block->compile_frame = System::GetFrameNumber(); + block->compile_count = 1; } MemMap::EndCodeWrite(); @@ -1748,11 +1730,7 @@ void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchI { #ifdef ENABLE_RECOMPILER if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler) - Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info); -#endif -#ifdef ENABLE_NEWREC - if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) - NewRec::BackpatchLoadStore(host_pc, info); + Recompiler::BackpatchLoadStore(host_pc, info); #endif } diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h index 6ed9e7d1b..5dc82b736 100644 --- a/src/core/cpu_code_cache.h +++ b/src/core/cpu_code_cache.h @@ -11,7 +11,7 @@ class Error; namespace CPU::CodeCache { /// Returns true if the recompiler is in use. -bool IsUsingAnyRecompiler(); +bool IsUsingRecompiler(); /// Returns true if the recompiler and fastmem are in use.
bool IsUsingFastmem(); diff --git a/src/core/cpu_newrec_compiler.cpp b/src/core/cpu_recompiler.cpp similarity index 78% rename from src/core/cpu_newrec_compiler.cpp rename to src/core/cpu_recompiler.cpp index 887cb49ab..597d7e293 100644 --- a/src/core/cpu_newrec_compiler.cpp +++ b/src/core/cpu_recompiler.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 -#include "cpu_newrec_compiler.h" +#include "cpu_recompiler.h" #include "cpu_code_cache.h" #include "cpu_core_private.h" #include "cpu_disasm.h" @@ -21,20 +21,20 @@ LOG_CHANNEL(Recompiler); // TODO: speculative constants // TODO: std::bitset in msvc has bounds checks even in release... -const std::array, 3> CPU::NewRec::Compiler::s_pgxp_mem_load_functions = { +const std::array, 3> CPU::Recompiler::Recompiler::s_pgxp_mem_load_functions = { {{{reinterpret_cast(&PGXP::CPU_LBx), reinterpret_cast(&PGXP::CPU_LBx)}}, {{reinterpret_cast(&PGXP::CPU_LHU), reinterpret_cast(&PGXP::CPU_LH)}}, {{reinterpret_cast(&PGXP::CPU_LW)}}}}; -const std::array CPU::NewRec::Compiler::s_pgxp_mem_store_functions = { +const std::array CPU::Recompiler::Recompiler::s_pgxp_mem_store_functions = { {reinterpret_cast(&PGXP::CPU_SB), reinterpret_cast(&PGXP::CPU_SH), reinterpret_cast(&PGXP::CPU_SW)}}; -CPU::NewRec::Compiler::Compiler() = default; +CPU::Recompiler::Recompiler::Recompiler() = default; -CPU::NewRec::Compiler::~Compiler() = default; +CPU::Recompiler::Recompiler::~Recompiler() = default; -void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer, - u32 far_code_space) +void CPU::Recompiler::Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, + u8* far_code_buffer, u32 far_code_space) { m_block = block; m_compiler_pc = block->pc; @@ -66,7 +66,7 @@ void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 InitSpeculativeRegs(); } -void CPU::NewRec::Compiler::BeginBlock() +void CPU::Recompiler::Recompiler::BeginBlock() { #if 0 GenerateCall(reinterpret_cast(&CPU::CodeCache::LogCurrentState)); @@ -100,7 +100,8 @@ void CPU::NewRec::Compiler::BeginBlock() m_dirty_instruction_bits = true; } -const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size) +const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, + u32* host_far_code_size) { Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(), CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace()); @@ -149,7 +150,7 @@ const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* ho return code; } -void CPU::NewRec::Compiler::SetConstantReg(Reg r, u32 v) +void CPU::Recompiler::Recompiler::SetConstantReg(Reg r, u32 v) { DebugAssert(r < Reg::count && r != Reg::zero); @@ -175,7 +176,7 @@ void CPU::NewRec::Compiler::SetConstantReg(Reg r, u32 v) } } -void CPU::NewRec::Compiler::CancelLoadDelaysToReg(Reg reg) +void CPU::Recompiler::Recompiler::CancelLoadDelaysToReg(Reg reg) { if (m_load_delay_register != reg) return; @@ -186,7 +187,7 @@ void CPU::NewRec::Compiler::CancelLoadDelaysToReg(Reg reg) ClearHostReg(m_load_delay_value_register); } -void CPU::NewRec::Compiler::UpdateLoadDelay() +void CPU::Recompiler::Recompiler::UpdateLoadDelay() { if (m_load_delay_dirty) { @@ -250,7 +251,7 @@ void CPU::NewRec::Compiler::UpdateLoadDelay() } } -void CPU::NewRec::Compiler::FinishLoadDelay() 
+void CPU::Recompiler::Recompiler::FinishLoadDelay() { DebugAssert(!m_load_delay_dirty); if (!HasLoadDelay()) @@ -284,7 +285,7 @@ void CPU::NewRec::Compiler::FinishLoadDelay() m_load_delay_value_register = NUM_HOST_REGS; } -void CPU::NewRec::Compiler::FinishLoadDelayToReg(Reg reg) +void CPU::Recompiler::Recompiler::FinishLoadDelayToReg(Reg reg) { if (m_load_delay_dirty) { @@ -299,12 +300,12 @@ void CPU::NewRec::Compiler::FinishLoadDelayToReg(Reg reg) FinishLoadDelay(); } -u32 CPU::NewRec::Compiler::GetFlagsForNewLoadDelayedReg() const +u32 CPU::Recompiler::Recompiler::GetFlagsForNewLoadDelayedReg() const { return g_settings.gpu_pgxp_enable ? (HR_MODE_WRITE | HR_CALLEE_SAVED) : (HR_MODE_WRITE); } -void CPU::NewRec::Compiler::ClearConstantReg(Reg r) +void CPU::Recompiler::Recompiler::ClearConstantReg(Reg r) { DebugAssert(r < Reg::count && r != Reg::zero); m_constant_reg_values[static_cast(r)] = 0; @@ -312,7 +313,7 @@ void CPU::NewRec::Compiler::ClearConstantReg(Reg r) m_constant_regs_dirty.reset(static_cast(r)); } -void CPU::NewRec::Compiler::FlushConstantRegs(bool invalidate) +void CPU::Recompiler::Recompiler::FlushConstantRegs(bool invalidate) { for (u32 i = 1; i < static_cast(Reg::count); i++) { @@ -323,25 +324,25 @@ void CPU::NewRec::Compiler::FlushConstantRegs(bool invalidate) } } -CPU::Reg CPU::NewRec::Compiler::MipsD() const +CPU::Reg CPU::Recompiler::Recompiler::MipsD() const { return inst->r.rd; } -u32 CPU::NewRec::Compiler::GetConditionalBranchTarget(CompileFlags cf) const +u32 CPU::Recompiler::Recompiler::GetConditionalBranchTarget(CompileFlags cf) const { // compiler pc has already been advanced when swapping branch delay slots const u32 current_pc = m_compiler_pc - (cf.delay_slot_swapped ? sizeof(Instruction) : 0); return current_pc + (inst->i.imm_sext32() << 2); } -u32 CPU::NewRec::Compiler::GetBranchReturnAddress(CompileFlags cf) const +u32 CPU::Recompiler::Recompiler::GetBranchReturnAddress(CompileFlags cf) const { // compiler pc has already been advanced when swapping branch delay slots return m_compiler_pc + (cf.delay_slot_swapped ? 
0 : sizeof(Instruction)); } -bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd) +bool CPU::Recompiler::Recompiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd) { if constexpr (!SWAP_BRANCH_DELAY_SLOTS) return false; @@ -510,13 +511,13 @@ is_unsafe: return false; } -void CPU::NewRec::Compiler::SetCompilerPC(u32 newpc) +void CPU::Recompiler::Recompiler::SetCompilerPC(u32 newpc) { m_compiler_pc = newpc; m_dirty_pc = true; } -u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags) +u32 CPU::Recompiler::Recompiler::GetFreeHostReg(u32 flags) { const u32 req_flags = HR_USABLE | (flags & HR_CALLEE_SAVED); @@ -637,7 +638,7 @@ u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags) return lowest; } -const char* CPU::NewRec::Compiler::GetReadWriteModeString(u32 flags) +const char* CPU::Recompiler::Recompiler::GetReadWriteModeString(u32 flags) { if ((flags & (HR_MODE_READ | HR_MODE_WRITE)) == (HR_MODE_READ | HR_MODE_WRITE)) return "read-write"; @@ -649,8 +650,8 @@ const char* CPU::NewRec::Compiler::GetReadWriteModeString(u32 flags) return "UNKNOWN"; } -u32 CPU::NewRec::Compiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, - Reg reg /* = Reg::count */) +u32 CPU::Recompiler::Recompiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, + Reg reg /* = Reg::count */) { // Cancel any load delays before booting anything out if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)) @@ -751,8 +752,8 @@ u32 CPU::NewRec::Compiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = return hreg; } -std::optional CPU::NewRec::Compiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, - Reg reg /* = Reg::count */) +std::optional CPU::Recompiler::Recompiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, + Reg reg /* = Reg::count */) { for (u32 i = 0; i < NUM_HOST_REGS; i++) { @@ -799,12 +800,12 @@ std::optional CPU::NewRec::Compiler::CheckHostReg(u32 flags, HostRegAllocTy return std::nullopt; } -u32 CPU::NewRec::Compiler::AllocateTempHostReg(u32 flags) +u32 CPU::Recompiler::Recompiler::AllocateTempHostReg(u32 flags) { return AllocateHostReg(flags, HR_TYPE_TEMP); } -void CPU::NewRec::Compiler::SwapHostRegAlloc(u32 lhs, u32 rhs) +void CPU::Recompiler::Recompiler::SwapHostRegAlloc(u32 lhs, u32 rhs) { HostRegAlloc& lra = m_host_regs[lhs]; HostRegAlloc& rra = m_host_regs[rhs]; @@ -817,7 +818,7 @@ void CPU::NewRec::Compiler::SwapHostRegAlloc(u32 lhs, u32 rhs) std::swap(lra.counter, rra.counter); } -void CPU::NewRec::Compiler::FlushHostReg(u32 reg) +void CPU::Recompiler::Recompiler::FlushHostReg(u32 reg) { HostRegAlloc& ra = m_host_regs[reg]; if (ra.flags & HR_MODE_WRITE) @@ -862,7 +863,7 @@ void CPU::NewRec::Compiler::FlushHostReg(u32 reg) } } -void CPU::NewRec::Compiler::FreeHostReg(u32 reg) +void CPU::Recompiler::Recompiler::FreeHostReg(u32 reg) { DebugAssert(IsHostRegAllocated(reg)); DEBUG_LOG("Freeing host register {}", GetHostRegName(reg)); @@ -870,7 +871,7 @@ void CPU::NewRec::Compiler::FreeHostReg(u32 reg) ClearHostReg(reg); } -void CPU::NewRec::Compiler::ClearHostReg(u32 reg) +void CPU::Recompiler::Recompiler::ClearHostReg(u32 reg) { HostRegAlloc& ra = m_host_regs[reg]; ra.flags &= IMMUTABLE_HR_FLAGS; @@ -879,7 +880,7 @@ void CPU::NewRec::Compiler::ClearHostReg(u32 reg) ra.reg = Reg::count; } -void CPU::NewRec::Compiler::MarkRegsNeeded(HostRegAllocType type, Reg reg) +void CPU::Recompiler::Recompiler::MarkRegsNeeded(HostRegAllocType type, Reg reg) { for (u32 i = 0; i < 
NUM_HOST_REGS; i++) { @@ -889,7 +890,7 @@ void CPU::NewRec::Compiler::MarkRegsNeeded(HostRegAllocType type, Reg reg) } } -void CPU::NewRec::Compiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg) +void CPU::Recompiler::Recompiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg) { // only supported for cpu regs for now DebugAssert(new_type == HR_TYPE_TEMP || new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE); @@ -928,7 +929,7 @@ void CPU::NewRec::Compiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocTy ra.reg = new_reg; } -void CPU::NewRec::Compiler::ClearHostRegNeeded(u32 reg) +void CPU::Recompiler::Recompiler::ClearHostRegNeeded(u32 reg) { DebugAssert(reg < NUM_HOST_REGS && IsHostRegAllocated(reg)); HostRegAlloc& ra = m_host_regs[reg]; @@ -938,7 +939,7 @@ void CPU::NewRec::Compiler::ClearHostRegNeeded(u32 reg) ra.flags &= ~HR_NEEDED; } -void CPU::NewRec::Compiler::ClearHostRegsNeeded() +void CPU::Recompiler::Recompiler::ClearHostRegsNeeded() { for (u32 i = 0; i < NUM_HOST_REGS; i++) { @@ -956,7 +957,7 @@ void CPU::NewRec::Compiler::ClearHostRegsNeeded() } } -void CPU::NewRec::Compiler::DeleteMIPSReg(Reg reg, bool flush) +void CPU::Recompiler::Recompiler::DeleteMIPSReg(Reg reg, bool flush) { DebugAssert(reg != Reg::zero); @@ -978,7 +979,7 @@ void CPU::NewRec::Compiler::DeleteMIPSReg(Reg reg, bool flush) ClearConstantReg(reg); } -bool CPU::NewRec::Compiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other) +bool CPU::Recompiler::Recompiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other) { // can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt if (to == from || to == other || !iinfo->RenameTest(from)) @@ -999,7 +1000,7 @@ bool CPU::NewRec::Compiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg return true; } -void CPU::NewRec::Compiler::UpdateHostRegCounters() +void CPU::Recompiler::Recompiler::UpdateHostRegCounters() { const CodeCache::InstructionInfo* const info_end = m_block->InstructionsInfo() + m_block->size; @@ -1037,7 +1038,7 @@ void CPU::NewRec::Compiler::UpdateHostRegCounters() } } -void CPU::NewRec::Compiler::Flush(u32 flags) +void CPU::Recompiler::Recompiler::Flush(u32 flags) { // TODO: Flush unneeded caller-saved regs (backup/replace callee-saved needed with caller-saved) if (flags & @@ -1088,7 +1089,7 @@ void CPU::NewRec::Compiler::Flush(u32 flags) InvalidateSpeculativeValues(); } -void CPU::NewRec::Compiler::FlushConstantReg(Reg r) +void CPU::Recompiler::Recompiler::FlushConstantReg(Reg r) { DebugAssert(m_constant_regs_valid.test(static_cast(r))); DEBUG_LOG("Writing back register {} with constant value 0x{:08X}", GetRegName(r), @@ -1097,7 +1098,7 @@ void CPU::NewRec::Compiler::FlushConstantReg(Reg r) m_constant_regs_dirty.reset(static_cast(r)); } -void CPU::NewRec::Compiler::BackupHostState() +void CPU::Recompiler::Recompiler::BackupHostState() { DebugAssert(m_host_state_backup_count < m_host_state_backup.size()); @@ -1127,7 +1128,7 @@ void CPU::NewRec::Compiler::BackupHostState() m_host_state_backup_count++; } -void CPU::NewRec::Compiler::RestoreHostState() +void CPU::Recompiler::Recompiler::RestoreHostState() { DebugAssert(m_host_state_backup_count > 0); m_host_state_backup_count--; @@ -1156,8 +1157,8 @@ void CPU::NewRec::Compiler::RestoreHostState() m_cycles = bu.cycles; } -void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register, - MemoryAccessSize size, bool is_signed, bool
is_load) +void CPU::Recompiler::Recompiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, + u32 data_register, MemoryAccessSize size, bool is_signed, bool is_load) { DebugAssert(CodeCache::IsUsingFastmem()); DebugAssert(address_register < NUM_HOST_REGS); @@ -1175,7 +1176,7 @@ void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, is_signed, is_load); } -void CPU::NewRec::Compiler::CompileInstruction() +void CPU::Recompiler::Recompiler::CompileInstruction() { #ifdef _DEBUG TinyString str; @@ -1203,34 +1204,34 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->r.funct) { - case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break; - case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break; - case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break; - case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break; - case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break; - case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break; - case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break; - case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break; + case InstructionFunct::sll: CompileTemplate(&Recompiler::Compile_sll_const, &Recompiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break; + case InstructionFunct::srl: CompileTemplate(&Recompiler::Compile_srl_const, &Recompiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break; + case InstructionFunct::sra: CompileTemplate(&Recompiler::Compile_sra_const, &Recompiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break; + case InstructionFunct::sllv: CompileTemplate(&Recompiler::Compile_sllv_const, &Recompiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break; + case InstructionFunct::srlv: CompileTemplate(&Recompiler::Compile_srlv_const, &Recompiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break; + case InstructionFunct::srav: CompileTemplate(&Recompiler::Compile_srav_const, &Recompiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break; + case InstructionFunct::jr: CompileTemplate(&Recompiler::Compile_jr_const, &Recompiler::Compile_jr, nullptr, TF_READS_S); break; + case InstructionFunct::jalr: CompileTemplate(&Recompiler::Compile_jalr_const, &Recompiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break; case InstructionFunct::syscall: Compile_syscall(); break; case InstructionFunct::break_: Compile_break(); break; case InstructionFunct::mfhi: 
SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break; case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break; case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break; case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break; - case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break; - case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break; - case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break; - case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break; - case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break; - case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break; - case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break; - case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break; - case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break; - case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break; - case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break; - case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break; - case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break; - case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break; + case InstructionFunct::mult: CompileTemplate(&Recompiler::Compile_mult_const, 
&Recompiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break; + case InstructionFunct::multu: CompileTemplate(&Recompiler::Compile_multu_const, &Recompiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break; + case InstructionFunct::div: CompileTemplate(&Recompiler::Compile_div_const, &Recompiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break; + case InstructionFunct::divu: CompileTemplate(&Recompiler::Compile_divu_const, &Recompiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break; + case InstructionFunct::add: CompileTemplate(&Recompiler::Compile_add_const, &Recompiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break; + case InstructionFunct::addu: CompileTemplate(&Recompiler::Compile_addu_const, &Recompiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break; + case InstructionFunct::sub: CompileTemplate(&Recompiler::Compile_sub_const, &Recompiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break; + case InstructionFunct::subu: CompileTemplate(&Recompiler::Compile_subu_const, &Recompiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break; + case InstructionFunct::and_: CompileTemplate(&Recompiler::Compile_and_const, &Recompiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break; + case InstructionFunct::or_: CompileTemplate(&Recompiler::Compile_or_const, &Recompiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break; + case InstructionFunct::xor_: CompileTemplate(&Recompiler::Compile_xor_const, &Recompiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break; + case InstructionFunct::nor: CompileTemplate(&Recompiler::Compile_nor_const, &Recompiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break; + case InstructionFunct::slt: CompileTemplate(&Recompiler::Compile_slt_const, &Recompiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break; + case InstructionFunct::sltu: CompileTemplate(&Recompiler::Compile_sltu_const, &Recompiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break; default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break; } } @@ -1239,33 +1240,33 @@ void CPU::NewRec::Compiler::CompileInstruction() case InstructionOp::j: Compile_j(); break; case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break; - case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break; - case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; - case InstructionOp::bgtz: 
CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; - case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; - case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; + case InstructionOp::b: CompileTemplate(&Recompiler::Compile_b_const, &Recompiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break; + case InstructionOp::blez: CompileTemplate(&Recompiler::Compile_blez_const, &Recompiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; + case InstructionOp::bgtz: CompileTemplate(&Recompiler::Compile_bgtz_const, &Recompiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break; + case InstructionOp::beq: CompileTemplate(&Recompiler::Compile_beq_const, &Recompiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; + case InstructionOp::bne: CompileTemplate(&Recompiler::Compile_bne_const, &Recompiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break; - case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break; - case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break; - case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break; - case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break; - case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break; - case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break; - case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break; + case InstructionOp::addi: CompileTemplate(&Recompiler::Compile_addi_const, &Recompiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break; + case InstructionOp::addiu: CompileTemplate(&Recompiler::Compile_addiu_const, &Recompiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break; + case InstructionOp::slti: CompileTemplate(&Recompiler::Compile_slti_const, &Recompiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break; + case InstructionOp::sltiu: CompileTemplate(&Recompiler::Compile_sltiu_const, &Recompiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break; + case 
InstructionOp::andi: CompileTemplate(&Recompiler::Compile_andi_const, &Recompiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break; + case InstructionOp::ori: CompileTemplate(&Recompiler::Compile_ori_const, &Recompiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break; + case InstructionOp::xori: CompileTemplate(&Recompiler::Compile_xori_const, &Recompiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break; case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break; - case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break; - case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break; - case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break; - case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break; - case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break; - case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break; - case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break; - case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break; - case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break; - case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break; - case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break; - case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break; + case InstructionOp::lb: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break; + case InstructionOp::lbu: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break; + case InstructionOp::lh: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::HalfWord, 
false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break; + case InstructionOp::lhu: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break; + case InstructionOp::lw: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break; + case InstructionOp::lwl: CompileLoadStoreTemplate(&Recompiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break; + case InstructionOp::lwr: CompileLoadStoreTemplate(&Recompiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break; + case InstructionOp::sb: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break; + case InstructionOp::sh: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break; + case InstructionOp::sw: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break; + case InstructionOp::swl: CompileLoadStoreTemplate(&Recompiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break; + case InstructionOp::swr: CompileLoadStoreTemplate(&Recompiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break; case InstructionOp::cop0: { @@ -1273,8 +1274,8 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->cop.CommonOp()) { - case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break; - case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break; + case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break; + case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break; default: Compile_Fallback(); break; } } @@ -1282,7 +1283,7 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->cop.Cop0Op()) { - case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break; + case Cop0Instruction::rfe: CompileTemplate(nullptr, &Recompiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break; default: Compile_Fallback(); break; } } @@ -1295,23 +1296,23 @@ void CPU::NewRec::Compiler::CompileInstruction() { switch (inst->cop.CommonOp()) { - case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; - case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; - case CopCommonInstruction::mtcn: CompileTemplate(nullptr, 
&Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; - case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; + case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; + case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; + case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; + case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; default: Compile_Fallback(); break; } } else { // GTE ops - CompileTemplate(nullptr, &Compiler::Compile_cop2, nullptr, TF_GTE_STALL); + CompileTemplate(nullptr, &Recompiler::Compile_cop2, nullptr, TF_GTE_STALL); } } break; - case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break; - case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break; + case InstructionOp::lwc2: CompileLoadStoreTemplate(&Recompiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break; + case InstructionOp::swc2: CompileLoadStoreTemplate(&Recompiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break; // swc0/lwc0/cop1/cop3 are essentially no-ops case InstructionOp::cop1: @@ -1343,7 +1344,7 @@ void CPU::NewRec::Compiler::CompileInstruction() #endif } -void CPU::NewRec::Compiler::CompileBranchDelaySlot(bool dirty_pc /* = true */) +void CPU::Recompiler::Recompiler::CompileBranchDelaySlot(bool dirty_pc /* = true */) { // Update load delay at the end of the previous instruction. UpdateLoadDelay(); @@ -1365,8 +1366,9 @@ void CPU::NewRec::Compiler::CompileBranchDelaySlot(bool dirty_pc /* = true */) m_current_instruction_branch_delay_slot = false; } -void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(CompileFlags), - void (Compiler::*func)(CompileFlags), const void* pgxp_cpu_func, u32 tflags) +void CPU::Recompiler::Recompiler::CompileTemplate(void (Recompiler::*const_func)(CompileFlags), + void (Recompiler::*func)(CompileFlags), const void* pgxp_cpu_func, + u32 tflags) { // TODO: This is where we will do memory operand optimization. Remember to kill constants! 
// TODO: Swap S and T if commutative @@ -1600,9 +1602,9 @@ void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(Compile } } -void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, - const std::optional&), - MemoryAccessSize size, bool store, bool sign, u32 tflags) +void CPU::Recompiler::Recompiler::CompileLoadStoreTemplate( + void (Recompiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional&), + MemoryAccessSize size, bool store, bool sign, u32 tflags) { const Reg rs = inst->i.rs; const Reg rt = inst->i.rt; @@ -1716,13 +1718,13 @@ void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(Comp } } -void CPU::NewRec::Compiler::TruncateBlock() +void CPU::Recompiler::Recompiler::TruncateBlock() { m_block->size = ((m_current_instruction_pc - m_block->pc) / sizeof(Instruction)) + 1; iinfo->is_last_instruction = true; } -const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const +const TickCount* CPU::Recompiler::Recompiler::GetFetchMemoryAccessTimePtr() const { const TickCount* ptr = Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word); @@ -1730,8 +1732,8 @@ const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const return ptr; } -void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional& address, bool store, - bool use_fastmem) +void CPU::Recompiler::Recompiler::FlushForLoadStore(const std::optional& address, bool store, + bool use_fastmem) { if (use_fastmem) return; @@ -1740,7 +1742,7 @@ void CPU::NewRec::Compiler::FlushForLoadStore(const std::optionalj.target << 2); @@ -1780,7 +1782,7 @@ void CPU::NewRec::Compiler::Compile_j() EndBlock(newpc, true); } -void CPU::NewRec::Compiler::Compile_jr_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_jr_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); const u32 newpc = GetConstantRegU32(cf.MipsS()); @@ -1794,7 +1796,7 @@ void CPU::NewRec::Compiler::Compile_jr_const(CompileFlags cf) EndBlock(newpc, true); } -void CPU::NewRec::Compiler::Compile_jal() +void CPU::Recompiler::Recompiler::Compile_jal() { const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2); SetConstantReg(Reg::ra, GetBranchReturnAddress({})); @@ -1802,7 +1804,7 @@ void CPU::NewRec::Compiler::Compile_jal() EndBlock(newpc, true); } -void CPU::NewRec::Compiler::Compile_jalr_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_jalr_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); const u32 newpc = GetConstantRegU32(cf.MipsS()); @@ -1813,17 +1815,17 @@ void CPU::NewRec::Compiler::Compile_jalr_const(CompileFlags cf) EndBlock(newpc, true); } -void CPU::NewRec::Compiler::Compile_syscall() +void CPU::Recompiler::Recompiler::Compile_syscall() { EndBlockWithException(Exception::Syscall); } -void CPU::NewRec::Compiler::Compile_break() +void CPU::Recompiler::Recompiler::Compile_break() { EndBlockWithException(Exception::BP); } -void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_b_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); @@ -1842,7 +1844,7 @@ void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf) EndBlock(taken ? 
taken_pc : m_compiler_pc, true); } -void CPU::NewRec::Compiler::Compile_b(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_b(CompileFlags cf) { const u8 irt = static_cast(inst->i.rt.GetValue()); const bool bgez = ConvertToBoolUnchecked(irt & u8(1)); @@ -1854,47 +1856,47 @@ void CPU::NewRec::Compiler::Compile_b(CompileFlags cf) Compile_bxx(cf, bgez ? BranchCondition::GreaterEqualZero : BranchCondition::LessThanZero); } -void CPU::NewRec::Compiler::Compile_blez(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_blez(CompileFlags cf) { Compile_bxx(cf, BranchCondition::LessEqualZero); } -void CPU::NewRec::Compiler::Compile_blez_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_blez_const(CompileFlags cf) { Compile_bxx_const(cf, BranchCondition::LessEqualZero); } -void CPU::NewRec::Compiler::Compile_bgtz(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_bgtz(CompileFlags cf) { Compile_bxx(cf, BranchCondition::GreaterThanZero); } -void CPU::NewRec::Compiler::Compile_bgtz_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_bgtz_const(CompileFlags cf) { Compile_bxx_const(cf, BranchCondition::GreaterThanZero); } -void CPU::NewRec::Compiler::Compile_beq(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_beq(CompileFlags cf) { Compile_bxx(cf, BranchCondition::Equal); } -void CPU::NewRec::Compiler::Compile_beq_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_beq_const(CompileFlags cf) { Compile_bxx_const(cf, BranchCondition::Equal); } -void CPU::NewRec::Compiler::Compile_bne(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_bne(CompileFlags cf) { Compile_bxx(cf, BranchCondition::NotEqual); } -void CPU::NewRec::Compiler::Compile_bne_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_bne_const(CompileFlags cf) { Compile_bxx_const(cf, BranchCondition::NotEqual); } -void CPU::NewRec::Compiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond) +void CPU::Recompiler::Recompiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -1935,79 +1937,79 @@ void CPU::NewRec::Compiler::Compile_bxx_const(CompileFlags cf, BranchCondition c EndBlock(taken ? 
taken_pc : m_compiler_pc, true); } -void CPU::NewRec::Compiler::Compile_sll_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sll_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << inst->r.shamt); } -void CPU::NewRec::Compiler::Compile_srl_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_srl_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> inst->r.shamt); } -void CPU::NewRec::Compiler::Compile_sra_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sra_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), static_cast(GetConstantRegS32(cf.MipsT()) >> inst->r.shamt)); } -void CPU::NewRec::Compiler::Compile_sllv_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sllv_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << (GetConstantRegU32(cf.MipsS()) & 0x1Fu)); } -void CPU::NewRec::Compiler::Compile_srlv_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_srlv_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu)); } -void CPU::NewRec::Compiler::Compile_srav_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_srav_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), static_cast(GetConstantRegS32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu))); } -void CPU::NewRec::Compiler::Compile_and_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_and_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) & GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_or_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_or_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_xor_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_xor_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) ^ GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_nor_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_nor_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), ~(GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT()))); } -void CPU::NewRec::Compiler::Compile_slt_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_slt_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < GetConstantRegS32(cf.MipsT()))); } -void CPU::NewRec::Compiler::Compile_sltu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sltu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < 
GetConstantRegU32(cf.MipsT()))); } -void CPU::NewRec::Compiler::Compile_mult_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_mult_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2017,7 +2019,7 @@ void CPU::NewRec::Compiler::Compile_mult_const(CompileFlags cf) SetConstantReg(Reg::lo, static_cast(res)); } -void CPU::NewRec::Compiler::Compile_multu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_multu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2026,7 +2028,7 @@ void CPU::NewRec::Compiler::Compile_multu_const(CompileFlags cf) SetConstantReg(Reg::lo, static_cast(res)); } -void CPU::NewRec::Compiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* hi) +void CPU::Recompiler::Recompiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* hi) { if (denom == 0) { @@ -2047,7 +2049,7 @@ void CPU::NewRec::Compiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* h } } -void CPU::NewRec::Compiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi) +void CPU::Recompiler::Recompiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi) { if (denom == 0) { @@ -2062,7 +2064,7 @@ void CPU::NewRec::Compiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* } } -void CPU::NewRec::Compiler::Compile_div_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_div_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2076,7 +2078,7 @@ void CPU::NewRec::Compiler::Compile_div_const(CompileFlags cf) SetConstantReg(Reg::lo, lo); } -void CPU::NewRec::Compiler::Compile_divu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_divu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2090,7 +2092,7 @@ void CPU::NewRec::Compiler::Compile_divu_const(CompileFlags cf) SetConstantReg(Reg::lo, lo); } -void CPU::NewRec::Compiler::Compile_add_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_add_const(CompileFlags cf) { // TODO: Overflow DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2098,13 +2100,13 @@ void CPU::NewRec::Compiler::Compile_add_const(CompileFlags cf) SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_addu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_addu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_sub_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sub_const(CompileFlags cf) { // TODO: Overflow DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); @@ -2112,13 +2114,13 @@ void CPU::NewRec::Compiler::Compile_sub_const(CompileFlags cf) SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_subu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_subu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT())); } -void CPU::NewRec::Compiler::Compile_addi_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_addi_const(CompileFlags cf) { // TODO: Overflow 
DebugAssert(HasConstantReg(cf.MipsS())); @@ -2126,43 +2128,43 @@ void CPU::NewRec::Compiler::Compile_addi_const(CompileFlags cf) SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32()); } -void CPU::NewRec::Compiler::Compile_addiu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_addiu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32()); } -void CPU::NewRec::Compiler::Compile_slti_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_slti_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < static_cast(inst->i.imm_sext32()))); } -void CPU::NewRec::Compiler::Compile_sltiu_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_sltiu_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) < inst->i.imm_sext32()); } -void CPU::NewRec::Compiler::Compile_andi_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_andi_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) & inst->i.imm_zext32()); } -void CPU::NewRec::Compiler::Compile_ori_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_ori_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) | inst->i.imm_zext32()); } -void CPU::NewRec::Compiler::Compile_xori_const(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_xori_const(CompileFlags cf) { DebugAssert(HasConstantReg(cf.MipsS())); SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) ^ inst->i.imm_zext32()); } -void CPU::NewRec::Compiler::Compile_lui() +void CPU::Recompiler::Recompiler::Compile_lui() { if (inst->i.rt == Reg::zero) return; @@ -2191,17 +2193,17 @@ static constexpr const std::array, 16> s_cop0_table = { {&CPU::g_state.cop0_regs.EPC, 0x00000000u}, {&CPU::g_state.cop0_regs.PRID, 0x00000000u}}}; -u32* CPU::NewRec::Compiler::GetCop0RegPtr(Cop0Reg reg) +u32* CPU::Recompiler::Recompiler::GetCop0RegPtr(Cop0Reg reg) { return (static_cast(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast(reg)].first : nullptr; } -u32 CPU::NewRec::Compiler::GetCop0RegWriteMask(Cop0Reg reg) +u32 CPU::Recompiler::Recompiler::GetCop0RegWriteMask(Cop0Reg reg) { return (static_cast(reg) < s_cop0_table.size()) ? 
s_cop0_table[static_cast(reg)].second : 0; } -void CPU::NewRec::Compiler::Compile_mfc0(CompileFlags cf) +void CPU::Recompiler::Recompiler::Compile_mfc0(CompileFlags cf) { const Cop0Reg r = static_cast(MipsD()); const u32* ptr = GetCop0RegPtr(r); @@ -2216,8 +2218,8 @@ void CPU::NewRec::Compiler::Compile_mfc0(CompileFlags cf) LoadHostRegFromCPUPointer(cf.host_t, ptr); } -std::pair -CPU::NewRec::Compiler::GetGTERegisterPointer(u32 index, bool writing) +std::pair +CPU::Recompiler::Recompiler::GetGTERegisterPointer(u32 index, bool writing) { if (!writing) { @@ -2309,14 +2311,14 @@ CPU::NewRec::Compiler::GetGTERegisterPointer(u32 index, bool writing) } } -void CPU::NewRec::Compiler::AddGTETicks(TickCount ticks) +void CPU::Recompiler::Recompiler::AddGTETicks(TickCount ticks) { // TODO: check, int has +1 here m_gte_done_cycle = m_cycles + ticks; DEBUG_LOG("Adding {} GTE ticks", ticks); } -void CPU::NewRec::Compiler::StallUntilGTEComplete() +void CPU::Recompiler::Recompiler::StallUntilGTEComplete() { // TODO: hack to match old rec.. this may or may not be correct behavior // it's the difference between stalling before and after the current instruction's cycle @@ -2342,7 +2344,7 @@ void CPU::NewRec::Compiler::StallUntilGTEComplete() m_cycles++; } -void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info) +void CPU::Recompiler::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info) { // remove the cycles we added for the memory read, then take them off again after the backpatch // the normal rec path will add the ram read ticks later, so we need to take them off at the end @@ -2367,7 +2369,7 @@ void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::Loadst CPU::CodeCache::CommitFarCode(thunk_size); } -void CPU::NewRec::Compiler::InitSpeculativeRegs() +void CPU::Recompiler::Recompiler::InitSpeculativeRegs() { for (u8 i = 0; i < static_cast(Reg::count); i++) m_speculative_constants.regs[i] = g_state.regs.r[i]; @@ -2376,19 +2378,19 @@ void CPU::NewRec::Compiler::InitSpeculativeRegs() m_speculative_constants.memory.clear(); } -void CPU::NewRec::Compiler::InvalidateSpeculativeValues() +void CPU::Recompiler::Recompiler::InvalidateSpeculativeValues() { m_speculative_constants.regs.fill(std::nullopt); m_speculative_constants.memory.clear(); m_speculative_constants.cop0_sr.reset(); } -CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg) +CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecReadReg(Reg reg) { return m_speculative_constants.regs[static_cast(reg)]; } -void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value) +void CPU::Recompiler::Recompiler::SpecWriteReg(Reg reg, SpecValue value) { if (reg == Reg::zero) return; @@ -2396,7 +2398,7 @@ void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value) m_speculative_constants.regs[static_cast(reg)] = value; } -void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg) +void CPU::Recompiler::Recompiler::SpecInvalidateReg(Reg reg) { if (reg == Reg::zero) return; @@ -2404,7 +2406,7 @@ void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg) m_speculative_constants.regs[static_cast(reg)].reset(); } -void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src) +void CPU::Recompiler::Recompiler::SpecCopyReg(Reg dst, Reg src) { if (dst == Reg::zero) return; @@ -2412,7 +2414,7 @@ void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src) m_speculative_constants.regs[static_cast(dst)] = 
m_speculative_constants.regs[static_cast(src)]; } -CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address) +CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecReadMem(VirtualMemoryAddress address) { auto it = m_speculative_constants.memory.find(address); if (it != m_speculative_constants.memory.end()) @@ -2437,7 +2439,7 @@ CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemor return std::nullopt; } -void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value) +void CPU::Recompiler::Recompiler::SpecWriteMem(u32 address, SpecValue value) { auto it = m_speculative_constants.memory.find(address); if (it != m_speculative_constants.memory.end()) @@ -2451,12 +2453,12 @@ void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value) m_speculative_constants.memory.emplace(address, value); } -void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address) +void CPU::Recompiler::Recompiler::SpecInvalidateMem(VirtualMemoryAddress address) { SpecWriteMem(address, std::nullopt); } -bool CPU::NewRec::Compiler::SpecIsCacheIsolated() +bool CPU::Recompiler::Recompiler::SpecIsCacheIsolated() { if (!m_speculative_constants.cop0_sr.has_value()) return false; @@ -2465,24 +2467,24 @@ bool CPU::NewRec::Compiler::SpecIsCacheIsolated() return sr.Isc; } -void CPU::NewRec::Compiler::SpecExec_b() +void CPU::Recompiler::Recompiler::SpecExec_b() { const bool link = (static_cast(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10); if (link) SpecWriteReg(Reg::ra, m_compiler_pc); } -void CPU::NewRec::Compiler::SpecExec_jal() +void CPU::Recompiler::Recompiler::SpecExec_jal() { SpecWriteReg(Reg::ra, m_compiler_pc); } -void CPU::NewRec::Compiler::SpecExec_jalr() +void CPU::Recompiler::Recompiler::SpecExec_jalr() { SpecWriteReg(inst->r.rd, m_compiler_pc); } -void CPU::NewRec::Compiler::SpecExec_sll() +void CPU::Recompiler::Recompiler::SpecExec_sll() { const SpecValue rt = SpecReadReg(inst->r.rt); if (rt.has_value()) @@ -2491,7 +2493,7 @@ void CPU::NewRec::Compiler::SpecExec_sll() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_srl() +void CPU::Recompiler::Recompiler::SpecExec_srl() { const SpecValue rt = SpecReadReg(inst->r.rt); if (rt.has_value()) @@ -2500,7 +2502,7 @@ void CPU::NewRec::Compiler::SpecExec_srl() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_sra() +void CPU::Recompiler::Recompiler::SpecExec_sra() { const SpecValue rt = SpecReadReg(inst->r.rt); if (rt.has_value()) @@ -2509,7 +2511,7 @@ void CPU::NewRec::Compiler::SpecExec_sra() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_sllv() +void CPU::Recompiler::Recompiler::SpecExec_sllv() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2519,7 +2521,7 @@ void CPU::NewRec::Compiler::SpecExec_sllv() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_srlv() +void CPU::Recompiler::Recompiler::SpecExec_srlv() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2529,7 +2531,7 @@ void CPU::NewRec::Compiler::SpecExec_srlv() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_srav() +void CPU::Recompiler::Recompiler::SpecExec_srav() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2539,7 +2541,7 @@ void CPU::NewRec::Compiler::SpecExec_srav() SpecInvalidateReg(inst->r.rd); } -void 
CPU::NewRec::Compiler::SpecExec_mult() +void CPU::Recompiler::Recompiler::SpecExec_mult() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2557,7 +2559,7 @@ void CPU::NewRec::Compiler::SpecExec_mult() } } -void CPU::NewRec::Compiler::SpecExec_multu() +void CPU::Recompiler::Recompiler::SpecExec_multu() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2574,7 +2576,7 @@ void CPU::NewRec::Compiler::SpecExec_multu() } } -void CPU::NewRec::Compiler::SpecExec_div() +void CPU::Recompiler::Recompiler::SpecExec_div() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2592,7 +2594,7 @@ void CPU::NewRec::Compiler::SpecExec_div() } } -void CPU::NewRec::Compiler::SpecExec_divu() +void CPU::Recompiler::Recompiler::SpecExec_divu() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2610,12 +2612,12 @@ void CPU::NewRec::Compiler::SpecExec_divu() } } -void CPU::NewRec::Compiler::SpecExec_add() +void CPU::Recompiler::Recompiler::SpecExec_add() { SpecExec_addu(); } -void CPU::NewRec::Compiler::SpecExec_addu() +void CPU::Recompiler::Recompiler::SpecExec_addu() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2625,12 +2627,12 @@ void CPU::NewRec::Compiler::SpecExec_addu() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_sub() +void CPU::Recompiler::Recompiler::SpecExec_sub() { SpecExec_subu(); } -void CPU::NewRec::Compiler::SpecExec_subu() +void CPU::Recompiler::Recompiler::SpecExec_subu() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2640,7 +2642,7 @@ void CPU::NewRec::Compiler::SpecExec_subu() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_and() +void CPU::Recompiler::Recompiler::SpecExec_and() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2650,7 +2652,7 @@ void CPU::NewRec::Compiler::SpecExec_and() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_or() +void CPU::Recompiler::Recompiler::SpecExec_or() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2660,7 +2662,7 @@ void CPU::NewRec::Compiler::SpecExec_or() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_xor() +void CPU::Recompiler::Recompiler::SpecExec_xor() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2670,7 +2672,7 @@ void CPU::NewRec::Compiler::SpecExec_xor() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_nor() +void CPU::Recompiler::Recompiler::SpecExec_nor() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2680,7 +2682,7 @@ void CPU::NewRec::Compiler::SpecExec_nor() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_slt() +void CPU::Recompiler::Recompiler::SpecExec_slt() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2690,7 +2692,7 @@ void CPU::NewRec::Compiler::SpecExec_slt() SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_sltu() +void CPU::Recompiler::Recompiler::SpecExec_sltu() { const SpecValue rs = SpecReadReg(inst->r.rs); const SpecValue rt = SpecReadReg(inst->r.rt); @@ -2700,12 +2702,12 @@ void CPU::NewRec::Compiler::SpecExec_sltu() 
SpecInvalidateReg(inst->r.rd); } -void CPU::NewRec::Compiler::SpecExec_addi() +void CPU::Recompiler::Recompiler::SpecExec_addi() { SpecExec_addiu(); } -void CPU::NewRec::Compiler::SpecExec_addiu() +void CPU::Recompiler::Recompiler::SpecExec_addiu() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2714,7 +2716,7 @@ void CPU::NewRec::Compiler::SpecExec_addiu() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_slti() +void CPU::Recompiler::Recompiler::SpecExec_slti() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2723,7 +2725,7 @@ void CPU::NewRec::Compiler::SpecExec_slti() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_sltiu() +void CPU::Recompiler::Recompiler::SpecExec_sltiu() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2732,7 +2734,7 @@ void CPU::NewRec::Compiler::SpecExec_sltiu() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_andi() +void CPU::Recompiler::Recompiler::SpecExec_andi() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2741,7 +2743,7 @@ void CPU::NewRec::Compiler::SpecExec_andi() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_ori() +void CPU::Recompiler::Recompiler::SpecExec_ori() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2750,7 +2752,7 @@ void CPU::NewRec::Compiler::SpecExec_ori() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_xori() +void CPU::Recompiler::Recompiler::SpecExec_xori() { const SpecValue rs = SpecReadReg(inst->i.rs); if (rs.has_value()) @@ -2759,18 +2761,18 @@ void CPU::NewRec::Compiler::SpecExec_xori() SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_lui() +void CPU::Recompiler::Recompiler::SpecExec_lui() { SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16); } -CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr() +CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecExec_LoadStoreAddr() { const SpecValue rs = SpecReadReg(inst->i.rs); return rs.has_value() ? 
(rs.value() + inst->i.imm_sext32()) : rs; } -void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign) +void CPU::Recompiler::Recompiler::SpecExec_lxx(MemoryAccessSize size, bool sign) { const SpecValue addr = SpecExec_LoadStoreAddr(); SpecValue val; @@ -2800,13 +2802,13 @@ void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign) SpecWriteReg(inst->r.rt, val); } -void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr) +void CPU::Recompiler::Recompiler::SpecExec_lwx(bool lwr) { // TODO SpecInvalidateReg(inst->i.rt); } -void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size) +void CPU::Recompiler::Recompiler::SpecExec_sxx(MemoryAccessSize size) { const SpecValue addr = SpecExec_LoadStoreAddr(); if (!addr.has_value()) @@ -2836,21 +2838,21 @@ void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size) SpecWriteMem(addr.value(), rt); } -void CPU::NewRec::Compiler::SpecExec_swx(bool swr) +void CPU::Recompiler::Recompiler::SpecExec_swx(bool swr) { const SpecValue addr = SpecExec_LoadStoreAddr(); if (addr.has_value()) SpecInvalidateMem(addr.value() & ~3u); } -void CPU::NewRec::Compiler::SpecExec_swc2() +void CPU::Recompiler::Recompiler::SpecExec_swc2() { const SpecValue addr = SpecExec_LoadStoreAddr(); if (addr.has_value()) SpecInvalidateMem(addr.value()); } -void CPU::NewRec::Compiler::SpecExec_mfc0() +void CPU::Recompiler::Recompiler::SpecExec_mfc0() { const Cop0Reg rd = static_cast(inst->r.rd.GetValue()); if (rd != Cop0Reg::SR) @@ -2862,7 +2864,7 @@ void CPU::NewRec::Compiler::SpecExec_mfc0() SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr); } -void CPU::NewRec::Compiler::SpecExec_mtc0() +void CPU::Recompiler::Recompiler::SpecExec_mtc0() { const Cop0Reg rd = static_cast(inst->r.rd.GetValue()); if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value()) @@ -2878,7 +2880,7 @@ void CPU::NewRec::Compiler::SpecExec_mtc0() m_speculative_constants.cop0_sr = val; } -void CPU::NewRec::Compiler::SpecExec_rfe() +void CPU::Recompiler::Recompiler::SpecExec_rfe() { if (!m_speculative_constants.cop0_sr.has_value()) return; diff --git a/src/core/cpu_newrec_compiler.h b/src/core/cpu_recompiler.h similarity index 98% rename from src/core/cpu_newrec_compiler.h rename to src/core/cpu_recompiler.h index 698e3ffcd..8d14d3592 100644 --- a/src/core/cpu_newrec_compiler.h +++ b/src/core/cpu_recompiler.h @@ -13,7 +13,7 @@ #include #include -namespace CPU::NewRec { +namespace CPU::Recompiler { // Global options static constexpr bool EMULATE_LOAD_DELAYS = true; @@ -35,11 +35,11 @@ static constexpr bool HAS_MEMORY_OPERANDS = false; #endif // TODO: Get rid of the virtuals... somehow. 
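All of the SpecExec_* handlers above funnel into one small piece of state: a per-register table of optionally-known values plus a sparse map of known memory words. A self-contained sketch of that mechanism (names hypothetical; the real state lives in m_speculative_constants and also tracks cop0 SR):

  #include <array>
  #include <cstdint>
  #include <optional>
  #include <unordered_map>

  using SpecValue = std::optional<uint32_t>; // nullopt == value unknown

  struct SpeculativeConstants
  {
    std::array<SpecValue, 32> regs;                 // one slot per MIPS GPR
    std::unordered_map<uint32_t, SpecValue> memory; // address -> known word

    SpecValue ReadReg(uint32_t r) const { return regs[r]; }

    void WriteReg(uint32_t r, SpecValue v)
    {
      if (r != 0) // writes to $zero are dropped, it always reads as 0
        regs[r] = v;
    }

    void InvalidateAll()
    {
      regs.fill(std::nullopt);
      memory.clear();
    }
  };

Wholly-constant instructions can then be folded away outright (the *_const handlers earlier), and a load from a known address can keep its destination register constant instead of forcing a spill.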
-class Compiler +class Recompiler { public: - Compiler(); - virtual ~Compiler(); + Recompiler(); + virtual ~Recompiler(); const void* CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size); @@ -271,9 +271,9 @@ protected: void CompileInstruction(); void CompileBranchDelaySlot(bool dirty_pc = true); - void CompileTemplate(void (Compiler::*const_func)(CompileFlags), void (Compiler::*func)(CompileFlags), + void CompileTemplate(void (Recompiler::*const_func)(CompileFlags), void (Recompiler::*func)(CompileFlags), const void* pgxp_cpu_func, u32 tflags); - void CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, + void CompileLoadStoreTemplate(void (Recompiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional&), MemoryAccessSize size, bool store, bool sign, u32 tflags); void FlushForLoadStore(const std::optional& address, bool store, bool use_fastmem); @@ -539,5 +539,5 @@ u32 CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, TickCount cycles_to_remove, u32 gpr_bitmask, u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, bool is_load); -extern Compiler* g_compiler; -} // namespace CPU::NewRec +extern Recompiler* g_compiler; +} // namespace CPU::Recompiler diff --git a/src/core/cpu_newrec_compiler_aarch32.cpp b/src/core/cpu_recompiler_arm32.cpp similarity index 74% rename from src/core/cpu_newrec_compiler_aarch32.cpp rename to src/core/cpu_recompiler_arm32.cpp index ce6f1f40a..4745b870a 100644 --- a/src/core/cpu_newrec_compiler_aarch32.cpp +++ b/src/core/cpu_recompiler_arm32.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 -#include "cpu_newrec_compiler_aarch32.h" +#include "cpu_recompiler_arm32.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" #include "cpu_recompiler_thunks.h" @@ -13,52 +13,327 @@ #include "common/align.h" #include "common/assert.h" #include "common/log.h" +#include "common/memmap.h" #include "common/string_util.h" #include #ifdef CPU_ARCH_ARM32 +#ifdef ENABLE_HOST_DISASSEMBLY +#include "vixl/aarch32/disasm-aarch32.h" +#include +#endif + LOG_CHANNEL(Recompiler); #define PTR(x) vixl::aarch32::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state))) #define RMEMBASE vixl::aarch32::r3 -namespace CPU::NewRec { +namespace CPU::Recompiler { using namespace vixl::aarch32; -using CPU::Recompiler::armEmitCall; -using CPU::Recompiler::armEmitCondBranch; -using CPU::Recompiler::armEmitFarLoad; -using CPU::Recompiler::armEmitJmp; -using CPU::Recompiler::armEmitMov; -using CPU::Recompiler::armGetJumpTrampoline; -using CPU::Recompiler::armGetPCDisplacement; -using CPU::Recompiler::armIsCallerSavedRegister; -using CPU::Recompiler::armIsPCDisplacementInImmediateRange; -using CPU::Recompiler::armMoveAddressToReg; +constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers +constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes +constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE; -AArch32Compiler s_instance; -Compiler* g_compiler = &s_instance; +static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024; +static std::unordered_map s_trampoline_targets; +static u8* s_trampoline_start_ptr = nullptr; +static u32 s_trampoline_used = 0; -} // namespace CPU::NewRec +static ARM32Recompiler s_instance; +Recompiler* g_compiler = &s_instance; -CPU::NewRec::AArch32Compiler::AArch32Compiler() : 
m_emitter(A32), m_far_emitter(A32) +} // namespace CPU::Recompiler + +bool CPU::Recompiler::armIsCallerSavedRegister(u32 id) +{ + return ((id >= 0 && id <= 3) || // r0-r3 + (id == 12 || id == 14)); // sp, pc +} + +s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) +{ + Assert(Common::IsAlignedPow2(reinterpret_cast(current), 4)); + Assert(Common::IsAlignedPow2(reinterpret_cast(target), 4)); + return static_cast((reinterpret_cast(target) - reinterpret_cast(current))); +} + +bool CPU::Recompiler::armIsPCDisplacementInImmediateRange(s32 displacement) +{ + return (displacement >= -33554432 && displacement <= 33554428); +} + +void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm) +{ + if (vixl::IsUintN(16, imm)) + { + armAsm->mov(al, rd, imm & 0xffff); + return; + } + + armAsm->mov(al, rd, imm & 0xffff); + armAsm->movt(al, rd, imm >> 16); +} + +void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, + const void* addr) +{ + armEmitMov(armAsm, reg, static_cast(reinterpret_cast(addr))); +} + +void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) +{ + const void* cur = armAsm->GetCursorAddress(); + s32 displacement = armGetPCDisplacement(cur, ptr); + bool use_bx = !armIsPCDisplacementInImmediateRange(displacement); + if (use_bx && !force_inline) + { + if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) + { + displacement = armGetPCDisplacement(cur, trampoline); + use_bx = !armIsPCDisplacementInImmediateRange(displacement); + } + } + + if (use_bx) + { + armMoveAddressToReg(armAsm, RSCRATCH, ptr); + armAsm->bx(RSCRATCH); + } + else + { + Label label(displacement + armAsm->GetCursorOffset()); + armAsm->b(&label); + } +} + +void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) +{ + const void* cur = armAsm->GetCursorAddress(); + s32 displacement = armGetPCDisplacement(cur, ptr); + bool use_blx = !armIsPCDisplacementInImmediateRange(displacement); + if (use_blx && !force_inline) + { + if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) + { + displacement = armGetPCDisplacement(cur, trampoline); + use_blx = !armIsPCDisplacementInImmediateRange(displacement); + } + } + + if (use_blx) + { + armMoveAddressToReg(armAsm, RSCRATCH, ptr); + armAsm->blx(RSCRATCH); + } + else + { + Label label(displacement + armAsm->GetCursorOffset()); + armAsm->bl(&label); + } +} + +void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, + const void* ptr) +{ + const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress(), ptr); + if (!armIsPCDisplacementInImmediateRange(displacement)) + { + armMoveAddressToReg(armAsm, RSCRATCH, ptr); + armAsm->blx(cond, RSCRATCH); + } + else + { + Label label(displacement + armAsm->GetCursorOffset()); + armAsm->b(cond, &label); + } +} + +void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, + const void* addr) +{ + armMoveAddressToReg(armAsm, reg, addr); + armAsm->ldr(reg, MemOperand(reg)); +} + +void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, + const void* addr, const vixl::aarch32::Register& tempreg) +{ + armMoveAddressToReg(armAsm, tempreg, addr); + armAsm->str(reg, MemOperand(tempreg)); +} + +void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) 
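// (Dumps the generated host code: the body below uses vixl's
//  PrintDisassembler when built with ENABLE_HOST_DISASSEMBLY, and otherwise
//  just logs an error.)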
+{ +#ifdef ENABLE_HOST_DISASSEMBLY + vixl::aarch32::PrintDisassembler dis(std::cout, 0); + dis.SetCodeAddress(reinterpret_cast(start)); + dis.DisassembleA32Buffer(static_cast(start), size); +#else + ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY."); +#endif +} + +u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) +{ + return size / vixl::aarch32::kA32InstructionSizeInBytes; +} + +u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) +{ + using namespace vixl::aarch32; + using namespace CPU::Recompiler; + + const s32 disp = armGetPCDisplacement(code, dst); + DebugAssert(armIsPCDisplacementInImmediateRange(disp)); + + // A32 jumps are silly. + { + Assembler emit(static_cast(code), kA32InstructionSizeInBytes, A32); + Label label(disp); + emit.b(&label); + } + + if (flush_icache) + MemMap::FlushInstructionCache(code, kA32InstructionSizeInBytes); + + return kA32InstructionSizeInBytes; +} + +u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) +{ + auto it = s_trampoline_targets.find(target); + if (it != s_trampoline_targets.end()) + return s_trampoline_start_ptr + it->second; + + // align to 16 bytes? + const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16); + + // 4 movs plus a jump + if (TRAMPOLINE_AREA_SIZE - offset < 20) + { + Panic("Ran out of space in constant pool"); + return nullptr; + } + + u8* start = s_trampoline_start_ptr + offset; + Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset); + armMoveAddressToReg(&armAsm, RSCRATCH, target); + armAsm.bx(RSCRATCH); + + const u32 size = static_cast(armAsm.GetSizeOfCodeGenerated()); + DebugAssert(size < 20); + s_trampoline_targets.emplace(target, offset); + s_trampoline_used = offset + static_cast(size); + + MemMap::FlushInstructionCache(start, size); + return start; +} + +u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) +{ + using namespace vixl::aarch32; + using namespace CPU::Recompiler; + + Assembler actual_asm(static_cast(code), code_size); + Assembler* armAsm = &actual_asm; + +#ifdef VIXL_DEBUG + vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace); +#endif + + Label dispatch; + + g_enter_recompiler = armAsm->GetCursorAddress(); + { + // reserve some space for saving caller-saved registers + armAsm->sub(sp, sp, FUNCTION_STACK_SIZE); + + // Need the CPU state for basically everything :-) + armMoveAddressToReg(armAsm, RSTATE, &g_state); + } + + // check events then for frame done + g_check_events_and_dispatch = armAsm->GetCursorAddress(); + { + Label skip_event_check; + armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); + armAsm->ldr(RARG2, PTR(&g_state.downcount)); + armAsm->cmp(RARG1, RARG2); + armAsm->b(lt, &skip_event_check); + + g_run_events_and_dispatch = armAsm->GetCursorAddress(); + armEmitCall(armAsm, reinterpret_cast(&TimingEvents::RunEvents), true); + + armAsm->bind(&skip_event_check); + } + + // TODO: align? 
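  // The dispatcher emitted below is a two-level table walk, roughly:
  //   block = g_code_lut[pc >> 16][pc];  // second index used as a byte offset
  //   block();
  // The inner table pointers are pre-adjusted so that indexing with the full
  // pc lands on the correct entry, saving a mask and shift on this hot path.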
+ g_dispatcher = armAsm->GetCursorAddress(); + { + armAsm->bind(&dispatch); + + // x9 <- s_fast_map[pc >> 16] + armAsm->ldr(RARG1, PTR(&g_state.pc)); + armMoveAddressToReg(armAsm, RARG3, g_code_lut.data()); + armAsm->lsr(RARG2, RARG1, 16); + armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2)); + + // blr(x9[pc * 2]) (fast_map[pc >> 2]) + armAsm->ldr(RARG1, MemOperand(RARG2, RARG1)); + armAsm->blx(RARG1); + } + + g_compile_or_revalidate_block = armAsm->GetCursorAddress(); + { + armAsm->ldr(RARG1, PTR(&g_state.pc)); + armEmitCall(armAsm, reinterpret_cast(&CompileOrRevalidateBlock), true); + armAsm->b(&dispatch); + } + + g_discard_and_recompile_block = armAsm->GetCursorAddress(); + { + armAsm->ldr(RARG1, PTR(&g_state.pc)); + armEmitCall(armAsm, reinterpret_cast(&DiscardAndRecompileBlock), true); + armAsm->b(&dispatch); + } + + g_interpret_block = armAsm->GetCursorAddress(); + { + armEmitCall(armAsm, reinterpret_cast(GetInterpretUncachedBlockFunction()), true); + armAsm->b(&dispatch); + } + + armAsm->FinalizeCode(); + +#if 0 + // TODO: align? + s_trampoline_targets.clear(); + s_trampoline_start_ptr = static_cast(code) + armAsm->GetCursorOffset(); + s_trampoline_used = 0; +#endif + + return static_cast(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/; +} + +CPU::Recompiler::ARM32Recompiler::ARM32Recompiler() : m_emitter(A32), m_far_emitter(A32) { } -CPU::NewRec::AArch32Compiler::~AArch32Compiler() = default; +CPU::Recompiler::ARM32Recompiler::~ARM32Recompiler() = default; -const void* CPU::NewRec::AArch32Compiler::GetCurrentCodePointer() +const void* CPU::Recompiler::ARM32Recompiler::GetCurrentCodePointer() { return armAsm->GetCursorAddress(); } -void CPU::NewRec::AArch32Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, - u8* far_code_buffer, u32 far_code_space) +void CPU::Recompiler::ARM32Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, + u8* far_code_buffer, u32 far_code_space) { - Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); + Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); // TODO: don't recreate this every time.. 
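  // Reset builds a fresh near/far emitter pair per block: m_emitter writes
  // the hot path into the main code buffer, while m_far_emitter collects
  // rarely-taken code (exception and slow-memory paths reached via
  // SwitchToFarCode) in a separate buffer, keeping it out of the
  // straight-line instruction stream.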
DebugAssert(!armAsm); @@ -94,7 +369,7 @@ void CPU::NewRec::AArch32Compiler::Reset(CodeCache::Block* block, u8* code_buffe } } -void CPU::NewRec::AArch32Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond) +void CPU::Recompiler::ARM32Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond) { DebugAssert(armAsm == &m_emitter); if (emit_jump) @@ -120,7 +395,7 @@ void CPU::NewRec::AArch32Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch32 armAsm = &m_far_emitter; } -void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit) +void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit) { armAsm->tst(reg, 1u << bit); @@ -141,7 +416,8 @@ void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch32:: armAsm = &m_far_emitter; } -void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg, bool nonzero) +void CPU::Recompiler::ARM32Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg, + bool nonzero) { armAsm->cmp(reg, 0); @@ -162,7 +438,7 @@ void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl: armAsm = &m_far_emitter; } -void CPU::NewRec::AArch32Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond) +void CPU::Recompiler::ARM32Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond) { DebugAssert(armAsm == &m_far_emitter); if (emit_jump) @@ -188,17 +464,17 @@ void CPU::NewRec::AArch32Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch3 armAsm = &m_emitter; } -void CPU::NewRec::AArch32Compiler::EmitMov(const vixl::aarch32::Register& dst, u32 val) +void CPU::Recompiler::ARM32Recompiler::EmitMov(const vixl::aarch32::Register& dst, u32 val) { armEmitMov(armAsm, dst, val); } -void CPU::NewRec::AArch32Compiler::EmitCall(const void* ptr, bool force_inline /*= false*/) +void CPU::Recompiler::ARM32Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/) { armEmitCall(armAsm, ptr, force_inline); } -vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckAddSubConstant(s32 val) +vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckAddSubConstant(s32 val) { if (ImmediateA32::IsImmediateA32(static_cast(val))) return vixl::aarch32::Operand(static_cast(val)); @@ -207,27 +483,27 @@ vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckAddSubConstant(s32 return vixl::aarch32::Operand(RSCRATCH); } -vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckAddSubConstant(u32 val) +vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckAddSubConstant(u32 val) { return armCheckAddSubConstant(static_cast(val)); } -vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckCompareConstant(s32 val) +vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckCompareConstant(s32 val) { return armCheckAddSubConstant(val); } -vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckLogicalConstant(u32 val) +vixl::aarch32::Operand CPU::Recompiler::ARM32Recompiler::armCheckLogicalConstant(u32 val) { return armCheckAddSubConstant(val); } -void CPU::NewRec::AArch32Compiler::BeginBlock() +void CPU::Recompiler::ARM32Recompiler::BeginBlock() { - Compiler::BeginBlock(); + Recompiler::BeginBlock(); } -void CPU::NewRec::AArch32Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) +void 
CPU::Recompiler::ARM32Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) { // store it first to reduce code size, because we can offset armMoveAddressToReg(armAsm, RARG1, ram_ptr); @@ -303,7 +579,7 @@ bool foo(const void* a, const void* b) armAsm->bind(&block_unchanged); } -void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate() +void CPU::Recompiler::ARM32Recompiler::GenerateICacheCheckAndUpdate() { if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) { @@ -359,8 +635,8 @@ void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate() } } -void CPU::NewRec::AArch32Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, - s32 arg3reg /*= -1*/) +void CPU::Recompiler::ARM32Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, + s32 arg3reg /*= -1*/) { if (arg1reg >= 0 && arg1reg != static_cast(RARG1.GetCode())) armAsm->mov(RARG1, Register(arg1reg)); @@ -371,7 +647,7 @@ void CPU::NewRec::AArch32Compiler::GenerateCall(const void* func, s32 arg1reg /* EmitCall(func); } -void CPU::NewRec::AArch32Compiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::Recompiler::ARM32Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -388,7 +664,7 @@ void CPU::NewRec::AArch32Compiler::EndBlock(const std::optional& newpc, boo EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::NewRec::AArch32Compiler::EndBlockWithException(Exception excode) +void CPU::Recompiler::ARM32Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... @@ -406,8 +682,8 @@ void CPU::NewRec::AArch32Compiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::NewRec::AArch32Compiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, - bool force_run_events) +void CPU::Recompiler::ARM32Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, + bool force_run_events) { // event test // pc should've been flushed @@ -464,7 +740,7 @@ void CPU::NewRec::AArch32Compiler::EndAndLinkBlock(const std::optional& new } } -const void* CPU::NewRec::AArch32Compiler::EndCompile(u32* code_size, u32* far_code_size) +const void* CPU::Recompiler::ARM32Recompiler::EndCompile(u32* code_size, u32* far_code_size) { #ifdef VIXL_DEBUG m_emitter_check.reset(); @@ -481,7 +757,7 @@ const void* CPU::NewRec::AArch32Compiler::EndCompile(u32* code_size, u32* far_co return code; } -const char* CPU::NewRec::AArch32Compiler::GetHostRegName(u32 reg) const +const char* CPU::Recompiler::ARM32Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array reg64_names = { {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", @@ -489,80 +765,80 @@ const char* CPU::NewRec::AArch32Compiler::GetHostRegName(u32 reg) const return (reg < reg64_names.size()) ? 
reg64_names[reg] : "UNKNOWN"; } -void CPU::NewRec::AArch32Compiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::Recompiler::ARM32Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { EmitMov(Register(reg), val); } -void CPU::NewRec::AArch32Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::ARM32Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { armAsm->ldr(Register(reg), PTR(ptr)); } -void CPU::NewRec::AArch32Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::ARM32Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { armAsm->str(Register(reg), PTR(ptr)); } -void CPU::NewRec::AArch32Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::Recompiler::ARM32Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { EmitMov(RSCRATCH, val); armAsm->str(RSCRATCH, PTR(ptr)); } -void CPU::NewRec::AArch32Compiler::CopyHostReg(u32 dst, u32 src) +void CPU::Recompiler::ARM32Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) armAsm->mov(Register(dst), Register(src)); } -void CPU::NewRec::AArch32Compiler::AssertRegOrConstS(CompileFlags cf) const +void CPU::Recompiler::ARM32Recompiler::AssertRegOrConstS(CompileFlags cf) const { DebugAssert(cf.valid_host_s || cf.const_s); } -void CPU::NewRec::AArch32Compiler::AssertRegOrConstT(CompileFlags cf) const +void CPU::Recompiler::ARM32Recompiler::AssertRegOrConstT(CompileFlags cf) const { DebugAssert(cf.valid_host_t || cf.const_t); } -vixl::aarch32::MemOperand CPU::NewRec::AArch32Compiler::MipsPtr(Reg r) const +vixl::aarch32::MemOperand CPU::Recompiler::ARM32Recompiler::MipsPtr(Reg r) const { DebugAssert(r < Reg::count); return PTR(&g_state.regs.r[static_cast(r)]); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegD(CompileFlags cf) const +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return Register(cf.host_d); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegS(CompileFlags cf) const +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return Register(cf.host_s); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegT(CompileFlags cf) const +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return Register(cf.host_t); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegLO(CompileFlags cf) const +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return Register(cf.host_lo); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegHI(CompileFlags cf) const +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return Register(cf.host_hi); } -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GetMembaseReg() +vixl::aarch32::Register CPU::Recompiler::ARM32Recompiler::GetMembaseReg() { const u32 code = RMEMBASE.GetCode(); if (!IsHostRegAllocated(code)) @@ -576,7 +852,7 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GetMembaseReg() return RMEMBASE; } -void CPU::NewRec::AArch32Compiler::MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf) { if (cf.valid_host_s) { @@ -595,7 +871,7 @@ void 
CPU::NewRec::AArch32Compiler::MoveSToReg(const vixl::aarch32::Register& dst } } -void CPU::NewRec::AArch32Compiler::MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf) { if (cf.valid_host_t) { @@ -614,10 +890,10 @@ void CPU::NewRec::AArch32Compiler::MoveTToReg(const vixl::aarch32::Register& dst } } -void CPU::NewRec::AArch32Compiler::MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg) +void CPU::Recompiler::ARM32Recompiler::MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg) { DebugAssert(reg < Reg::count); - if (const std::optional hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) + if (const std::optional hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) armAsm->mov(dst, Register(hreg.value())); else if (HasConstantReg(reg)) EmitMov(dst, GetConstantRegU32(reg)); @@ -625,9 +901,9 @@ void CPU::NewRec::AArch32Compiler::MoveMIPSRegToReg(const vixl::aarch32::Registe armAsm->ldr(dst, MipsPtr(reg)); } -void CPU::NewRec::AArch32Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::Recompiler::ARM32Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, + Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -642,9 +918,9 @@ void CPU::NewRec::AArch32Compiler::GeneratePGXPCallWithMIPSRegs(const void* func EmitCall(func); } -void CPU::NewRec::AArch32Compiler::Flush(u32 flags) +void CPU::Recompiler::ARM32Recompiler::Flush(u32 flags) { - Compiler::Flush(flags); + Recompiler::Flush(flags); if (flags & FLUSH_PC && m_dirty_pc) { @@ -734,7 +1010,7 @@ void CPU::NewRec::AArch32Compiler::Flush(u32 flags) } } -void CPU::NewRec::AArch32Compiler::Compile_Fallback() +void CPU::Recompiler::ARM32Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); @@ -759,7 +1035,7 @@ void CPU::NewRec::AArch32Compiler::Compile_Fallback() m_load_delay_dirty = EMULATE_LOAD_DELAYS; } -void CPU::NewRec::AArch32Compiler::CheckBranchTarget(const vixl::aarch32::Register& pcreg) +void CPU::Recompiler::ARM32Recompiler::CheckBranchTarget(const vixl::aarch32::Register& pcreg) { if (!g_settings.cpu_recompiler_memory_exceptions) return; @@ -774,7 +1050,7 @@ void CPU::NewRec::AArch32Compiler::CheckBranchTarget(const vixl::aarch32::Regist SwitchToNearCode(false); } -void CPU::NewRec::AArch32Compiler::Compile_jr(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_jr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); CheckBranchTarget(pcreg); @@ -785,7 +1061,7 @@ void CPU::NewRec::AArch32Compiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::AArch32Compiler::Compile_jalr(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_jalr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); if (MipsD() != Reg::zero) @@ -798,7 +1074,7 @@ void CPU::NewRec::AArch32Compiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::AArch32Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::Recompiler::ARM32Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { AssertRegOrConstS(cf); @@ -872,7 +1148,7 @@ void CPU::NewRec::AArch32Compiler::Compile_bxx(CompileFlags cf, BranchCondition EndBlock(taken_pc, true); } -void 
CPU::NewRec::AArch32Compiler::Compile_addi(CompileFlags cf, bool overflow) +void CPU::Recompiler::ARM32Recompiler::Compile_addi(CompileFlags cf, bool overflow) { const Register rs = CFGetRegS(cf); const Register rt = CFGetRegT(cf); @@ -894,27 +1170,27 @@ void CPU::NewRec::AArch32Compiler::Compile_addi(CompileFlags cf, bool overflow) } } -void CPU::NewRec::AArch32Compiler::Compile_addi(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_addi(CompileFlags cf) { Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::NewRec::AArch32Compiler::Compile_addiu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_addiu(CompileFlags cf) { Compile_addi(cf, false); } -void CPU::NewRec::AArch32Compiler::Compile_slti(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::NewRec::AArch32Compiler::Compile_sltiu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::NewRec::AArch32Compiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM32Recompiler::Compile_slti(CompileFlags cf, bool sign) { const Register rs = CFGetRegS(cf); const Register rt = CFGetRegT(cf); @@ -923,7 +1199,7 @@ void CPU::NewRec::AArch32Compiler::Compile_slti(CompileFlags cf, bool sign) armAsm->mov(sign ? lt : lo, rt, 1); } -void CPU::NewRec::AArch32Compiler::Compile_andi(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_andi(CompileFlags cf) { const Register rt = CFGetRegT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) @@ -932,7 +1208,7 @@ void CPU::NewRec::AArch32Compiler::Compile_andi(CompileFlags cf) EmitMov(rt, 0); } -void CPU::NewRec::AArch32Compiler::Compile_ori(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_ori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -942,7 +1218,7 @@ void CPU::NewRec::AArch32Compiler::Compile_ori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::NewRec::AArch32Compiler::Compile_xori(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_xori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -952,10 +1228,10 @@ void CPU::NewRec::AArch32Compiler::Compile_xori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::NewRec::AArch32Compiler::Compile_shift(CompileFlags cf, - void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, - vixl::aarch32::Register, - const Operand&)) +void CPU::Recompiler::ARM32Recompiler::Compile_shift(CompileFlags cf, + void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, + vixl::aarch32::Register, + const Operand&)) { const Register rd = CFGetRegD(cf); const Register rt = CFGetRegT(cf); @@ -965,25 +1241,24 @@ void CPU::NewRec::AArch32Compiler::Compile_shift(CompileFlags cf, armAsm->mov(rd, rt); } -void CPU::NewRec::AArch32Compiler::Compile_sll(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sll(CompileFlags cf) { Compile_shift(cf, &Assembler::lsl); } -void CPU::NewRec::AArch32Compiler::Compile_srl(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_srl(CompileFlags cf) { Compile_shift(cf, &Assembler::lsr); } -void CPU::NewRec::AArch32Compiler::Compile_sra(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sra(CompileFlags cf) { Compile_shift(cf, &Assembler::asr); } -void CPU::NewRec::AArch32Compiler::Compile_variable_shift(CompileFlags cf, - void 
(vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, - vixl::aarch32::Register, - const Operand&)) +void CPU::Recompiler::ARM32Recompiler::Compile_variable_shift( + CompileFlags cf, + void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, vixl::aarch32::Register, const Operand&)) { const Register rd = CFGetRegD(cf); @@ -1008,22 +1283,22 @@ void CPU::NewRec::AArch32Compiler::Compile_variable_shift(CompileFlags cf, } } -void CPU::NewRec::AArch32Compiler::Compile_sllv(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lsl); } -void CPU::NewRec::AArch32Compiler::Compile_srlv(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lsr); } -void CPU::NewRec::AArch32Compiler::Compile_srav(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::asr); } -void CPU::NewRec::AArch32Compiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM32Recompiler::Compile_mult(CompileFlags cf, bool sign) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; if (!cf.valid_host_s) @@ -1040,17 +1315,17 @@ void CPU::NewRec::AArch32Compiler::Compile_mult(CompileFlags cf, bool sign) (sign) ? armAsm->smull(lo, hi, rs, rt) : armAsm->umull(lo, hi, rs, rt); } -void CPU::NewRec::AArch32Compiler::Compile_mult(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::NewRec::AArch32Compiler::Compile_multu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::NewRec::AArch32Compiler::Compile_div(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_div(CompileFlags cf) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; if (!cf.valid_host_s) @@ -1096,7 +1371,7 @@ void CPU::NewRec::AArch32Compiler::Compile_div(CompileFlags cf) armAsm->bind(&done); } -void CPU::NewRec::AArch32Compiler::Compile_divu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_divu(CompileFlags cf) { const Register rs = cf.valid_host_s ? 
CFGetRegS(cf) : RARG1; if (!cf.valid_host_s) @@ -1127,7 +1402,7 @@ void CPU::NewRec::AArch32Compiler::Compile_divu(CompileFlags cf) armAsm->bind(&done); } -void CPU::NewRec::AArch32Compiler::TestOverflow(const vixl::aarch32::Register& result) +void CPU::Recompiler::ARM32Recompiler::TestOverflow(const vixl::aarch32::Register& result) { SwitchToFarCode(true, vs); @@ -1143,11 +1418,11 @@ void CPU::NewRec::AArch32Compiler::TestOverflow(const vixl::aarch32::Register& r SwitchToNearCode(false); } -void CPU::NewRec::AArch32Compiler::Compile_dst_op(CompileFlags cf, - void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, - vixl::aarch32::Register, - const Operand&), - bool commutative, bool logical, bool overflow) +void CPU::Recompiler::ARM32Recompiler::Compile_dst_op(CompileFlags cf, + void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, + vixl::aarch32::Register, + const Operand&), + bool commutative, bool logical, bool overflow) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1195,7 +1470,7 @@ void CPU::NewRec::AArch32Compiler::Compile_dst_op(CompileFlags cf, TestOverflow(rd); } -void CPU::NewRec::AArch32Compiler::Compile_add(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_add(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::adds, true, false, true); @@ -1203,12 +1478,12 @@ void CPU::NewRec::AArch32Compiler::Compile_add(CompileFlags cf) Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::NewRec::AArch32Compiler::Compile_addu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::NewRec::AArch32Compiler::Compile_sub(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sub(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::subs, false, false, true); @@ -1216,12 +1491,12 @@ void CPU::NewRec::AArch32Compiler::Compile_sub(CompileFlags cf) Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::NewRec::AArch32Compiler::Compile_subu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::NewRec::AArch32Compiler::Compile_and(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_and(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1242,7 +1517,7 @@ void CPU::NewRec::AArch32Compiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &Assembler::and_, true, true, false); } -void CPU::NewRec::AArch32Compiler::Compile_or(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_or(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1258,7 +1533,7 @@ void CPU::NewRec::AArch32Compiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &Assembler::orr, true, true, false); } -void CPU::NewRec::AArch32Compiler::Compile_xor(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_xor(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1280,23 +1555,23 @@ void CPU::NewRec::AArch32Compiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &Assembler::eor, true, true, false); } -void CPU::NewRec::AArch32Compiler::Compile_nor(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf)); } -void CPU::NewRec::AArch32Compiler::Compile_slt(CompileFlags cf) +void 
CPU::Recompiler::ARM32Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::NewRec::AArch32Compiler::Compile_sltu(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::NewRec::AArch32Compiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM32Recompiler::Compile_slt(CompileFlags cf, bool sign) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1322,9 +1597,9 @@ void CPU::NewRec::AArch32Compiler::Compile_slt(CompileFlags cf, bool sign) } vixl::aarch32::Register -CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, - const std::optional& address, - const std::optional& reg) +CPU::Recompiler::ARM32Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, + const std::optional& address, + const std::optional& reg) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1364,9 +1639,9 @@ CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, } template -vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::aarch32::Register& addr_reg, - MemoryAccessSize size, bool sign, bool use_fastmem, - const RegAllocFn& dst_reg_alloc) +vixl::aarch32::Register +CPU::Recompiler::ARM32Recompiler::GenerateLoad(const vixl::aarch32::Register& addr_reg, MemoryAccessSize size, + bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) { if (use_fastmem) { @@ -1476,9 +1751,9 @@ vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::a return dst_reg; } -void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& addr_reg, - const vixl::aarch32::Register& value_reg, MemoryAccessSize size, - bool use_fastmem) +void CPU::Recompiler::ARM32Recompiler::GenerateStore(const vixl::aarch32::Register& addr_reg, + const vixl::aarch32::Register& value_reg, MemoryAccessSize size, + bool use_fastmem) { if (use_fastmem) { @@ -1562,8 +1837,8 @@ void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& } } -void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? 
std::optional(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -1590,8 +1865,8 @@ void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1684,8 +1959,8 @@ void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -1770,8 +2045,8 @@ void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz } } -void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1798,8 +2073,8 @@ void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1872,8 +2147,8 @@ void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM32Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -1928,7 +2203,7 @@ void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz } } -void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_mtc0(CompileFlags cf) { // TODO: we need better constant setting here.. 
which will need backprop AssertRegOrConstT(cf); @@ -2006,7 +2281,7 @@ void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf) } } -void CPU::NewRec::AArch32Compiler::Compile_rfe(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits)); @@ -2018,7 +2293,7 @@ void CPU::NewRec::AArch32Compiler::Compile_rfe(CompileFlags cf) TestInterrupts(RARG1); } -void CPU::NewRec::AArch32Compiler::TestInterrupts(const vixl::aarch32::Register& sr) +void CPU::Recompiler::ARM32Recompiler::TestInterrupts(const vixl::aarch32::Register& sr) { // if Iec == 0 then goto no_interrupt Label no_interrupt; @@ -2069,7 +2344,7 @@ void CPU::NewRec::AArch32Compiler::TestInterrupts(const vixl::aarch32::Register& armAsm->bind(&no_interrupt); } -void CPU::NewRec::AArch32Compiler::Compile_mfc2(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2110,7 +2385,7 @@ void CPU::NewRec::AArch32Compiler::Compile_mfc2(CompileFlags cf) } } -void CPU::NewRec::AArch32Compiler::Compile_mtc2(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2172,7 +2447,7 @@ void CPU::NewRec::AArch32Compiler::Compile_mtc2(CompileFlags cf) } } -void CPU::NewRec::AArch32Compiler::Compile_cop2(CompileFlags cf) +void CPU::Recompiler::ARM32Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2184,10 +2459,10 @@ void CPU::NewRec::AArch32Compiler::Compile_cop2(CompileFlags cf) AddGTETicks(func_ticks); } -u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, - TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, - u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, - bool is_load) +u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, + TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, + u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, + bool is_load) { Assembler arm_asm(static_cast(thunk_code), thunk_space); Assembler* armAsm = &arm_asm; diff --git a/src/core/cpu_newrec_compiler_aarch32.h b/src/core/cpu_recompiler_arm32.h similarity index 97% rename from src/core/cpu_newrec_compiler_aarch32.h rename to src/core/cpu_recompiler_arm32.h index e506ad098..91fe4e98d 100644 --- a/src/core/cpu_newrec_compiler_aarch32.h +++ b/src/core/cpu_recompiler_arm32.h @@ -3,7 +3,7 @@ #pragma once -#include "cpu_newrec_compiler.h" +#include "cpu_recompiler.h" #include @@ -12,13 +12,13 @@ #include "vixl/aarch32/assembler-aarch32.h" #include "vixl/aarch32/operands-aarch32.h" -namespace CPU::NewRec { +namespace CPU::Recompiler { -class AArch32Compiler final : public Compiler +class ARM32Recompiler final : public Recompiler { public: - AArch32Compiler(); - ~AArch32Compiler() override; + ARM32Recompiler(); + ~ARM32Recompiler() override; protected: const char* GetHostRegName(u32 reg) const override; @@ -165,6 +165,6 @@ private: #endif }; -} // namespace CPU::NewRec +} // namespace CPU::Recompiler -#endif // CPU_ARCH_ARM32 \ No newline at end of file +#endif // CPU_ARCH_ARM32 diff --git 
a/src/core/cpu_newrec_compiler_aarch64.cpp b/src/core/cpu_recompiler_arm64.cpp similarity index 66% rename from src/core/cpu_newrec_compiler_aarch64.cpp rename to src/core/cpu_recompiler_arm64.cpp index cb3efeb49..125857695 100644 --- a/src/core/cpu_newrec_compiler_aarch64.cpp +++ b/src/core/cpu_recompiler_arm64.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 -#include "cpu_newrec_compiler_aarch64.h" +#include "cpu_recompiler_arm64.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" #include "cpu_recompiler_thunks.h" @@ -13,51 +13,508 @@ #include "common/align.h" #include "common/assert.h" #include "common/log.h" +#include "common/memmap.h" #include "common/string_util.h" #include #ifdef CPU_ARCH_ARM64 +#ifdef ENABLE_HOST_DISASSEMBLY +#include "vixl/aarch64/disasm-aarch64.h" +#endif + LOG_CHANNEL(Recompiler); #define PTR(x) vixl::aarch64::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state))) -namespace CPU::NewRec { +namespace CPU::Recompiler { using namespace vixl::aarch64; -using CPU::Recompiler::armEmitCall; -using CPU::Recompiler::armEmitCondBranch; -using CPU::Recompiler::armEmitFarLoad; -using CPU::Recompiler::armEmitJmp; -using CPU::Recompiler::armEmitMov; -using CPU::Recompiler::armGetJumpTrampoline; -using CPU::Recompiler::armGetPCDisplacement; -using CPU::Recompiler::armIsCallerSavedRegister; -using CPU::Recompiler::armMoveAddressToReg; +constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers +constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes +constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE; -AArch64Compiler s_instance; -Compiler* g_compiler = &s_instance; +static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024; +static std::unordered_map s_trampoline_targets; +static u8* s_trampoline_start_ptr = nullptr; +static u32 s_trampoline_used = 0; -} // namespace CPU::NewRec +static ARM64Recompiler s_instance; +Recompiler* g_compiler = &s_instance; -CPU::NewRec::AArch64Compiler::AArch64Compiler() +} // namespace CPU::Recompiler + +bool CPU::Recompiler::armIsCallerSavedRegister(u32 id) +{ + // same on both linux and windows + return (id <= 18); +} + +void CPU::Recompiler::armEmitMov(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& rd, u64 imm) +{ + // From vixl macro assembler. + DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits()); + DebugAssert(rd.GetCode() != sp.GetCode()); + + if (imm == 0) + { + armAsm->mov(rd, Assembler::AppropriateZeroRegFor(rd)); + return; + } + + // The worst case for size is mov 64-bit immediate to sp: + // * up to 4 instructions to materialise the constant + // * 1 instruction to move to sp + + // Immediates on Aarch64 can be produced using an initial value, and zero to + // three move keep operations. + // + // Initial values can be generated with: + // 1. 64-bit move zero (movz). + // 2. 32-bit move inverted (movn). + // 3. 64-bit move inverted. + // 4. 32-bit orr immediate. + // 5. 64-bit orr immediate. + // Move-keep may then be used to modify each of the 16-bit half words. + // + // The code below supports all five initial value generators, and + // applying move-keep operations to move-zero and move-inverted initial + // values. + + // Try to move the immediate in one instruction, and if that fails, switch to + // using multiple instructions. 
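+  // Worked example (illustrative only; register x0 and the constant are
+  // arbitrary, not taken from the original source): materialising
+  // imm = 0x0000FFFFFFFF1234. Three of the four halfwords are 0xffff, so the
+  // inverted path below wins:
+  //   movn x0, #0xedcb            // x0 = ~0xedcb = 0xffffffffffff1234
+  //   movk x0, #0x0000, lsl #48   // clear the top halfword
+  // Two instructions, versus movz plus two movks on the non-inverted path.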
+  const unsigned reg_size = rd.GetSizeInBits();
+
+  if (Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
+  {
+    // Immediate can be represented in a move zero instruction. Movz can't write
+    // to the stack pointer.
+    armAsm->movz(rd, imm);
+    return;
+  }
+  else if (Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
+  {
+    // Immediate can be represented in a move negative instruction. Movn can't
+    // write to the stack pointer.
+    armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & kWRegMask));
+    return;
+  }
+  else if (Assembler::IsImmLogical(imm, reg_size))
+  {
+    // Immediate can be represented in a logical orr instruction.
+    DebugAssert(!rd.IsZero());
+    armAsm->orr(rd, Assembler::AppropriateZeroRegFor(rd), imm);
+    return;
+  }
+
+  // Generic immediate case. Imm will be represented by
+  // [imm3, imm2, imm1, imm0], where each imm is 16 bits.
+  // A move-zero or move-inverted is generated for the first non-zero or
+  // non-0xffff immX, and a move-keep for subsequent non-zero immX.
+
+  uint64_t ignored_halfword = 0;
+  bool invert_move = false;
+  // If the number of 0xffff halfwords is greater than the number of 0x0000
+  // halfwords, it's more efficient to use move-inverted.
+  if (vixl::CountClearHalfWords(~imm, reg_size) > vixl::CountClearHalfWords(imm, reg_size))
+  {
+    ignored_halfword = 0xffff;
+    invert_move = true;
+  }
+
+  // Iterate through the halfwords. Use movn/movz for the first non-ignored
+  // halfword, and movk for subsequent halfwords.
+  DebugAssert((reg_size % 16) == 0);
+  bool first_mov_done = false;
+  for (unsigned i = 0; i < (reg_size / 16); i++)
+  {
+    uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
+    if (imm16 != ignored_halfword)
+    {
+      if (!first_mov_done)
+      {
+        if (invert_move)
+          armAsm->movn(rd, ~imm16 & 0xffff, 16 * i);
+        else
+          armAsm->movz(rd, imm16, 16 * i);
+        first_mov_done = true;
+      }
+      else
+      {
+        // Construct a wider constant.
+        armAsm->movk(rd, imm16, 16 * i);
+      }
+    }
+  }
+
+  DebugAssert(first_mov_done);
+}
+
+s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
+{
+  // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
+  // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
+  return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
+}
+
+bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr)
+{
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  const void* current_code_ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
+  const void* ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
+  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
+  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
+
+  return (vixl::IsInt21(page_displacement) &&
+          (Assembler::IsImmAddSub(page_offset) || Assembler::IsImmLogical(page_offset, 64)));
+}
+
+void CPU::Recompiler::armMoveAddressToReg(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
+                                          const void* addr)
+{
+  DebugAssert(reg.IsX());
+
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  const void* current_code_ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
+  const void* ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
+  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
+  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
+  if (vixl::IsInt21(page_displacement) && Assembler::IsImmAddSub(page_offset))
+  {
+    armAsm->adrp(reg, page_displacement);
+    armAsm->add(reg, reg, page_offset);
+  }
+  else if (vixl::IsInt21(page_displacement) && Assembler::IsImmLogical(page_offset, 64))
+  {
+    armAsm->adrp(reg, page_displacement);
+    armAsm->orr(reg, reg, page_offset);
+  }
+  else
+  {
+    armEmitMov(armAsm, reg, reinterpret_cast<uintptr_t>(addr));
+  }
+}
+
+void CPU::Recompiler::armEmitJmp(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline)
+{
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  s64 displacement = armGetPCDisplacement(cur, ptr);
+  bool use_blr = !vixl::IsInt26(displacement);
+  bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
+  if (use_blr && use_trampoline && !force_inline)
+  {
+    if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
+    {
+      displacement = armGetPCDisplacement(cur, trampoline);
+      use_blr = !vixl::IsInt26(displacement);
+    }
+  }
+
+  if (use_blr)
+  {
+    armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
+    armAsm->br(RXSCRATCH);
+  }
+  else
+  {
+    armAsm->b(displacement);
+  }
+}
+
+void CPU::Recompiler::armEmitCall(vixl::aarch64::Assembler* armAsm, const void* ptr, bool force_inline)
+{
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  s64 displacement = armGetPCDisplacement(cur, ptr);
+  bool use_blr = !vixl::IsInt26(displacement);
+  bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
+  if (use_blr && use_trampoline && !force_inline)
+  {
+    if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
+    {
+      displacement = armGetPCDisplacement(cur, trampoline);
+      use_blr = !vixl::IsInt26(displacement);
+    }
+  }
+
+  if (use_blr)
+  {
+    armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
+    armAsm->blr(RXSCRATCH);
+  }
+  else
+  {
+    armAsm->bl(displacement);
+  }
+}
+
+void CPU::Recompiler::armEmitCondBranch(vixl::aarch64::Assembler* armAsm, vixl::aarch64::Condition cond,
+                                        const void* ptr)
+{
+  const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
+                                             reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
+  // pxAssert(Common::IsAligned(jump_distance, 4));
+
+  if (vixl::aarch64::Instruction::IsValidImmPCOffset(CondBranchType, jump_distance >> 2))
+  {
+    armAsm->b(jump_distance >> 2, cond);
+  }
+  else
+  {
+    Label branch_not_taken;
+    armAsm->b(&branch_not_taken, InvertCondition(cond));
+
+    const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
+                                                   reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
+    armAsm->b(new_jump_distance >> 2);
+    armAsm->bind(&branch_not_taken);
+  }
+}
+
+void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
+                                     const void* addr, bool sign_extend_word)
+{
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  const void* current_code_ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
+  const void* ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
+  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
+  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
+  MemOperand memop;
+
+  const vixl::aarch64::Register xreg = reg.X();
+  if (vixl::IsInt21(page_displacement))
+  {
+    armAsm->adrp(xreg, page_displacement);
+    memop = vixl::aarch64::MemOperand(xreg, static_cast<s64>(page_offset));
+  }
+  else
+  {
+    armMoveAddressToReg(armAsm, xreg, addr);
+    memop = vixl::aarch64::MemOperand(xreg);
+  }
+
+  if (sign_extend_word)
+    armAsm->ldrsw(reg, memop);
+  else
+    armAsm->ldr(reg, memop);
+}
+
+void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
+                                      const void* addr, const vixl::aarch64::Register& tempreg)
+{
+  DebugAssert(tempreg.IsX());
+
+  const void* cur = armAsm->GetCursorAddress<const void*>();
+  const void* current_code_ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
+  const void* ptr_page =
+    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
+  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
+  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
+
+  if (vixl::IsInt21(page_displacement))
+  {
+    armAsm->adrp(tempreg, page_displacement);
+    armAsm->str(reg, MemOperand(tempreg, static_cast<s64>(page_offset)));
+  }
+  else
+  {
+    armMoveAddressToReg(armAsm, tempreg, addr);
+    armAsm->str(reg, MemOperand(tempreg));
+  }
+}
+
+u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
+{
+  auto it = s_trampoline_targets.find(target);
+  if (it != s_trampoline_targets.end())
+    return s_trampoline_start_ptr + it->second;
+
+  // align to 16 bytes?
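+  // Size sketch for the reserve check below: materialising an arbitrary
+  // 64-bit address takes at most four movz/movk instructions ("4 movs"),
+  // plus the final br -- five instructions at 4 bytes each, hence 20 bytes.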
+  const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
+
+  // 4 movs plus a jump
+  if (TRAMPOLINE_AREA_SIZE - offset < 20)
+  {
+    Panic("Ran out of space in constant pool");
+    return nullptr;
+  }
+
+  u8* start = s_trampoline_start_ptr + offset;
+  Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
+#ifdef VIXL_DEBUG
+  vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset,
+                                         vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
+#endif
+
+  armMoveAddressToReg(&armAsm, RXSCRATCH, target);
+  armAsm.br(RXSCRATCH);
+  armAsm.FinalizeCode();
+
+  const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
+  DebugAssert(size < 20);
+  s_trampoline_targets.emplace(target, offset);
+  s_trampoline_used = offset + static_cast<u32>(size);
+
+  MemMap::FlushInstructionCache(start, size);
+  return start;
+}
+
+void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
+{
+#ifdef ENABLE_HOST_DISASSEMBLY
+  class MyDisassembler : public vixl::aarch64::Disassembler
+  {
+  protected:
+    void ProcessOutput(const vixl::aarch64::Instruction* instr) override
+    {
+      DEBUG_LOG("0x{:016X} {:08X}\t\t{}", reinterpret_cast<u64>(instr), instr->GetInstructionBits(), GetOutput());
+    }
+  };
+
+  vixl::aarch64::Decoder decoder;
+  MyDisassembler disas;
+  decoder.AppendVisitor(&disas);
+  decoder.Decode(static_cast<const vixl::aarch64::Instruction*>(start),
+                 reinterpret_cast<const vixl::aarch64::Instruction*>(static_cast<const u8*>(start) + size));
+#else
+  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
+#endif
+}
+
+u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
+{
+  return size / vixl::aarch64::kInstructionSize;
+}
+
+u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
+{
+  using namespace vixl::aarch64;
+  using namespace CPU::Recompiler;
+
+  const s64 disp = armGetPCDisplacement(code, dst);
+  DebugAssert(vixl::IsInt26(disp));
+
+  const u32 new_code = B | Assembler::ImmUncondBranch(disp);
+  std::memcpy(code, &new_code, sizeof(new_code));
+  if (flush_icache)
+    MemMap::FlushInstructionCache(code, kInstructionSize);
+
+  return kInstructionSize;
+}
+
+u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
+{
+  using namespace vixl::aarch64;
+  using namespace CPU::Recompiler;
+
+  Assembler actual_asm(static_cast<u8*>(code), code_size);
+  Assembler* armAsm = &actual_asm;
+
+#ifdef VIXL_DEBUG
+  vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
+#endif
+
+  Label dispatch;
+
+  g_enter_recompiler = armAsm->GetCursorAddress<const void*>();
+  {
+    // reserve some space for saving caller-saved registers
+    armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE);
+
+    // Need the CPU state for basically everything :-)
+    armMoveAddressToReg(armAsm, RSTATE, &g_state);
+
+    // Fastmem setup, if enabled
+    if (IsUsingFastmem())
+      armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base));
+
+    // Fall through to event dispatcher
+  }
+
+  // check events then for frame done
+  g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
+  {
+    Label skip_event_check;
+    armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
+    armAsm->ldr(RWARG2, PTR(&g_state.downcount));
+    armAsm->cmp(RWARG1, RWARG2);
+    armAsm->b(&skip_event_check, lt);
+
+    g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
+    armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
+
+    armAsm->bind(&skip_event_check);
+  }
+
+  // TODO: align?
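+  // Rough C equivalent of the dispatcher emitted below (illustrative only;
+  // assumes the per-page pointers in g_code_lut are pre-biased so the
+  // unmasked word index (pc >> 2) can be used directly):
+  //   const u32 pc = g_state.pc;
+  //   const void* const* page = g_code_lut[pc >> 16];
+  //   ((void (*)())page[pc >> 2])();
+  // Two dependent loads and an indirect call per dispatch; compiled blocks
+  // branch back to the event check/dispatcher when they finish.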
+  g_dispatcher = armAsm->GetCursorAddress<const void*>();
+  {
+    armAsm->bind(&dispatch);
+
+    // x9 <- s_fast_map[pc >> 16]
+    armAsm->ldr(RWARG1, PTR(&g_state.pc));
+    armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
+    armAsm->lsr(RWARG2, RWARG1, 16);
+    armAsm->lsr(RWARG1, RWARG1, 2);
+    armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));
+
+    // blr(x9[pc * 2]) (fast_map[pc >> 2])
+    armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3));
+    armAsm->blr(RXARG1);
+  }
+
+  g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
+  {
+    armAsm->ldr(RWARG1, PTR(&g_state.pc));
+    armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
+    armAsm->b(&dispatch);
+  }
+
+  g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
+  {
+    armAsm->ldr(RWARG1, PTR(&g_state.pc));
+    armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
+    armAsm->b(&dispatch);
+  }
+
+  g_interpret_block = armAsm->GetCursorAddress<const void*>();
+  {
+    armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
+    armAsm->b(&dispatch);
+  }
+
+  armAsm->FinalizeCode();
+
+  // TODO: align?
+  s_trampoline_targets.clear();
+  s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
+  s_trampoline_used = 0;
+
+  return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
+}
+
+CPU::Recompiler::ARM64Recompiler::ARM64Recompiler()
   : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
 {
 }
 
-CPU::NewRec::AArch64Compiler::~AArch64Compiler() = default;
+CPU::Recompiler::ARM64Recompiler::~ARM64Recompiler() = default;
 
-const void* CPU::NewRec::AArch64Compiler::GetCurrentCodePointer()
+const void* CPU::Recompiler::ARM64Recompiler::GetCurrentCodePointer()
 {
   return armAsm->GetCursorAddress<const void*>();
 }
 
-void CPU::NewRec::AArch64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
-                                         u8* far_code_buffer, u32 far_code_space)
+void CPU::Recompiler::ARM64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
+                                             u8* far_code_buffer, u32 far_code_space)
 {
-  Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
+  Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
 
   // TODO: don't recreate this every time..
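   // The recompiler keeps two emitters: m_emitter for the near (hot) path and
   // m_far_emitter for rarely-taken slow paths such as exceptions and event
   // tests, so cold code does not dilute the hot instruction stream (see
   // SwitchToFarCode()/SwitchToNearCode() below).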
DebugAssert(!armAsm); @@ -90,7 +547,7 @@ void CPU::NewRec::AArch64Compiler::Reset(CodeCache::Block* block, u8* code_buffe } } -void CPU::NewRec::AArch64Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond) +void CPU::Recompiler::ARM64Recompiler::SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond) { DebugAssert(armAsm == &m_emitter); if (emit_jump) @@ -118,7 +575,7 @@ void CPU::NewRec::AArch64Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch64 armAsm = &m_far_emitter; } -void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit) +void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit) { const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress()); if (vixl::IsInt14(disp)) @@ -136,7 +593,8 @@ void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch64:: armAsm = &m_far_emitter; } -void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero) +void CPU::Recompiler::ARM64Recompiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, + bool nonzero) { const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress()); if (vixl::IsInt19(disp)) @@ -154,7 +612,7 @@ void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl: armAsm = &m_far_emitter; } -void CPU::NewRec::AArch64Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond) +void CPU::Recompiler::ARM64Recompiler::SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond) { DebugAssert(armAsm == &m_far_emitter); if (emit_jump) @@ -165,17 +623,17 @@ void CPU::NewRec::AArch64Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch6 armAsm = &m_emitter; } -void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::Register& dst, u32 val) +void CPU::Recompiler::ARM64Recompiler::EmitMov(const vixl::aarch64::Register& dst, u32 val) { armEmitMov(armAsm, dst, val); } -void CPU::NewRec::AArch64Compiler::EmitCall(const void* ptr, bool force_inline /*= false*/) +void CPU::Recompiler::ARM64Recompiler::EmitCall(const void* ptr, bool force_inline /*= false*/) { armEmitCall(armAsm, ptr, force_inline); } -vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckAddSubConstant(s32 val) +vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckAddSubConstant(s32 val) { if (Assembler::IsImmAddSub(val)) return vixl::aarch64::Operand(static_cast(val)); @@ -184,12 +642,12 @@ vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckAddSubConstant(s32 return vixl::aarch64::Operand(RWSCRATCH); } -vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckAddSubConstant(u32 val) +vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckAddSubConstant(u32 val) { return armCheckAddSubConstant(static_cast(val)); } -vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckCompareConstant(s32 val) +vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckCompareConstant(s32 val) { if (Assembler::IsImmConditionalCompare(val)) return vixl::aarch64::Operand(static_cast(val)); @@ -198,7 +656,7 @@ vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckCompareConstant(s32 return vixl::aarch64::Operand(RWSCRATCH); } -vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckLogicalConstant(u32 val) +vixl::aarch64::Operand CPU::Recompiler::ARM64Recompiler::armCheckLogicalConstant(u32 val) { if 
(Assembler::IsImmLogical(val, 32)) return vixl::aarch64::Operand(static_cast(static_cast(val))); @@ -207,12 +665,12 @@ vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckLogicalConstant(u32 return vixl::aarch64::Operand(RWSCRATCH); } -void CPU::NewRec::AArch64Compiler::BeginBlock() +void CPU::Recompiler::ARM64Recompiler::BeginBlock() { - Compiler::BeginBlock(); + Recompiler::BeginBlock(); } -void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) +void CPU::Recompiler::ARM64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) { // store it first to reduce code size, because we can offset armMoveAddressToReg(armAsm, RXARG1, ram_ptr); @@ -275,7 +733,7 @@ void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, armAsm->bind(&block_unchanged); } -void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate() +void CPU::Recompiler::ARM64Recompiler::GenerateICacheCheckAndUpdate() { if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) { @@ -331,8 +789,8 @@ void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate() } } -void CPU::NewRec::AArch64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, - s32 arg3reg /*= -1*/) +void CPU::Recompiler::ARM64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, + s32 arg3reg /*= -1*/) { if (arg1reg >= 0 && arg1reg != static_cast(RXARG1.GetCode())) armAsm->mov(RXARG1, XRegister(arg1reg)); @@ -343,7 +801,7 @@ void CPU::NewRec::AArch64Compiler::GenerateCall(const void* func, s32 arg1reg /* EmitCall(func); } -void CPU::NewRec::AArch64Compiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::Recompiler::ARM64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -360,7 +818,7 @@ void CPU::NewRec::AArch64Compiler::EndBlock(const std::optional& newpc, boo EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::NewRec::AArch64Compiler::EndBlockWithException(Exception excode) +void CPU::Recompiler::ARM64Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... 
@@ -378,8 +836,8 @@ void CPU::NewRec::AArch64Compiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, - bool force_run_events) +void CPU::Recompiler::ARM64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, + bool force_run_events) { // event test // pc should've been flushed @@ -436,7 +894,7 @@ void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional& new } } -const void* CPU::NewRec::AArch64Compiler::EndCompile(u32* code_size, u32* far_code_size) +const void* CPU::Recompiler::ARM64Recompiler::EndCompile(u32* code_size, u32* far_code_size) { #ifdef VIXL_DEBUG m_emitter_check.reset(); @@ -453,7 +911,7 @@ const void* CPU::NewRec::AArch64Compiler::EndCompile(u32* code_size, u32* far_co return code; } -const char* CPU::NewRec::AArch64Compiler::GetHostRegName(u32 reg) const +const char* CPU::Recompiler::ARM64Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array reg64_names = { {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", @@ -461,22 +919,22 @@ const char* CPU::NewRec::AArch64Compiler::GetHostRegName(u32 reg) const return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; } -void CPU::NewRec::AArch64Compiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::Recompiler::ARM64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { EmitMov(WRegister(reg), val); } -void CPU::NewRec::AArch64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::ARM64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { armAsm->ldr(WRegister(reg), PTR(ptr)); } -void CPU::NewRec::AArch64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::ARM64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { armAsm->str(WRegister(reg), PTR(ptr)); } -void CPU::NewRec::AArch64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::Recompiler::ARM64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { if (val == 0) { @@ -488,59 +946,59 @@ void CPU::NewRec::AArch64Compiler::StoreConstantToCPUPointer(u32 val, const void armAsm->str(RWSCRATCH, PTR(ptr)); } -void CPU::NewRec::AArch64Compiler::CopyHostReg(u32 dst, u32 src) +void CPU::Recompiler::ARM64Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) armAsm->mov(WRegister(dst), WRegister(src)); } -void CPU::NewRec::AArch64Compiler::AssertRegOrConstS(CompileFlags cf) const +void CPU::Recompiler::ARM64Recompiler::AssertRegOrConstS(CompileFlags cf) const { DebugAssert(cf.valid_host_s || cf.const_s); } -void CPU::NewRec::AArch64Compiler::AssertRegOrConstT(CompileFlags cf) const +void CPU::Recompiler::ARM64Recompiler::AssertRegOrConstT(CompileFlags cf) const { DebugAssert(cf.valid_host_t || cf.const_t); } -vixl::aarch64::MemOperand CPU::NewRec::AArch64Compiler::MipsPtr(Reg r) const +vixl::aarch64::MemOperand CPU::Recompiler::ARM64Recompiler::MipsPtr(Reg r) const { DebugAssert(r < Reg::count); return PTR(&g_state.regs.r[static_cast(r)]); } -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const +vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return WRegister(cf.host_d); } -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const +vixl::aarch64::Register 
CPU::Recompiler::ARM64Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return WRegister(cf.host_s); } -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const +vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return WRegister(cf.host_t); } -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const +vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return WRegister(cf.host_lo); } -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const +vixl::aarch64::Register CPU::Recompiler::ARM64Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return WRegister(cf.host_hi); } -void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::MoveSToReg(const vixl::aarch64::Register& dst, CompileFlags cf) { DebugAssert(dst.IsW()); if (cf.valid_host_s) @@ -563,7 +1021,7 @@ void CPU::NewRec::AArch64Compiler::MoveSToReg(const vixl::aarch64::Register& dst } } -void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf) { DebugAssert(dst.IsW()); if (cf.valid_host_t) @@ -586,10 +1044,10 @@ void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::Register& dst } } -void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg) +void CPU::Recompiler::ARM64Recompiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg) { DebugAssert(reg < Reg::count && dst.IsW()); - if (const std::optional hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) + if (const std::optional hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) armAsm->mov(dst, WRegister(hreg.value())); else if (HasConstantReg(reg)) EmitMov(dst, GetConstantRegU32(reg)); @@ -597,9 +1055,9 @@ void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::Registe armAsm->ldr(dst, MipsPtr(reg)); } -void CPU::NewRec::AArch64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::Recompiler::ARM64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, + Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -614,9 +1072,9 @@ void CPU::NewRec::AArch64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func EmitCall(func); } -void CPU::NewRec::AArch64Compiler::Flush(u32 flags) +void CPU::Recompiler::ARM64Recompiler::Flush(u32 flags) { - Compiler::Flush(flags); + Recompiler::Flush(flags); if (flags & FLUSH_PC && m_dirty_pc) { @@ -706,7 +1164,7 @@ void CPU::NewRec::AArch64Compiler::Flush(u32 flags) } } -void CPU::NewRec::AArch64Compiler::Compile_Fallback() +void CPU::Recompiler::ARM64Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); @@ -731,7 +1189,7 @@ void CPU::NewRec::AArch64Compiler::Compile_Fallback() m_load_delay_dirty = EMULATE_LOAD_DELAYS; } -void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg) +void CPU::Recompiler::ARM64Recompiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg) { 
DebugAssert(pcreg.IsW()); if (!g_settings.cpu_recompiler_memory_exceptions) @@ -747,7 +1205,7 @@ void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::Regist SwitchToNearCode(false); } -void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_jr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); CheckBranchTarget(pcreg); @@ -758,7 +1216,7 @@ void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::AArch64Compiler::Compile_jalr(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_jalr(CompileFlags cf) { const Register pcreg = CFGetRegS(cf); if (MipsD() != Reg::zero) @@ -771,7 +1229,7 @@ void CPU::NewRec::AArch64Compiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::Recompiler::ARM64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { AssertRegOrConstS(cf); @@ -852,7 +1310,7 @@ void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition EndBlock(taken_pc, true); } -void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf, bool overflow) +void CPU::Recompiler::ARM64Recompiler::Compile_addi(CompileFlags cf, bool overflow) { const Register rs = CFGetRegS(cf); const Register rt = CFGetRegT(cf); @@ -874,33 +1332,33 @@ void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf, bool overflow) } } -void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_addi(CompileFlags cf) { Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::NewRec::AArch64Compiler::Compile_addiu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_addiu(CompileFlags cf) { Compile_addi(cf, false); } -void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::NewRec::AArch64Compiler::Compile_sltiu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM64Recompiler::Compile_slti(CompileFlags cf, bool sign) { armAsm->cmp(CFGetRegS(cf), armCheckCompareConstant(static_cast(inst->i.imm_sext32()))); armAsm->cset(CFGetRegT(cf), sign ? 
lt : lo); } -void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_andi(CompileFlags cf) { const Register rt = CFGetRegT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) @@ -909,7 +1367,7 @@ void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf) armAsm->mov(rt, wzr); } -void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_ori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -919,7 +1377,7 @@ void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::NewRec::AArch64Compiler::Compile_xori(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_xori(CompileFlags cf) { const Register rt = CFGetRegT(cf); const Register rs = CFGetRegS(cf); @@ -929,10 +1387,9 @@ void CPU::NewRec::AArch64Compiler::Compile_xori(CompileFlags cf) armAsm->mov(rt, rs); } -void CPU::NewRec::AArch64Compiler::Compile_shift(CompileFlags cf, - void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, - const vixl::aarch64::Register&, - unsigned)) +void CPU::Recompiler::ARM64Recompiler::Compile_shift( + CompileFlags cf, + void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, unsigned)) { const Register rd = CFGetRegD(cf); const Register rt = CFGetRegT(cf); @@ -942,22 +1399,22 @@ void CPU::NewRec::AArch64Compiler::Compile_shift(CompileFlags cf, armAsm->mov(rd, rt); } -void CPU::NewRec::AArch64Compiler::Compile_sll(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sll(CompileFlags cf) { Compile_shift(cf, &Assembler::lsl); } -void CPU::NewRec::AArch64Compiler::Compile_srl(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_srl(CompileFlags cf) { Compile_shift(cf, &Assembler::lsr); } -void CPU::NewRec::AArch64Compiler::Compile_sra(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sra(CompileFlags cf) { Compile_shift(cf, &Assembler::asr); } -void CPU::NewRec::AArch64Compiler::Compile_variable_shift( +void CPU::Recompiler::ARM64Recompiler::Compile_variable_shift( CompileFlags cf, void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, const vixl::aarch64::Register&), @@ -985,22 +1442,22 @@ void CPU::NewRec::AArch64Compiler::Compile_variable_shift( } } -void CPU::NewRec::AArch64Compiler::Compile_sllv(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lslv, &Assembler::lsl); } -void CPU::NewRec::AArch64Compiler::Compile_srlv(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::lsrv, &Assembler::lsr); } -void CPU::NewRec::AArch64Compiler::Compile_srav(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &Assembler::asrv, &Assembler::asr); } -void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM64Recompiler::Compile_mult(CompileFlags cf, bool sign) { const Register rs = cf.valid_host_s ? 
CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1018,17 +1475,17 @@ void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf, bool sign) armAsm->lsr(hi.X(), lo.X(), 32); } -void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::NewRec::AArch64Compiler::Compile_multu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_div(CompileFlags cf) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1073,7 +1530,7 @@ void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf) armAsm->bind(&done); } -void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_divu(CompileFlags cf) { const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; if (!cf.valid_host_s) @@ -1103,7 +1560,7 @@ void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf) armAsm->bind(&done); } -void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::Register& result) +void CPU::Recompiler::ARM64Recompiler::TestOverflow(const vixl::aarch64::Register& result) { DebugAssert(result.IsW()); SwitchToFarCode(true, vs); @@ -1120,11 +1577,11 @@ void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::Register& r SwitchToNearCode(false); } -void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf, - void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, - const vixl::aarch64::Register&, - const vixl::aarch64::Operand&), - bool commutative, bool logical, bool overflow) +void CPU::Recompiler::ARM64Recompiler::Compile_dst_op( + CompileFlags cf, + void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, + const vixl::aarch64::Operand&), + bool commutative, bool logical, bool overflow) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1173,7 +1630,7 @@ void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf, TestOverflow(rd); } -void CPU::NewRec::AArch64Compiler::Compile_add(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_add(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::adds, true, false, true); @@ -1181,12 +1638,12 @@ void CPU::NewRec::AArch64Compiler::Compile_add(CompileFlags cf) Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::NewRec::AArch64Compiler::Compile_addu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::add, true, false, false); } -void CPU::NewRec::AArch64Compiler::Compile_sub(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sub(CompileFlags cf) { if (g_settings.cpu_recompiler_memory_exceptions) Compile_dst_op(cf, &Assembler::subs, false, false, true); @@ -1194,12 +1651,12 @@ void CPU::NewRec::AArch64Compiler::Compile_sub(CompileFlags cf) Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::NewRec::AArch64Compiler::Compile_subu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &Assembler::sub, false, false, false); } -void CPU::NewRec::AArch64Compiler::Compile_and(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_and(CompileFlags cf) { AssertRegOrConstS(cf); 
AssertRegOrConstT(cf); @@ -1220,7 +1677,7 @@ void CPU::NewRec::AArch64Compiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &Assembler::and_, true, true, false); } -void CPU::NewRec::AArch64Compiler::Compile_or(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_or(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1236,7 +1693,7 @@ void CPU::NewRec::AArch64Compiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &Assembler::orr, true, true, false); } -void CPU::NewRec::AArch64Compiler::Compile_xor(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_xor(CompileFlags cf) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1258,23 +1715,23 @@ void CPU::NewRec::AArch64Compiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &Assembler::eor, true, true, false); } -void CPU::NewRec::AArch64Compiler::Compile_nor(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf)); } -void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::NewRec::AArch64Compiler::Compile_sltu(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::Recompiler::ARM64Recompiler::Compile_slt(CompileFlags cf, bool sign) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1298,9 +1755,9 @@ void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf, bool sign) } vixl::aarch64::Register -CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, - const std::optional& address, - const std::optional& reg) +CPU::Recompiler::ARM64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf, + const std::optional& address, + const std::optional& reg) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1340,9 +1797,9 @@ CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, } template -vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::Register& addr_reg, - MemoryAccessSize size, bool sign, bool use_fastmem, - const RegAllocFn& dst_reg_alloc) +vixl::aarch64::Register +CPU::Recompiler::ARM64Recompiler::GenerateLoad(const vixl::aarch64::Register& addr_reg, MemoryAccessSize size, + bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) { DebugAssert(addr_reg.IsW()); if (use_fastmem) @@ -1388,20 +1845,20 @@ vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::a { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? 
reinterpret_cast(&Recompiler::Thunks::ReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord)); } break; } @@ -1456,9 +1913,9 @@ vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::a return dst_reg; } -void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& addr_reg, - const vixl::aarch64::Register& value_reg, MemoryAccessSize size, - bool use_fastmem) +void CPU::Recompiler::ARM64Recompiler::GenerateStore(const vixl::aarch64::Register& addr_reg, + const vixl::aarch64::Register& value_reg, MemoryAccessSize size, + bool use_fastmem) { DebugAssert(addr_reg.IsW() && value_reg.IsW()); if (use_fastmem) @@ -1501,20 +1958,20 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& { case MemoryAccessSize::Byte: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - EmitCall(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord)); + EmitCall(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); } break; } @@ -1545,8 +2002,8 @@ void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& } } -void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? 
std::optional(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -1574,8 +2031,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1668,8 +2125,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -1754,8 +2211,8 @@ void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz } } -void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { AssertRegOrConstS(cf); AssertRegOrConstT(cf); @@ -1782,8 +2239,8 @@ void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1856,8 +2313,8 @@ void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize } } -void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::ARM64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -1912,7 +2369,7 @@ void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz } } -void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_mtc0(CompileFlags cf) { // TODO: we need better constant setting here.. 
which will need backprop AssertRegOrConstT(cf); @@ -1988,7 +2445,7 @@ void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf) } } -void CPU::NewRec::AArch64Compiler::Compile_rfe(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits)); @@ -1998,7 +2455,7 @@ void CPU::NewRec::AArch64Compiler::Compile_rfe(CompileFlags cf) TestInterrupts(RWARG1); } -void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::Register& sr) +void CPU::Recompiler::ARM64Recompiler::TestInterrupts(const vixl::aarch64::Register& sr) { DebugAssert(sr.IsW()); @@ -2049,7 +2506,7 @@ void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::Register& armAsm->bind(&no_interrupt); } -void CPU::NewRec::AArch64Compiler::Compile_mfc2(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2090,7 +2547,7 @@ void CPU::NewRec::AArch64Compiler::Compile_mfc2(CompileFlags cf) } } -void CPU::NewRec::AArch64Compiler::Compile_mtc2(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2152,7 +2609,7 @@ void CPU::NewRec::AArch64Compiler::Compile_mtc2(CompileFlags cf) } } -void CPU::NewRec::AArch64Compiler::Compile_cop2(CompileFlags cf) +void CPU::Recompiler::ARM64Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2164,10 +2621,10 @@ void CPU::NewRec::AArch64Compiler::Compile_cop2(CompileFlags cf) AddGTETicks(func_ticks); } -u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, - TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, - u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, - bool is_load) +u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, + TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, + u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, + bool is_load) { Assembler arm_asm(static_cast(thunk_code), thunk_space); Assembler* armAsm = &arm_asm; @@ -2229,24 +2686,24 @@ u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* case MemoryAccessSize::Byte: { armEmitCall(armAsm, - is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte), + is_load ? reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte), false); } break; case MemoryAccessSize::HalfWord: { armEmitCall(armAsm, - is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord), + is_load ? reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord), false); } break; case MemoryAccessSize::Word: { armEmitCall(armAsm, - is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord), + is_load ? 
reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord), false); } break; diff --git a/src/core/cpu_newrec_compiler_aarch64.h b/src/core/cpu_recompiler_arm64.h similarity index 97% rename from src/core/cpu_newrec_compiler_aarch64.h rename to src/core/cpu_recompiler_arm64.h index fd0dbb38f..44bb4001a 100644 --- a/src/core/cpu_newrec_compiler_aarch64.h +++ b/src/core/cpu_recompiler_arm64.h @@ -3,7 +3,7 @@ #pragma once -#include "cpu_newrec_compiler.h" +#include "cpu_recompiler.h" #include @@ -11,13 +11,13 @@ #include "vixl/aarch64/assembler-aarch64.h" -namespace CPU::NewRec { +namespace CPU::Recompiler { -class AArch64Compiler final : public Compiler +class ARM64Recompiler final : public Recompiler { public: - AArch64Compiler(); - ~AArch64Compiler() override; + ARM64Recompiler(); + ~ARM64Recompiler() override; protected: const char* GetHostRegName(u32 reg) const override; @@ -166,6 +166,6 @@ private: #endif }; -} // namespace CPU::NewRec +} // namespace CPU::Recompiler #endif // CPU_ARCH_ARM64 diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp deleted file mode 100644 index 517276ec9..000000000 --- a/src/core/cpu_recompiler_code_generator.cpp +++ /dev/null @@ -1,3214 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "cpu_recompiler_code_generator.h" -#include "cpu_core.h" -#include "cpu_core_private.h" -#include "cpu_disasm.h" -#include "cpu_pgxp.h" -#include "gte.h" -#include "settings.h" - -#include "common/log.h" - -LOG_CHANNEL(Recompiler); - -// TODO: Turn load+sext/zext into a single signed/unsigned load -// TODO: mulx/shlx/etc -// TODO: when writing to the same register, don't allocate a temporary and copy it (mainly for shifts) - -namespace CPU::Recompiler { - -const void* CodeGenerator::CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size) -{ - // TODO: Align code buffer. 
- - m_block = block; - m_block_start = {block->Instructions(), block->InstructionsInfo()}; - m_block_end = {block->Instructions() + block->size, block->InstructionsInfo() + block->size}; - - m_pc = block->pc; - m_pc_valid = true; - - EmitBeginBlock(true); - BlockPrologue(); - - m_current_instruction = m_block_start; - while (m_current_instruction.instruction != m_block_end.instruction) - { - if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info)) - { - m_current_instruction = {}; - m_block_end = {}; - m_block_start = {}; - m_block = nullptr; - return nullptr; - } - - m_current_instruction.instruction++; - m_current_instruction.info++; - } - - if (!m_block_linked) - { - BlockEpilogue(); - - if (block->HasFlag(CodeCache::BlockFlags::SpansPages)) - { - // jump directly to the next block - const Value pc = CalculatePC(); - WriteNewPC(pc, true); - const void* host_target = - CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(), static_cast(pc.constant_value)); - EmitBranch(host_target); - EmitEndBlock(true, nullptr); - } - else - { - EmitEndBlock(true, CodeCache::g_check_events_and_dispatch); - } - } - - const void* code = FinalizeBlock(out_host_code_size, out_host_far_code_size); - DebugAssert(m_register_cache.GetUsedHostRegisters() == 0); - - m_current_instruction = {}; - m_block_end = {}; - m_block_start = {}; - m_block = nullptr; - return code; -} - -bool CodeGenerator::CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - if (IsNopInstruction(instruction)) - { - InstructionPrologue(instruction, info, 1); - InstructionEpilogue(instruction, info); - return true; - } - - bool result; - switch (instruction.op) - { -#if 1 - case InstructionOp::ori: - case InstructionOp::andi: - case InstructionOp::xori: - result = Compile_Bitwise(instruction, info); - break; - - case InstructionOp::lb: - case InstructionOp::lbu: - case InstructionOp::lh: - case InstructionOp::lhu: - case InstructionOp::lw: - result = Compile_Load(instruction, info); - break; - - case InstructionOp::lwl: - case InstructionOp::lwr: - result = Compile_LoadLeftRight(instruction, info); - break; - - case InstructionOp::swl: - case InstructionOp::swr: - result = Compile_StoreLeftRight(instruction, info); - break; - - case InstructionOp::sb: - case InstructionOp::sh: - case InstructionOp::sw: - result = Compile_Store(instruction, info); - break; - - case InstructionOp::j: - case InstructionOp::jal: - case InstructionOp::b: - case InstructionOp::beq: - case InstructionOp::bne: - case InstructionOp::bgtz: - case InstructionOp::blez: - result = Compile_Branch(instruction, info); - break; - - case InstructionOp::addi: - case InstructionOp::addiu: - result = Compile_Add(instruction, info); - break; - - case InstructionOp::slti: - case InstructionOp::sltiu: - result = Compile_SetLess(instruction, info); - break; - - case InstructionOp::lui: - result = Compile_lui(instruction, info); - break; - - case InstructionOp::cop0: - result = Compile_cop0(instruction, info); - break; - - case InstructionOp::cop2: - case InstructionOp::lwc2: - case InstructionOp::swc2: - result = Compile_cop2(instruction, info); - break; - - case InstructionOp::funct: - { - switch (instruction.r.funct) - { - case InstructionFunct::and_: - case InstructionFunct::or_: - case InstructionFunct::xor_: - case InstructionFunct::nor: - result = Compile_Bitwise(instruction, info); - break; - - case InstructionFunct::sll: - case InstructionFunct::srl: - case InstructionFunct::sra: - case 
InstructionFunct::sllv: - case InstructionFunct::srlv: - case InstructionFunct::srav: - result = Compile_Shift(instruction, info); - break; - - case InstructionFunct::mfhi: - case InstructionFunct::mflo: - case InstructionFunct::mthi: - case InstructionFunct::mtlo: - result = Compile_MoveHiLo(instruction, info); - break; - - case InstructionFunct::add: - case InstructionFunct::addu: - result = Compile_Add(instruction, info); - break; - - case InstructionFunct::sub: - case InstructionFunct::subu: - result = Compile_Subtract(instruction, info); - break; - - case InstructionFunct::mult: - case InstructionFunct::multu: - result = Compile_Multiply(instruction, info); - break; - - case InstructionFunct::div: - result = Compile_SignedDivide(instruction, info); - break; - - case InstructionFunct::divu: - result = Compile_Divide(instruction, info); - break; - - case InstructionFunct::slt: - case InstructionFunct::sltu: - result = Compile_SetLess(instruction, info); - break; - - case InstructionFunct::jr: - case InstructionFunct::jalr: - case InstructionFunct::syscall: - case InstructionFunct::break_: - result = Compile_Branch(instruction, info); - break; - - default: - result = Compile_Fallback(instruction, info); - break; - } - } - break; -#endif - - default: - result = Compile_Fallback(instruction, info); - break; - } - - return result; -} - -Value CodeGenerator::ConvertValueSize(const Value& value, RegSize size, bool sign_extend) -{ - DebugAssert(value.size != size); - - if (value.IsConstant()) - { - // compile-time conversion, woo! - switch (size) - { - case RegSize_8: - return Value::FromConstantU8(value.constant_value & 0xFF); - - case RegSize_16: - { - switch (value.size) - { - case RegSize_8: - return Value::FromConstantU16(sign_extend ? SignExtend16(Truncate8(value.constant_value)) : - ZeroExtend16(Truncate8(value.constant_value))); - - default: - return Value::FromConstantU16(value.constant_value & 0xFFFF); - } - } - break; - - case RegSize_32: - { - switch (value.size) - { - case RegSize_8: - return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate8(value.constant_value)) : - ZeroExtend32(Truncate8(value.constant_value))); - case RegSize_16: - return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate16(value.constant_value)) : - ZeroExtend32(Truncate16(value.constant_value))); - - case RegSize_32: - return value; - - default: - break; - } - } - break; - - default: - break; - } - - UnreachableCode(); - } - - Value new_value = m_register_cache.AllocateScratch(size); - if (size < value.size) - { - EmitCopyValue(new_value.host_reg, value); - } - else - { - if (sign_extend) - EmitSignExtend(new_value.host_reg, size, value.host_reg, value.size); - else - EmitZeroExtend(new_value.host_reg, size, value.host_reg, value.size); - } - - return new_value; -} - -void CodeGenerator::ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend) -{ - DebugAssert(value->size != size); - - // We don't want to mess up the register cache value, so generate a new value if it's not scratch. - if (value->IsConstant() || !value->IsScratch()) - { - *value = ConvertValueSize(*value, size, sign_extend); - return; - } - - DebugAssert(value->IsInHostRegister() && value->IsScratch()); - - // If the size is smaller and the value is in a register, we can just "view" the lower part. 
-  if (size < value->size)
-  {
-    value->size = size;
-  }
-  else
-  {
-    if (sign_extend)
-      EmitSignExtend(value->host_reg, size, value->host_reg, value->size);
-    else
-      EmitZeroExtend(value->host_reg, size, value->host_reg, value->size);
-  }
-
-  value->size = size;
-}
-
-void* CodeGenerator::GetCurrentCodePointer() const
-{
-  if (m_emit == &m_near_emitter)
-    return GetCurrentNearCodePointer();
-  else if (m_emit == &m_far_emitter)
-    return GetCurrentFarCodePointer();
-
-  Panic("unknown emitter");
-}
-
-Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs, bool set_flags)
-{
-  DebugAssert(lhs.size == rhs.size);
-  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
-  {
-    // compile-time
-    u64 new_cv = lhs.constant_value + rhs.constant_value;
-    switch (lhs.size)
-    {
-      case RegSize_8:
-        return Value::FromConstantU8(Truncate8(new_cv));
-
-      case RegSize_16:
-        return Value::FromConstantU16(Truncate16(new_cv));
-
-      case RegSize_32:
-        return Value::FromConstantU32(Truncate32(new_cv));
-
-      case RegSize_64:
-        return Value::FromConstantU64(new_cv);
-
-      default:
-        return Value();
-    }
-  }
-
-  Value res = m_register_cache.AllocateScratch(lhs.size);
-  if (lhs.HasConstantValue(0) && !set_flags)
-  {
-    EmitCopyValue(res.host_reg, rhs);
-    return res;
-  }
-  else if (rhs.HasConstantValue(0) && !set_flags)
-  {
-    EmitCopyValue(res.host_reg, lhs);
-    return res;
-  }
-  else
-  {
-    if (lhs.IsInHostRegister())
-    {
-      EmitAdd(res.host_reg, lhs.host_reg, rhs, set_flags);
-    }
-    else
-    {
-      EmitCopyValue(res.host_reg, lhs);
-      EmitAdd(res.host_reg, res.host_reg, rhs, set_flags);
-    }
-    return res;
-  }
-}
-
-Value CodeGenerator::SubValues(const Value& lhs, const Value& rhs, bool set_flags)
-{
-  DebugAssert(lhs.size == rhs.size);
-  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
-  {
-    // compile-time
-    u64 new_cv = lhs.constant_value - rhs.constant_value;
-    switch (lhs.size)
-    {
-      case RegSize_8:
-        return Value::FromConstantU8(Truncate8(new_cv));
-
-      case RegSize_16:
-        return Value::FromConstantU16(Truncate16(new_cv));
-
-      case RegSize_32:
-        return Value::FromConstantU32(Truncate32(new_cv));
-
-      case RegSize_64:
-        return Value::FromConstantU64(new_cv);
-
-      default:
-        return Value();
-    }
-  }
-
-  Value res = m_register_cache.AllocateScratch(lhs.size);
-  if (rhs.HasConstantValue(0) && !set_flags)
-  {
-    EmitCopyValue(res.host_reg, lhs);
-    return res;
-  }
-  else
-  {
-    if (lhs.IsInHostRegister())
-    {
-      EmitSub(res.host_reg, lhs.host_reg, rhs, set_flags);
-    }
-    else
-    {
-      EmitCopyValue(res.host_reg, lhs);
-      EmitSub(res.host_reg, res.host_reg, rhs, set_flags);
-    }
-
-    return res;
-  }
-}
-
-std::pair<Value, Value> CodeGenerator::MulValues(const Value& lhs, const Value& rhs, bool signed_multiply)
-{
-  DebugAssert(lhs.size == rhs.size);
-  if (lhs.IsConstant() && rhs.IsConstant())
-  {
-    // compile-time
-    switch (lhs.size)
-    {
-      case RegSize_8:
-      {
-        u16 res;
-        if (signed_multiply)
-          res = u16(s16(s8(lhs.constant_value)) * s16(s8(rhs.constant_value)));
-        else
-          res = u16(u8(lhs.constant_value)) * u16(u8(rhs.constant_value));
-
-        return std::make_pair(Value::FromConstantU8(Truncate8(res >> 8)), Value::FromConstantU8(Truncate8(res)));
-      }
-
-      case RegSize_16:
-      {
-        u32 res;
-        if (signed_multiply)
-          res = u32(s32(s16(lhs.constant_value)) * s32(s16(rhs.constant_value)));
-        else
-          res = u32(u16(lhs.constant_value)) * u32(u16(rhs.constant_value));
-
-        return std::make_pair(Value::FromConstantU16(Truncate16(res >> 16)), Value::FromConstantU16(Truncate16(res)));
-      }
-
-      case RegSize_32:
-      {
-        u64 res;
-        if (signed_multiply)
-          res =
u64(s64(s32(lhs.constant_value)) * s64(s32(rhs.constant_value))); - else - res = u64(u32(lhs.constant_value)) * u64(u32(rhs.constant_value)); - - return std::make_pair(Value::FromConstantU32(Truncate32(res >> 32)), Value::FromConstantU32(Truncate32(res))); - } - break; - - case RegSize_64: - { - u64 res; - if (signed_multiply) - res = u64(s64(lhs.constant_value) * s64(rhs.constant_value)); - else - res = lhs.constant_value * rhs.constant_value; - - // TODO: 128-bit multiply... - Panic("128-bit multiply"); - return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(res)); - } - - default: - return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(0)); - } - } - - // We need two registers for both components. - Value hi = m_register_cache.AllocateScratch(lhs.size); - Value lo = m_register_cache.AllocateScratch(lhs.size); - EmitMul(hi.host_reg, lo.host_reg, lhs, rhs, signed_multiply); - return std::make_pair(std::move(hi), std::move(lo)); -} - -Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value << (rhs.constant_value & 0x1F); - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - Value res = m_register_cache.AllocateScratch(lhs.size); - if (rhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, lhs); - } - else - { - if (lhs.IsInHostRegister()) - { - EmitShl(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitShl(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked); - } - } - return res; -} - -Value CodeGenerator::ShrValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value >> (rhs.constant_value & 0x1F); - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - Value res = m_register_cache.AllocateScratch(lhs.size); - if (rhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, lhs); - } - else - { - if (lhs.IsInHostRegister()) - { - EmitShr(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitShr(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked); - } - } - return res; -} - -Value CodeGenerator::SarValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8( - static_cast(static_cast(Truncate8(lhs.constant_value)) >> (rhs.constant_value & 0x1F))); - - case RegSize_16: - return Value::FromConstantU16( - static_cast(static_cast(Truncate16(lhs.constant_value)) >> 
(rhs.constant_value & 0x1F))); - - case RegSize_32: - return Value::FromConstantU32( - static_cast(static_cast(Truncate32(lhs.constant_value)) >> (rhs.constant_value & 0x1F))); - - case RegSize_64: - return Value::FromConstantU64( - static_cast(static_cast(lhs.constant_value) >> (rhs.constant_value & 0x3F))); - - default: - return Value(); - } - } - - Value res = m_register_cache.AllocateScratch(lhs.size); - if (rhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, lhs); - } - else - { - if (lhs.IsInHostRegister()) - { - EmitSar(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitSar(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked); - } - } - return res; -} - -Value CodeGenerator::OrValues(const Value& lhs, const Value& rhs) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value | rhs.constant_value; - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - Value res = m_register_cache.AllocateScratch(lhs.size); - if (lhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, rhs); - return res; - } - else if (rhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, lhs); - return res; - } - - if (lhs.IsInHostRegister()) - { - EmitOr(res.host_reg, lhs.host_reg, rhs); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitOr(res.host_reg, res.host_reg, rhs); - } - return res; -} - -void CodeGenerator::OrValueInPlace(Value& lhs, const Value& rhs) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value | rhs.constant_value; - switch (lhs.size) - { - case RegSize_8: - lhs = Value::FromConstantU8(Truncate8(new_cv)); - break; - - case RegSize_16: - lhs = Value::FromConstantU16(Truncate16(new_cv)); - break; - - case RegSize_32: - lhs = Value::FromConstantU32(Truncate32(new_cv)); - break; - - case RegSize_64: - lhs = Value::FromConstantU64(new_cv); - break; - - default: - lhs = Value(); - break; - } - } - - // unlikely - if (rhs.HasConstantValue(0)) - return; - - if (lhs.IsInHostRegister()) - { - EmitOr(lhs.host_reg, lhs.host_reg, rhs); - } - else - { - Value new_lhs = m_register_cache.AllocateScratch(lhs.size); - EmitCopyValue(new_lhs.host_reg, lhs); - EmitOr(new_lhs.host_reg, new_lhs.host_reg, rhs); - lhs = std::move(new_lhs); - } -} - -Value CodeGenerator::AndValues(const Value& lhs, const Value& rhs) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value & rhs.constant_value; - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - // TODO: and with -1 -> noop - Value res = m_register_cache.AllocateScratch(lhs.size); - if (lhs.HasConstantValue(0) || rhs.HasConstantValue(0)) - { - EmitXor(res.host_reg, res.host_reg, res); - return res; - } - - if (lhs.IsInHostRegister()) - { - 
EmitAnd(res.host_reg, lhs.host_reg, rhs); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitAnd(res.host_reg, res.host_reg, rhs); - } - return res; -} - -void CodeGenerator::AndValueInPlace(Value& lhs, const Value& rhs) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value & rhs.constant_value; - switch (lhs.size) - { - case RegSize_8: - lhs = Value::FromConstantU8(Truncate8(new_cv)); - break; - - case RegSize_16: - lhs = Value::FromConstantU16(Truncate16(new_cv)); - break; - - case RegSize_32: - lhs = Value::FromConstantU32(Truncate32(new_cv)); - break; - - case RegSize_64: - lhs = Value::FromConstantU64(new_cv); - break; - - default: - lhs = Value(); - break; - } - } - - // TODO: and with -1 -> noop - if (lhs.HasConstantValue(0) || rhs.HasConstantValue(0)) - { - EmitXor(lhs.host_reg, lhs.host_reg, lhs); - return; - } - - if (lhs.IsInHostRegister()) - { - EmitAnd(lhs.host_reg, lhs.host_reg, rhs); - } - else - { - Value new_lhs = m_register_cache.AllocateScratch(lhs.size); - EmitCopyValue(new_lhs.host_reg, lhs); - EmitAnd(new_lhs.host_reg, new_lhs.host_reg, rhs); - lhs = std::move(new_lhs); - } -} - -Value CodeGenerator::XorValues(const Value& lhs, const Value& rhs) -{ - DebugAssert(lhs.size == rhs.size); - if (lhs.IsConstant() && rhs.IsConstant()) - { - // compile-time - u64 new_cv = lhs.constant_value ^ rhs.constant_value; - switch (lhs.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - Value res = m_register_cache.AllocateScratch(lhs.size); - EmitCopyValue(res.host_reg, lhs); - if (lhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, rhs); - return res; - } - else if (rhs.HasConstantValue(0)) - { - EmitCopyValue(res.host_reg, lhs); - return res; - } - - if (lhs.IsInHostRegister()) - { - EmitXor(res.host_reg, lhs.host_reg, rhs); - } - else - { - EmitCopyValue(res.host_reg, lhs); - EmitXor(res.host_reg, res.host_reg, rhs); - } - - return res; -} - -Value CodeGenerator::NotValue(const Value& val) -{ - if (val.IsConstant()) - { - u64 new_cv = ~val.constant_value; - switch (val.size) - { - case RegSize_8: - return Value::FromConstantU8(Truncate8(new_cv)); - - case RegSize_16: - return Value::FromConstantU16(Truncate16(new_cv)); - - case RegSize_32: - return Value::FromConstantU32(Truncate32(new_cv)); - - case RegSize_64: - return Value::FromConstantU64(new_cv); - - default: - return Value(); - } - } - - // TODO: Don't allocate scratch if the lhs is a scratch? 
-  Value res = m_register_cache.AllocateScratch(RegSize_32);
-  EmitCopyValue(res.host_reg, val);
-  EmitNot(res.host_reg, val.size);
-  return res;
-}
-
-const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
-{
-  const TickCount* ptr =
-    Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word);
-  AssertMsg(ptr, "Address has dynamic fetch ticks");
-  return ptr;
-}
-
-void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                          Exception excode, Condition condition /* = Condition::Always */)
-{
-  const Value CAUSE_bits = Value::FromConstantU32(
-    Cop0Registers::CAUSE::MakeValueForException(excode, info.is_branch_delay_slot, false, instruction.cop.cop_n));
-
-  if (condition == Condition::Always)
-  {
-    // no need to use far code if we're always raising the exception
-    m_register_cache.FlushAllGuestRegisters(true, true);
-    m_register_cache.FlushLoadDelay(true);
-
-    if (excode == Exception::BP)
-    {
-      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32, u32)>(&CPU::RaiseBreakException), CAUSE_bits,
-                       GetCurrentInstructionPC(), Value::FromConstantU32(instruction.bits));
-    }
-    else
-    {
-      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
-                       GetCurrentInstructionPC());
-    }
-
-    return;
-  }
-
-  LabelType skip_exception;
-  EmitConditionalBranch(condition, true, &skip_exception);
-
-  m_register_cache.PushState();
-
-  EmitBranch(GetCurrentFarCodePointer());
-
-  SwitchToFarCode();
-  EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
-                   GetCurrentInstructionPC());
-  EmitExceptionExit();
-  SwitchToNearCode();
-
-  m_register_cache.PopState();
-
-  EmitBindLabel(&skip_exception);
-}
-
-void CodeGenerator::BlockPrologue()
-{
-#if 0
-  EmitFunctionCall(nullptr, &CodeCache::LogCurrentState);
-#endif
-
-  InitSpeculativeRegs();
-
-  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
-  {
-    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
-    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
-    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
-    EmitBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
-  }
-
-  EmitStoreCPUStructField(OFFSETOF(State, exception_raised), Value::FromConstantU8(0));
-
-  if (g_settings.bios_tty_logging)
-  {
-    const u32 masked_pc = (m_pc & PHYSICAL_MEMORY_ADDRESS_MASK);
-    if (masked_pc == 0xa0)
-      EmitFunctionCall(nullptr, &CPU::HandleA0Syscall);
-    else if (masked_pc == 0xb0)
-      EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
-  }
-
-  EmitICacheCheckAndUpdate();
-
-  // we don't know the state of the last block, so assume load delays might be in progress
-  // TODO: Pull load delay into register cache
-  m_current_instruction_in_branch_delay_slot_dirty = g_settings.cpu_recompiler_memory_exceptions;
-  m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
-  m_current_instruction_was_branch_taken_dirty = false;
-  m_load_delay_dirty = true;
-  m_gte_busy_cycles_dirty = true;
-}
-
-void CodeGenerator::BlockEpilogue()
-{
-#if defined(_DEBUG) && defined(CPU_ARCH_X64)
-  m_emit->nop();
-#endif
-
-  m_register_cache.FlushAllGuestRegisters(true, true);
-  if (m_register_cache.HasLoadDelay())
-    m_register_cache.WriteLoadDelayToCPU(true);
-
-  AddPendingCycles(true);
-}
-
-void CodeGenerator::InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                        TickCount cycles, bool force_sync /* = false */)
-{
-#if defined(_DEBUG) && defined(CPU_ARCH_X64)
-
m_emit->nop(); -#endif - - // move instruction offsets forward - if (m_pc_valid) - m_pc += 4; - - // reset dirty flags - if (m_branch_was_taken_dirty) - { - Value temp = m_register_cache.AllocateScratch(RegSize_8); - EmitLoadCPUStructField(temp.host_reg, RegSize_8, OFFSETOF(State, branch_was_taken)); - EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), temp); - EmitStoreCPUStructField(OFFSETOF(State, branch_was_taken), Value::FromConstantU8(0)); - m_current_instruction_was_branch_taken_dirty = true; - m_branch_was_taken_dirty = false; - } - else if (m_current_instruction_was_branch_taken_dirty) - { - EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), Value::FromConstantU8(0)); - m_current_instruction_was_branch_taken_dirty = false; - } - - if (m_current_instruction_in_branch_delay_slot_dirty && !info.is_branch_delay_slot) - { - EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(0)); - m_current_instruction_in_branch_delay_slot_dirty = false; - } - - if (!force_sync) - { - // Defer updates for non-faulting instructions. - m_delayed_cycles_add += cycles; - return; - } - - if (info.is_branch_delay_slot && g_settings.cpu_recompiler_memory_exceptions) - { - // m_current_instruction_in_branch_delay_slot = true - EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(1)); - m_current_instruction_in_branch_delay_slot_dirty = true; - } - - m_delayed_cycles_add += cycles; - AddPendingCycles(true); -} - -void CodeGenerator::InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - m_register_cache.UpdateLoadDelay(); - - if (m_load_delay_dirty) - { - // we have to invalidate the register cache, since the load delayed register might've been cached - DEBUG_LOG("Emitting delay slot flush"); - EmitFlushInterpreterLoadDelay(); - m_register_cache.InvalidateAllNonDirtyGuestRegisters(); - m_load_delay_dirty = false; - } - - // copy if the previous instruction was a load, reset the current value on the next instruction - if (m_next_load_delay_dirty) - { - DEBUG_LOG("Emitting delay slot flush (with move next)"); - EmitMoveNextInterpreterLoadDelay(); - m_next_load_delay_dirty = false; - m_load_delay_dirty = true; - } -} - -void CodeGenerator::TruncateBlockAtCurrentInstruction() -{ - DEV_LOG("Truncating block {:08X} at {:08X}", m_block->pc, m_current_instruction.info->pc); - m_block_end.instruction = m_current_instruction.instruction + 1; - m_block_end.info = m_current_instruction.info + 1; - WriteNewPC(CalculatePC(), true); -} - -void CodeGenerator::AddPendingCycles(bool commit) -{ - if (m_delayed_cycles_add == 0 && m_gte_done_cycle <= m_delayed_cycles_add) - return; - - if (m_gte_done_cycle > m_delayed_cycles_add) - { - Value temp = m_register_cache.AllocateScratch(RegSize_32); - EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks)); - if (m_delayed_cycles_add > 0) - { - EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_delayed_cycles_add), false); - EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp); - EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), - Value::FromConstantU32(m_gte_done_cycle - m_delayed_cycles_add), false); - EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp); - } - else - { - EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_gte_done_cycle), false); - 
EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp); - } - } - else - { - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(m_delayed_cycles_add)); - } - - if (commit) - { - m_gte_done_cycle = std::max(m_gte_done_cycle - m_delayed_cycles_add, 0); - m_delayed_cycles_add = 0; - } -} - -void CodeGenerator::AddGTETicks(TickCount ticks) -{ - m_gte_done_cycle = m_delayed_cycles_add + ticks; - DEBUG_LOG("Adding {} GTE ticks", ticks); -} - -void CodeGenerator::StallUntilGTEComplete() -{ - if (!m_gte_busy_cycles_dirty) - { - // simple case - in block scheduling - if (m_gte_done_cycle > m_delayed_cycles_add) - { - DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_delayed_cycles_add); - m_delayed_cycles_add += (m_gte_done_cycle - m_delayed_cycles_add); - } - - return; - } - - // switch to in block scheduling - EmitStallUntilGTEComplete(); - m_gte_done_cycle = 0; - m_gte_busy_cycles_dirty = false; -} - -Value CodeGenerator::CalculatePC(u32 offset /* = 0 */) -{ - if (!m_pc_valid) - Panic("Attempt to get an indeterminate PC"); - - return Value::FromConstantU32(m_pc + offset); -} - -Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */) -{ - return Value::FromConstantU32(m_current_instruction.info->pc); -} - -void CodeGenerator::WriteNewPC(const Value& value, bool commit) -{ - // TODO: This _could_ be moved into the register cache, but would it gain anything? - EmitStoreCPUStructField(OFFSETOF(CPU::State, pc), value); - if (commit) - { - m_pc_valid = value.IsConstant(); - if (m_pc_valid) - m_pc = static_cast(value.constant_value); - } -} - -bool CodeGenerator::Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", info.pc, instruction.bits); - - InstructionPrologue(instruction, info, 1, true); - - // flush and invalidate all guest registers, since the fallback could change any of them - m_register_cache.FlushAllGuestRegisters(true, true); - if (m_register_cache.HasLoadDelay()) - { - m_load_delay_dirty = true; - m_register_cache.WriteLoadDelayToCPU(true); - } - - EmitStoreCPUStructField(OFFSETOF(State, current_instruction_pc), Value::FromConstantU32(info.pc)); - EmitStoreCPUStructField(OFFSETOF(State, current_instruction.bits), Value::FromConstantU32(instruction.bits)); - - // TODO: Use carry flag or something here too - Value return_value = m_register_cache.AllocateScratch(RegSize_8); - EmitFunctionCall(&return_value, - g_settings.gpu_pgxp_enable ? 
&Thunks::InterpretInstructionPGXP : &Thunks::InterpretInstruction); - EmitExceptionExitOnBool(return_value); - - m_current_instruction_in_branch_delay_slot_dirty = info.is_branch_instruction; - m_branch_was_taken_dirty = info.is_branch_instruction; - m_next_load_delay_dirty = info.has_load_delay; - InvalidateSpeculativeValues(); - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - Value lhs; - Value rhs; - Reg dest; - - SpeculativeValue spec_lhs, spec_rhs; - SpeculativeValue spec_value; - - if (instruction.op != InstructionOp::funct) - { - // rt <- rs op zext(imm) - lhs = m_register_cache.ReadGuestRegister(instruction.i.rs); - rhs = Value::FromConstantU32(instruction.i.imm_zext32()); - dest = instruction.i.rt; - - spec_lhs = SpeculativeReadReg(instruction.i.rs); - spec_rhs = instruction.i.imm_zext32(); - } - else - { - lhs = m_register_cache.ReadGuestRegister(instruction.r.rs); - rhs = m_register_cache.ReadGuestRegister(instruction.r.rt); - dest = instruction.r.rd; - - spec_lhs = SpeculativeReadReg(instruction.r.rs); - spec_rhs = SpeculativeReadReg(instruction.r.rt); - } - - Value result; - switch (instruction.op) - { - case InstructionOp::ori: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_ORI, Value::FromConstantU32(instruction.bits), lhs); - - result = OrValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs | *spec_rhs; - - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && - instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0)) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); - } - } - break; - - case InstructionOp::andi: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_ANDI, Value::FromConstantU32(instruction.bits), lhs); - - result = AndValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs & *spec_rhs; - } - break; - - case InstructionOp::xori: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_XORI, Value::FromConstantU32(instruction.bits), lhs); - - result = XorValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs ^ *spec_rhs; - - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && - instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0)) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); - } - } - break; - - case InstructionOp::funct: - { - switch (instruction.r.funct) - { - case InstructionFunct::or_: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_OR_, Value::FromConstantU32(instruction.bits), lhs, rhs); - - result = OrValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs | *spec_rhs; - - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && - ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || - (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) - { - const auto rs = lhs.HasConstantValue(0) ? 
static_cast(instruction.r.rt) : - static_cast(instruction.r.rs); - - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)), - lhs.HasConstantValue(0) ? rhs : lhs); - } - } - break; - - case InstructionFunct::and_: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_AND_, Value::FromConstantU32(instruction.bits), lhs, rhs); - - result = AndValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs & *spec_rhs; - } - break; - - case InstructionFunct::xor_: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_XOR_, Value::FromConstantU32(instruction.bits), lhs, rhs); - - result = XorValues(lhs, rhs); - if (spec_lhs && spec_rhs) - spec_value = *spec_lhs ^ *spec_rhs; - - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero && - ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || - (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) - { - const auto rs = lhs.HasConstantValue(0) ? static_cast(instruction.r.rt) : - static_cast(instruction.r.rs); - - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)), - lhs.HasConstantValue(0) ? rhs : lhs); - } - } - break; - - case InstructionFunct::nor: - { - if (g_settings.UsingPGXPCPUMode()) - EmitFunctionCall(nullptr, &PGXP::CPU_NOR, Value::FromConstantU32(instruction.bits), lhs, rhs); - - result = NotValue(OrValues(lhs, rhs)); - if (spec_lhs && spec_rhs) - spec_value = ~(*spec_lhs | *spec_rhs); - } - break; - - default: - UnreachableCode(); - break; - } - } - break; - - default: - UnreachableCode(); - break; - } - - m_register_cache.WriteGuestRegister(dest, std::move(result)); - SpeculativeWriteReg(dest, spec_value); - - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - const InstructionFunct funct = instruction.r.funct; - Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt); - SpeculativeValue rt_spec = SpeculativeReadReg(instruction.r.rt); - Value shamt; - SpeculativeValue shamt_spec; - if (funct == InstructionFunct::sll || funct == InstructionFunct::srl || funct == InstructionFunct::sra) - { - // rd <- rt op shamt - shamt = Value::FromConstantU32(instruction.r.shamt); - shamt_spec = instruction.r.shamt; - } - else - { - // rd <- rt op (rs & 0x1F) - shamt = m_register_cache.ReadGuestRegister(instruction.r.rs); - shamt_spec = SpeculativeReadReg(instruction.r.rs); - } - - Value result; - SpeculativeValue result_spec; - switch (instruction.r.funct) - { - case InstructionFunct::sll: - case InstructionFunct::sllv: - { - if (g_settings.UsingPGXPCPUMode()) - { - if (instruction.r.funct == InstructionFunct::sll) - EmitFunctionCall(nullptr, &PGXP::CPU_SLL, Value::FromConstantU32(instruction.bits), rt); - else // if (instruction.r.funct == InstructionFunct::sllv) - EmitFunctionCall(nullptr, &PGXP::CPU_SLLV, Value::FromConstantU32(instruction.bits), rt, shamt); - } - - result = ShlValues(rt, shamt, false); - if (rt_spec && shamt_spec) - result_spec = *rt_spec << *shamt_spec; - } - break; - - case InstructionFunct::srl: - case InstructionFunct::srlv: - { - if (g_settings.UsingPGXPCPUMode()) - { - if (instruction.r.funct == InstructionFunct::srl) - EmitFunctionCall(nullptr, &PGXP::CPU_SRL, Value::FromConstantU32(instruction.bits), 
rt);
-        else // if (instruction.r.funct == InstructionFunct::srlv)
-          EmitFunctionCall(nullptr, &PGXP::CPU_SRLV, Value::FromConstantU32(instruction.bits), rt, shamt);
-      }
-
-      result = ShrValues(rt, shamt, false);
-      if (rt_spec && shamt_spec)
-        result_spec = *rt_spec >> *shamt_spec;
-    }
-    break;
-
-    case InstructionFunct::sra:
-    case InstructionFunct::srav:
-    {
-      if (g_settings.UsingPGXPCPUMode())
-      {
-        if (instruction.r.funct == InstructionFunct::sra)
-          EmitFunctionCall(nullptr, &PGXP::CPU_SRA, Value::FromConstantU32(instruction.bits), rt);
-        else // if (instruction.r.funct == InstructionFunct::srav)
-          EmitFunctionCall(nullptr, &PGXP::CPU_SRAV, Value::FromConstantU32(instruction.bits), rt, shamt);
-      }
-
-      result = SarValues(rt, shamt, false);
-      if (rt_spec && shamt_spec)
-        result_spec = static_cast<u32>(static_cast<s32>(*rt_spec) >> *shamt_spec);
-    }
-    break;
-
-    default:
-      UnreachableCode();
-      break;
-  }
-
-  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));
-  SpeculativeWriteReg(instruction.r.rd, result_spec);
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-bool CodeGenerator::Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  // rt <- mem[rs + sext(imm)]
-  Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
-  Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
-  Value address = AddValues(base, offset, false);
-
-  SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
-  SpeculativeValue value_spec;
-  if (address_spec)
-    address_spec = *address_spec + instruction.i.imm_sext32();
-
-  Value result;
-  switch (instruction.op)
-  {
-    case InstructionOp::lb:
-    case InstructionOp::lbu:
-    {
-      result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_8);
-      ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lb));
-      if (g_settings.gpu_pgxp_enable)
-        EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(instruction.bits), address, result);
-
-      if (address_spec)
-      {
-        value_spec = SpeculativeReadMemory(*address_spec & ~3u);
-        if (value_spec)
-          value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFu;
-      }
-    }
-    break;
-
-    case InstructionOp::lh:
-    case InstructionOp::lhu:
-    {
-      result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_16);
-      ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lh));
-
-      if (g_settings.gpu_pgxp_enable)
-      {
-        EmitFunctionCall(nullptr, (instruction.op == InstructionOp::lhu) ?
&PGXP::CPU_LHU : PGXP::CPU_LH, - Value::FromConstantU32(instruction.bits), address, result); - } - - if (address_spec) - { - value_spec = SpeculativeReadMemory(*address_spec & ~3u); - if (value_spec) - value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFFFu; - } - } - break; - - case InstructionOp::lw: - { - result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32); - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, result); - - if (address_spec) - value_spec = SpeculativeReadMemory(*address_spec); - } - break; - - default: - UnreachableCode(); - break; - } - - m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(result)); - SpeculativeWriteReg(instruction.i.rt, value_spec); - - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - // mem[rs + sext(imm)] <- rt - Value base = m_register_cache.ReadGuestRegister(instruction.i.rs); - Value offset = Value::FromConstantU32(instruction.i.imm_sext32()); - Value address = AddValues(base, offset, false); - Value value = m_register_cache.ReadGuestRegister(instruction.i.rt); - - SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs); - SpeculativeValue value_spec = SpeculativeReadReg(instruction.i.rt); - if (address_spec) - address_spec = *address_spec + instruction.i.imm_sext32(); - - switch (instruction.op) - { - case InstructionOp::sb: - { - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(instruction.bits), address, value); - - EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_8, value); - - if (address_spec) - { - const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u); - const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr); - if (aligned_existing_value) - { - if (value_spec) - { - const u32 shift = (aligned_addr & 3u) * 8u; - SpeculativeWriteMemory(aligned_addr, - (*aligned_existing_value & ~(0xFFu << shift)) | ((*value_spec & 0xFFu) << shift)); - } - else - { - SpeculativeWriteMemory(aligned_addr, std::nullopt); - } - } - } - } - break; - - case InstructionOp::sh: - { - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(instruction.bits), address, value); - - EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_16, value); - - if (address_spec) - { - const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u); - const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr); - if (aligned_existing_value) - { - if (value_spec) - { - const u32 shift = (aligned_addr & 1u) * 16u; - SpeculativeWriteMemory(aligned_addr, (*aligned_existing_value & ~(0xFFFFu << shift)) | - ((*value_spec & 0xFFFFu) << shift)); - } - else - { - SpeculativeWriteMemory(aligned_addr, std::nullopt); - } - } - } - } - break; - - case InstructionOp::sw: - { - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, value); - - EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, value); - - if (address_spec) - SpeculativeWriteMemory(*address_spec, value_spec); - } - break; - - default: - UnreachableCode(); - break; - } - - InstructionEpilogue(instruction, info); - - if (address_spec) - { - const 
CPU::Segment seg = GetSegmentForAddress(*address_spec); - if (seg == Segment::KUSEG || seg == Segment::KSEG0 || seg == Segment::KSEG1) - { - const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(*address_spec); - const PhysicalMemoryAddress block_start = VirtualAddressToPhysical(m_block->pc); - const PhysicalMemoryAddress block_end = - VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction))); - if (phys_addr >= block_start && phys_addr < block_end) - { - WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.", - info.pc, phys_addr, block_start, block_end); - TruncateBlockAtCurrentInstruction(); - } - } - } - - return true; -} - -bool CodeGenerator::Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - Value base = m_register_cache.ReadGuestRegister(instruction.i.rs); - Value offset = Value::FromConstantU32(instruction.i.imm_sext32()); - Value address = AddValues(base, offset, false); - base.ReleaseAndClear(); - - SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs); - if (address_spec) - address_spec = *address_spec + instruction.i.imm_sext32(); - - Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8 - address = AndValues(address, Value::FromConstantU32(~u32(3))); - - // hack to bypass load delays - Value value; - if (instruction.i.rt == m_register_cache.GetLoadDelayRegister()) - { - const Value& ld_value = m_register_cache.GetLoadDelayValue(); - if (ld_value.IsInHostRegister()) - value.SetHostReg(&m_register_cache, ld_value.GetHostRegister(), ld_value.size); - else - value = ld_value; - } - else - { - // if this is the first instruction in the block, we need to stall until the load finishes - // we don't actually care if it's our target reg or not, if it's not, it won't affect anything - if (m_load_delay_dirty) - { - DEV_LOG("Flushing interpreter load delay for lwl/lwr instruction at 0x{:08X}", info.pc); - EmitFlushInterpreterLoadDelay(); - m_register_cache.InvalidateGuestRegister(instruction.r.rt); - m_load_delay_dirty = false; - } - - value = m_register_cache.ReadGuestRegister(instruction.i.rt, true, true); - } - - Value mem; - if (instruction.op == InstructionOp::lwl) - { - Value lhs = ShrValues(Value::FromConstantU32(0x00FFFFFF), shift); - AndValueInPlace(lhs, value); - shift = SubValues(Value::FromConstantU32(24), shift, false); - value.ReleaseAndClear(); - - mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32); - EmitShl(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift); - EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs); - } - else - { - Value lhs = ShlValues(Value::FromConstantU32(0xFFFFFF00), SubValues(Value::FromConstantU32(24), shift, false)); - AndValueInPlace(lhs, value); - value.ReleaseAndClear(); - - mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32); - EmitShr(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift); - EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs); - } - - shift.ReleaseAndClear(); - - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, mem); - - m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(mem)); - - // TODO: Speculative values - SpeculativeWriteReg(instruction.r.rt, std::nullopt); - - InstructionEpilogue(instruction, info); 
- return true; -} - -bool CodeGenerator::Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - Value base = m_register_cache.ReadGuestRegister(instruction.i.rs); - Value offset = Value::FromConstantU32(instruction.i.imm_sext32()); - Value address = AddValues(base, offset, false); - base.ReleaseAndClear(); - - // TODO: Speculative values - SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs); - if (address_spec) - { - address_spec = *address_spec + instruction.i.imm_sext32(); - SpeculativeWriteMemory(*address_spec & ~3u, std::nullopt); - } - - Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8 - address = AndValues(address, Value::FromConstantU32(~u32(3))); - - Value mem; - if (instruction.op == InstructionOp::swl) - { - Value mask = ShlValues(Value::FromConstantU32(0xFFFFFF00), shift); - mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32); - EmitAnd(mem.GetHostRegister(), mem.GetHostRegister(), mask); - mask.ReleaseAndClear(); - - Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt); - Value lhs = ShrValues(reg, SubValues(Value::FromConstantU32(24), shift, false)); - reg.ReleaseAndClear(); - - EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs); - } - else - { - Value mask = ShrValues(Value::FromConstantU32(0x00FFFFFF), SubValues(Value::FromConstantU32(24), shift, false)); - mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32); - AndValueInPlace(mem, mask); - mask.ReleaseAndClear(); - - Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt); - Value lhs = ShlValues(reg, shift); - reg.ReleaseAndClear(); - - EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs); - } - - shift.ReleaseAndClear(); - - EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, mem); - if (g_settings.gpu_pgxp_enable) - EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, mem); - - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - switch (instruction.r.funct) - { - case InstructionFunct::mfhi: - { - Value hi = m_register_cache.ReadGuestRegister(Reg::hi); - if (g_settings.UsingPGXPCPUMode()) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::hi)), hi); - } - - m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(hi)); - SpeculativeWriteReg(instruction.r.rd, std::nullopt); - } - break; - - case InstructionFunct::mthi: - { - Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs); - if (g_settings.UsingPGXPCPUMode()) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(Reg::hi, instruction.r.rs)), rs); - } - - m_register_cache.WriteGuestRegister(Reg::hi, std::move(rs)); - } - break; - - case InstructionFunct::mflo: - { - Value lo = m_register_cache.ReadGuestRegister(Reg::lo); - if (g_settings.UsingPGXPCPUMode()) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::lo)), lo); - } - - m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(lo)); - SpeculativeWriteReg(instruction.r.rd, std::nullopt); - } - break; - - case InstructionFunct::mtlo: - { - 
Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs); - if (g_settings.UsingPGXPCPUMode()) - { - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(Reg::lo, instruction.r.rs)), rs); - } - - m_register_cache.WriteGuestRegister(Reg::lo, std::move(rs)); - } - break; - - default: - UnreachableCode(); - break; - } - - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - const bool check_overflow = (instruction.op == InstructionOp::addi || (instruction.op == InstructionOp::funct && - instruction.r.funct == InstructionFunct::add)); - - Value lhs, rhs; - SpeculativeValue lhs_spec, rhs_spec; - Reg dest; - - switch (instruction.op) - { - case InstructionOp::addi: - case InstructionOp::addiu: - { - // rt <- rs + sext(imm) - dest = instruction.i.rt; - lhs = m_register_cache.ReadGuestRegister(instruction.i.rs); - rhs = Value::FromConstantU32(instruction.i.imm_sext32()); - - lhs_spec = SpeculativeReadReg(instruction.i.rs); - rhs_spec = instruction.i.imm_sext32(); - } - break; - - case InstructionOp::funct: - { - Assert(instruction.r.funct == InstructionFunct::add || instruction.r.funct == InstructionFunct::addu); - dest = instruction.r.rd; - lhs = m_register_cache.ReadGuestRegister(instruction.r.rs); - rhs = m_register_cache.ReadGuestRegister(instruction.r.rt); - lhs_spec = SpeculativeReadReg(instruction.r.rs); - rhs_spec = SpeculativeReadReg(instruction.r.rt); - } - break; - - default: - UnreachableCode(); - return false; - } - - // detect register moves and handle them for pgxp - if (dest != Reg::zero && g_settings.gpu_pgxp_enable) - { - bool handled = false; - if (instruction.op != InstructionOp::funct) - { - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && instruction.i.rs != Reg::zero && - dest != instruction.i.rs && rhs.HasConstantValue(0)) - { - handled = true; - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); - } - } - else - { - if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && - ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) || - (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt))) - { - handled = true; - EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, - Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs); - } - } - - if (g_settings.gpu_pgxp_cpu && !handled) - { - if (instruction.op != InstructionOp::funct) - EmitFunctionCall(nullptr, &PGXP::CPU_ADDI, Value::FromConstantU32(instruction.bits), lhs); - else - EmitFunctionCall(nullptr, &PGXP::CPU_ADD, Value::FromConstantU32(instruction.bits), lhs, rhs); - } - } - - Value result = AddValues(lhs, rhs, check_overflow); - if (check_overflow) - GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow); - - m_register_cache.WriteGuestRegister(dest, std::move(result)); - - SpeculativeValue value_spec; - if (lhs_spec && rhs_spec) - value_spec = *lhs_spec + *rhs_spec; - SpeculativeWriteReg(dest, value_spec); - - InstructionEpilogue(instruction, info); - return true; -} - -bool CodeGenerator::Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info) -{ - InstructionPrologue(instruction, info, 1); - - Assert(instruction.op == InstructionOp::funct); - const bool check_overflow = (instruction.r.funct == 
InstructionFunct::sub);
-
-  Value lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
-  Value rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
-
-  SpeculativeValue lhs_spec = SpeculativeReadReg(instruction.r.rs);
-  SpeculativeValue rhs_spec = SpeculativeReadReg(instruction.r.rt);
-
-  if (g_settings.UsingPGXPCPUMode())
-    EmitFunctionCall(nullptr, &PGXP::CPU_SUB, Value::FromConstantU32(instruction.bits), lhs, rhs);
-
-  Value result = SubValues(lhs, rhs, check_overflow);
-  if (check_overflow)
-    GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);
-
-  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));
-
-  SpeculativeValue value_spec;
-  if (lhs_spec && rhs_spec)
-    value_spec = *lhs_spec - *rhs_spec;
-  SpeculativeWriteReg(instruction.r.rd, value_spec);
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-bool CodeGenerator::Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  const bool signed_multiply = (instruction.r.funct == InstructionFunct::mult);
-  Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
-  Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt);
-  if (g_settings.UsingPGXPCPUMode())
-  {
-    EmitFunctionCall(nullptr, signed_multiply ? &PGXP::CPU_MULT : &PGXP::CPU_MULTU,
-                     Value::FromConstantU32(instruction.bits), rs, rt);
-  }
-
-  std::pair<Value, Value> result = MulValues(rs, rt, signed_multiply);
-  rs.ReleaseAndClear();
-  rt.ReleaseAndClear();
-  m_register_cache.WriteGuestRegister(Reg::hi, std::move(result.first));
-  m_register_cache.WriteGuestRegister(Reg::lo, std::move(result.second));
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-static std::tuple<u32, u32> MIPSDivide(u32 num, u32 denom)
-{
-  u32 lo, hi;
-
-  if (denom == 0)
-  {
-    // divide by zero
-    lo = UINT32_C(0xFFFFFFFF);
-    hi = static_cast<u32>(num);
-  }
-  else
-  {
-    lo = num / denom;
-    hi = num % denom;
-  }
-
-  return std::tie(lo, hi);
-}
-
-static std::tuple<s32, s32> MIPSDivide(s32 num, s32 denom)
-{
-  s32 lo, hi;
-  if (denom == 0)
-  {
-    // divide by zero
-    lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
-    hi = static_cast<s32>(num);
-  }
-  else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
-  {
-    // unrepresentable
-    lo = UINT32_C(0x80000000);
-    hi = 0;
-  }
-  else
-  {
-    lo = num / denom;
-    hi = num % denom;
-  }
-
-  return std::tie(lo, hi);
-}
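The two deleted `MIPSDivide()` overloads encode the R3000A's divide semantics, which never trap: division by zero and the single unrepresentable signed quotient yield defined lo/hi values instead. A minimal standalone restatement (hypothetical helper names, same edge-case values as the source above):

```cpp
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> mips_divu(uint32_t num, uint32_t denom) // {lo, hi}
{
  if (denom == 0)
    return {0xFFFFFFFFu, num}; // lo = all ones, hi = dividend
  return {num / denom, num % denom};
}

static std::pair<int32_t, int32_t> mips_div(int32_t num, int32_t denom) // {lo, hi}
{
  if (denom == 0)
    return {(num >= 0) ? -1 : 1, num}; // defined result, no exception raised
  if (static_cast<uint32_t>(num) == 0x80000000u && denom == -1)
    return {INT32_MIN, 0}; // quotient unrepresentable in 32 bits
  return {num / denom, num % denom};
}

int main()
{
  const auto [ulo, uhi] = mips_divu(123u, 0u);
  assert(ulo == 0xFFFFFFFFu && uhi == 123u);
  const auto [slo, shi] = mips_div(-5, 0);
  assert(slo == 1 && shi == -5);
  const auto [olo, ohi] = mips_div(INT32_MIN, -1);
  assert(olo == INT32_MIN && ohi == 0);
  return 0;
}
```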
-
-bool CodeGenerator::Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  Value num = m_register_cache.ReadGuestRegister(instruction.r.rs);
-  Value denom = m_register_cache.ReadGuestRegister(instruction.r.rt);
-
-  if (g_settings.UsingPGXPCPUMode())
-    EmitFunctionCall(nullptr, &PGXP::CPU_DIV, Value::FromConstantU32(instruction.bits), num, denom);
-
-  if (num.IsConstant() && denom.IsConstant())
-  {
-    const auto [lo, hi] = MIPSDivide(static_cast<u32>(num.constant_value), static_cast<u32>(denom.constant_value));
-    m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(lo));
-    m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(hi));
-  }
-  else
-  {
-    Value num_reg = GetValueInHostRegister(num, false);
-    Value denom_reg = GetValueInHostRegister(denom, false);
-
-    m_register_cache.InvalidateGuestRegister(Reg::lo);
-    m_register_cache.InvalidateGuestRegister(Reg::hi);
-
-    Value lo = m_register_cache.AllocateScratch(RegSize_32);
-    Value hi = m_register_cache.AllocateScratch(RegSize_32);
-    m_register_cache.InhibitAllocation();
-
-    LabelType do_divide, done;
-
-    if (!denom.IsConstant() || denom.HasConstantValue(0))
-    {
-      // if (denom == 0)
-      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
-                            &do_divide);
-      {
-        // unrepresentable
-        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFF));
-        EmitCopyValue(hi.GetHostRegister(), num_reg);
-        EmitBranch(&done);
-      }
-    }
-
-    // else
-    {
-      EmitBindLabel(&do_divide);
-      EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
-              RegSize_32, false);
-    }
-
-    EmitBindLabel(&done);
-
-    m_register_cache.UninhibitAllocation();
-    m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
-    m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
-  }
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
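Note how `Compile_Divide()` folds the whole operation when both operands are compile-time constants: `MIPSDivide()` runs at block-compile time and lo/hi are written as plain constants, so no branches or host divide are emitted. A hedged sketch of that fold as a `constexpr` analogue (illustrative only, not DuckStation code):

```cpp
#include <cstdint>
#include <utility>

// Constexpr analogue of the num.IsConstant() && denom.IsConstant() path:
// the divide is fully evaluated while compiling the block.
constexpr std::pair<uint32_t, uint32_t> fold_divu(uint32_t num, uint32_t denom)
{
  return (denom == 0) ? std::pair<uint32_t, uint32_t>{0xFFFFFFFFu, num} :
                        std::pair<uint32_t, uint32_t>{num / denom, num % denom};
}

// The guest div collapses to two constant writes into lo/hi.
static_assert(fold_divu(10, 3) == std::pair<uint32_t, uint32_t>{3, 1});
static_assert(fold_divu(7, 0) == std::pair<uint32_t, uint32_t>{0xFFFFFFFFu, 7});

int main() { return 0; }
```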
-
-    LabelType do_divide, done;
-
-    LabelType not_zero;
-    if (!denom.IsConstant() || denom.HasConstantValue(0))
-    {
-      // if (denom == 0)
-      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
-                            &not_zero);
-      {
-        // hi = static_cast<u32>(num);
-        EmitCopyValue(hi.GetHostRegister(), num_reg);
-
-        // lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
-        LabelType greater_equal_zero;
-        EmitConditionalBranch(Condition::GreaterEqual, false, num_reg.GetHostRegister(), Value::FromConstantU32(0),
-                              &greater_equal_zero);
-        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(1));
-        EmitBranch(&done);
-        EmitBindLabel(&greater_equal_zero);
-        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFFu));
-        EmitBranch(&done);
-      }
-    }
-
-    // else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
-    {
-      EmitBindLabel(&not_zero);
-      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantS32(-1),
-                            &do_divide);
-      EmitConditionalBranch(Condition::NotEqual, false, num_reg.GetHostRegister(), lo, &do_divide);
-
-      // unrepresentable
-      // EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); // done above
-      EmitCopyValue(hi.GetHostRegister(), Value::FromConstantU32(0));
-      EmitBranch(&done);
-    }
-
-    // else
-    {
-      EmitBindLabel(&do_divide);
-      EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
-              RegSize_32, true);
-    }
-
-    EmitBindLabel(&done);
-
-    m_register_cache.UninhibitAllocation();
-    m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
-    m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
-  }
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-bool CodeGenerator::Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  const bool signed_comparison =
-    (instruction.op == InstructionOp::slti ||
-     (instruction.op == InstructionOp::funct && instruction.r.funct == InstructionFunct::slt));
-
-  Reg dest;
-  Value lhs, rhs;
-  SpeculativeValue lhs_spec, rhs_spec;
-  if (instruction.op == InstructionOp::slti || instruction.op == InstructionOp::sltiu)
-  {
-    // rt <- rs < {z,s}ext(imm)
-    dest = instruction.i.rt;
-    lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
-    rhs = Value::FromConstantU32(instruction.i.imm_sext32());
-    lhs_spec = SpeculativeReadReg(instruction.i.rs);
-    rhs_spec = instruction.i.imm_sext32();
-
-    // flush the old value, which might free up a register
-    if (dest != instruction.r.rs)
-      m_register_cache.InvalidateGuestRegister(dest);
-  }
-  else
-  {
-    // rd <- rs < rt
-    dest = instruction.r.rd;
-    lhs = m_register_cache.ReadGuestRegister(instruction.r.rs, true, true);
-    rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
-    lhs_spec = SpeculativeReadReg(instruction.r.rs);
-    rhs_spec = SpeculativeReadReg(instruction.r.rt);
-
-    // flush the old value, which might free up a register
-    if (dest != instruction.i.rs && dest != instruction.r.rt)
-      m_register_cache.InvalidateGuestRegister(dest);
-  }
-
-  if (g_settings.UsingPGXPCPUMode())
-  {
-    if (instruction.op == InstructionOp::slti)
-      EmitFunctionCall(nullptr, &PGXP::CPU_SLTI, Value::FromConstantU32(instruction.bits), lhs);
-    else if (instruction.op == InstructionOp::sltiu)
-      EmitFunctionCall(nullptr, &PGXP::CPU_SLTIU, Value::FromConstantU32(instruction.bits), lhs);
-    else if (instruction.r.funct == InstructionFunct::slt)
-      EmitFunctionCall(nullptr, &PGXP::CPU_SLT, Value::FromConstantU32(instruction.bits), lhs, rhs);
-    else // if (instruction.r.funct == InstructionFunct::sltu)
-      EmitFunctionCall(nullptr, &PGXP::CPU_SLTU, Value::FromConstantU32(instruction.bits), lhs, rhs);
-  }
-
-  Value result = m_register_cache.AllocateScratch(RegSize_32);
-  EmitCmp(lhs.host_reg, rhs);
-  EmitSetConditionResult(result.host_reg, result.size, signed_comparison ? Condition::Less : Condition::Below);
-
-  m_register_cache.WriteGuestRegister(dest, std::move(result));
-
-  SpeculativeValue value_spec;
-  if (lhs_spec && rhs_spec)
-  {
-    value_spec = BoolToUInt32(signed_comparison ? (static_cast<s32>(*lhs_spec) < static_cast<s32>(*rhs_spec)) :
-                                                  (*lhs_spec < *rhs_spec));
-  }
-  SpeculativeWriteReg(dest, value_spec);
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-bool CodeGenerator::Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  auto DoBranch = [this, &instruction, &info](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
-                                              Value&& branch_target) {
-    const bool can_link_block = info.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
-
-    // ensure the lr register is flushed, since we want its correct value after the branch.
-    // we don't want to invalidate it yet because of "jalr r0, r0"; branch_target could be the lr_reg.
-    if (lr_reg != Reg::count && lr_reg != Reg::zero)
-      m_register_cache.FlushGuestRegister(lr_reg, false, true);
-
-    // compute the return address, which is also set as the new pc when the branch isn't taken
-    Value constant_next_pc = CalculatePC(4);
-    Value next_pc = constant_next_pc;
-    DebugAssert(constant_next_pc.IsConstant());
-    if (condition != Condition::Always)
-    {
-      next_pc = m_register_cache.AllocateScratch(RegSize_32);
-      EmitCopyValue(next_pc.GetHostRegister(), constant_next_pc);
-    }
-
-    Value take_branch;
-    LabelType branch_taken, branch_not_taken;
-    if (condition != Condition::Always)
-    {
-      if (!can_link_block)
-      {
-        // the condition is inverted because we branch over the taken path
-        if (lhs.IsValid() && rhs.IsValid())
-          EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken);
-        else if (lhs.IsValid())
-          EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken);
-        else
-          EmitConditionalBranch(condition, true, &branch_not_taken);
-      }
-      else
-      {
-        take_branch = m_register_cache.AllocateScratch(RegSize_32);
-        switch (condition)
-        {
-          case Condition::NotEqual:
-          case Condition::Equal:
-          case Condition::Overflow:
-          case Condition::Greater:
-          case Condition::GreaterEqual:
-          case Condition::LessEqual:
-          case Condition::Less:
-          case Condition::Above:
-          case Condition::AboveEqual:
-          case Condition::Below:
-          case Condition::BelowEqual:
-          {
-            EmitCmp(lhs.GetHostRegister(), rhs);
-            EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
-          }
-          break;
-
-          case Condition::Negative:
-          case Condition::PositiveOrZero:
-          case Condition::NotZero:
-          case Condition::Zero:
-          {
-            Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
-            EmitTest(lhs.GetHostRegister(), lhs);
-            EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
-          }
-          break;
-
-          default:
-            UnreachableCode();
-            break;
-        }
-      }
-    }
-
-    // save the old PC if we want to
-    if (lr_reg != Reg::count && lr_reg != Reg::zero)
-    {
-      // Can't cache the value because we have two branches. The load delay is cancelled because of the immediate
-      // flush below; without it, the value we store here could be overwritten at the end of the instruction.
-      EmitCancelInterpreterLoadDelayForReg(lr_reg);
-      EmitStoreGuestRegister(lr_reg, next_pc);
-
-      // now invalidate lr because it was possibly written in the branch
-      m_register_cache.InvalidateGuestRegister(lr_reg);
-      if (m_register_cache.GetLoadDelayRegister() == lr_reg)
-        m_register_cache.CancelLoadDelay();
-    }
-
-    // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
-    // strange.
-    if (g_settings.cpu_recompiler_memory_exceptions &&
-        (!branch_target.IsConstant() || (branch_target.constant_value & 0x3) != 0))
-    {
-      LabelType branch_okay;
-
-      if (branch_target.IsConstant())
-      {
-        WARNING_LOG("Misaligned constant target branch 0x{:08X}, this is strange",
-                    Truncate32(branch_target.constant_value));
-      }
-      else
-      {
-        // check the alignment of the target
-        EmitTest(branch_target.host_reg, Value::FromConstantU32(0x3));
-        EmitConditionalBranch(Condition::Zero, false, &branch_okay);
-      }
-
-      // exception exit for misaligned target
-      m_register_cache.PushState();
-      EmitBranch(GetCurrentFarCodePointer());
-      EmitBindLabel(&branch_okay);
-
-      SwitchToFarCode();
-      EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.BadVaddr), branch_target);
-      EmitFunctionCall(
-        nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException),
-        Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0)),
-        branch_target);
-      EmitExceptionExit();
-      SwitchToNearCode();
-
-      m_register_cache.PopState();
-    }
-
-    if (can_link_block)
-    {
-      // if it's an in-block branch, compile the delay slot now
-      // TODO: Make this more optimal by moving the condition down if it's a nop
-      Assert((m_current_instruction.instruction + 1) != m_block_end.instruction);
-      InstructionEpilogue(instruction, info);
-      m_current_instruction.instruction++;
-      m_current_instruction.info++;
-      if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info))
-        return false;
-
-      // flush all regs since we're at the end of the block now
-      BlockEpilogue();
-      m_block_linked = true;
-
-      // check the downcount
-      Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
-      Value downcount = m_register_cache.AllocateScratch(RegSize_32);
-      EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
-      EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, OFFSETOF(State, downcount));
-
-      // pending < downcount
-      LabelType return_to_dispatcher;
-
-      if (condition != Condition::Always)
-      {
-        EmitBranchIfBitClear(take_branch.GetHostRegister(), take_branch.size, 0, &branch_not_taken);
-        m_register_cache.PushState();
-        {
-          WriteNewPC(branch_target, false);
-          EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
-                                &return_to_dispatcher);
-
-          // we're committed at this point :D
-          EmitEndBlock(true, nullptr);
-
-          DebugAssert(branch_target.IsConstant());
-          if (static_cast<u32>(branch_target.constant_value) == m_block->pc)
-          {
-            // self-link
-            EmitBranch(GetStartNearCodePointer());
-          }
-          else
-          {
-            const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
-                                                                      static_cast<u32>(branch_target.constant_value));
-            EmitBranch(host_target);
-          }
-        }
-        m_register_cache.PopState();
-
-        SwitchToNearCode();
-        EmitBindLabel(&branch_not_taken);
-      }
-
-      m_register_cache.PushState();
-
-      if (condition != Condition::Always)
-      {
-        WriteNewPC(next_pc, true);
-      }
-      else
-      {
-        WriteNewPC(branch_target, true);
-      }
-
-      EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
-                            &return_to_dispatcher);
-
-      EmitEndBlock(true, nullptr);
-
-      const Value& jump_target = (condition != Condition::Always) ? constant_next_pc : branch_target;
-      DebugAssert(jump_target.IsConstant());
-      if (static_cast<u32>(jump_target.constant_value) == m_block->pc)
-      {
-        // self-link
-        EmitBranch(GetStartNearCodePointer());
-      }
-      else
-      {
-        const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
-                                                                  static_cast<u32>(jump_target.constant_value));
-        EmitBranch(host_target);
-      }
-
-      m_register_cache.PopState();
-
-      EmitBindLabel(&return_to_dispatcher);
-      EmitEndBlock(true, CodeCache::g_run_events_and_dispatch);
-    }
-    else
-    {
-      if (condition != Condition::Always)
-      {
-        // branch taken path - modify the next pc
-        EmitBindLabel(&branch_taken);
-        EmitCopyValue(next_pc.GetHostRegister(), branch_target);
-
-        // converge point
-        EmitBindLabel(&branch_not_taken);
-        WriteNewPC(next_pc, true);
-      }
-      else
-      {
-        // next_pc is not used for unconditional branches
-        WriteNewPC(branch_target, true);
-      }
-
-      InstructionEpilogue(instruction, info);
-    }
-
-    return true;
-  };
-
-  // Compute the branch target.
-  // This depends on the form of the instruction.
-  switch (instruction.op)
-  {
-    case InstructionOp::j:
-    case InstructionOp::jal:
-    {
-      // npc = (pc & 0xF0000000) | (target << 2)
-      Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
-                                     Value::FromConstantU32(instruction.j.target << 2));
-
-      return DoBranch(Condition::Always, Value(), Value(),
-                      (instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
-    }
-
-    case InstructionOp::funct:
-    {
-      if (instruction.r.funct == InstructionFunct::jr || instruction.r.funct == InstructionFunct::jalr)
-      {
-        // npc = rs, link to rd
-        Value branch_target = m_register_cache.ReadGuestRegister(instruction.r.rs);
-        return DoBranch(Condition::Always, Value(), Value(),
-                        (instruction.r.funct == InstructionFunct::jalr) ? instruction.r.rd : Reg::count,
-                        std::move(branch_target));
-      }
-      else if (instruction.r.funct == InstructionFunct::syscall || instruction.r.funct == InstructionFunct::break_)
-      {
-        const Exception excode =
-          (instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
-        GenerateExceptionExit(instruction, info, excode);
-        InstructionEpilogue(instruction, info);
-        return true;
-      }
-      else
-      {
-        UnreachableCode();
-      }
-    }
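As an aside (not part of the patch), the j/jal target arithmetic above keeps execution within the current 256MB PC region; evaluated on the host:

#include <cstdint>

// pc is the address whose region bits are kept (CalculatePC() supplies it in the
// generated code); target_field is the 26-bit field from the J-type encoding.
static uint32_t JTypeTarget(uint32_t pc, uint32_t target_field)
{
  return (pc & 0xF0000000u) | (target_field << 2);
}
// e.g. JTypeTarget(0x80010000, 0x00F000) == 0x8003C000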
-
-    case InstructionOp::beq:
-    case InstructionOp::bne:
-    {
-      // npc = pc + (sext(imm) << 2)
-      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
-
-      // beq zero, zero, addr -> unconditional branch
-      if (instruction.op == InstructionOp::beq && instruction.i.rs == Reg::zero && instruction.i.rt == Reg::zero)
-      {
-        return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
-      }
-      else
-      {
-        // branch <- rs op rt
-        Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
-        Value rhs = m_register_cache.ReadGuestRegister(instruction.i.rt);
-        const Condition condition = (instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
-        return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
-      }
-    }
-
-    case InstructionOp::bgtz:
-    case InstructionOp::blez:
-    {
-      // npc = pc + (sext(imm) << 2)
-      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
-
-      // branch <- rs op 0
-      Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
-
-      const Condition condition = (instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
-      return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
-    }
-
-    case InstructionOp::b:
-    {
-      // npc = pc + (sext(imm) << 2)
-      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
-
-      const u8 rt = static_cast<u8>(instruction.i.rt.GetValue());
-      const bool bgez = ConvertToBoolUnchecked(rt & u8(1));
-      const Condition condition = (bgez && instruction.r.rs == Reg::zero) ?
-                                    Condition::Always :
-                                    (bgez ? Condition::PositiveOrZero : Condition::Negative);
-      const bool link = (rt & u8(0x1E)) == u8(0x10);
-
-      // The read has to happen before the link, as the comparison can use ra.
-      Value lhs;
-      if (condition != Condition::Always)
-        lhs = m_register_cache.ReadGuestRegisterToScratch(instruction.i.rs);
-
-      // The return address is always written if the link bit is set, regardless of whether the branch is taken.
-      if (link)
-      {
-        EmitCancelInterpreterLoadDelayForReg(Reg::ra);
-        m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
-      }
-
-      return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
-    }
-
-    default:
-      UnreachableCode();
-  }
-}
-
-bool CodeGenerator::Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  InstructionPrologue(instruction, info, 1);
-
-  if (g_settings.UsingPGXPCPUMode())
-    EmitFunctionCall(nullptr, &PGXP::CPU_LUI, Value::FromConstantU32(instruction.bits));
-
-  // rt <- (imm << 16)
-  const u32 value = instruction.i.imm_zext32() << 16;
-  m_register_cache.WriteGuestRegister(instruction.i.rt, Value::FromConstantU32(value));
-  SpeculativeWriteReg(instruction.i.rt, value);
-
-  InstructionEpilogue(instruction, info);
-  return true;
-}
-
-bool CodeGenerator::Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  if (instruction.cop.IsCommonInstruction())
-  {
-    switch (instruction.cop.CommonOp())
-    {
-      case CopCommonInstruction::mfcn:
-      case CopCommonInstruction::mtcn:
-      {
-        u32 offset;
-        u32 write_mask = UINT32_C(0xFFFFFFFF);
-
-        const Cop0Reg reg = static_cast<Cop0Reg>(instruction.r.rd.GetValue());
-        switch (reg)
-        {
-          case Cop0Reg::BPC:
-            offset = OFFSETOF(State, cop0_regs.BPC);
-            break;
-
-          case Cop0Reg::BPCM:
-            offset = OFFSETOF(State, cop0_regs.BPCM);
-            break;
-
-          case Cop0Reg::BDA:
-            offset = OFFSETOF(State, cop0_regs.BDA);
-            break;
-
-          case Cop0Reg::BDAM:
-            offset = OFFSETOF(State, cop0_regs.BDAM);
-            break;
-
-          case Cop0Reg::DCIC:
-            offset = OFFSETOF(State, cop0_regs.dcic.bits);
-            write_mask = Cop0Registers::DCIC::WRITE_MASK;
-            break;
-
-          case Cop0Reg::JUMPDEST:
-            offset = OFFSETOF(State, cop0_regs.TAR);
-            write_mask = 0;
-            break;
-
-          case Cop0Reg::BadVaddr:
-            offset = OFFSETOF(State, cop0_regs.BadVaddr);
-            write_mask = 0;
-            break;
-
-          case Cop0Reg::SR:
-            offset = OFFSETOF(State, cop0_regs.sr.bits);
-            write_mask = Cop0Registers::SR::WRITE_MASK;
-            break;
-
-          case Cop0Reg::CAUSE:
-            offset = OFFSETOF(State, cop0_regs.cause.bits);
-            write_mask = Cop0Registers::CAUSE::WRITE_MASK;
-            break;
-
-          case Cop0Reg::EPC:
-            offset = OFFSETOF(State, cop0_regs.EPC);
-            write_mask = 0;
-            break;
-
-          case Cop0Reg::PRID:
-            offset = OFFSETOF(State, cop0_regs.PRID);
-            write_mask = 0;
-            break;
-
-          default:
-            return Compile_Fallback(instruction, info);
-        }
-
-        InstructionPrologue(instruction, info, 1);
-
-        if (instruction.cop.CommonOp() == CopCommonInstruction::mfcn)
-        {
-          // coprocessor loads are load-delayed
-          Value value = m_register_cache.AllocateScratch(RegSize_32);
-          EmitLoadCPUStructField(value.host_reg, value.size, offset);
-
-          if (g_settings.UsingPGXPCPUMode())
-            EmitFunctionCall(nullptr, &PGXP::CPU_MFC0, Value::FromConstantU32(instruction.bits), value);
-
-          m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));
-
-          if (reg == Cop0Reg::SR)
-            SpeculativeWriteReg(instruction.r.rt, m_speculative_constants.cop0_sr);
-          else
-            SpeculativeWriteReg(instruction.r.rt, std::nullopt);
-        }
-        else
-        {
-          // some registers are not writable, so ignore those
-          if (write_mask != 0)
-          {
-            Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
-            if (write_mask != UINT32_C(0xFFFFFFFF))
-            {
-              // need to adjust the mask
-              Value masked_value = AndValues(value, Value::FromConstantU32(write_mask));
-              {
-                Value old_value = m_register_cache.AllocateScratch(RegSize_32);
-                EmitLoadCPUStructField(old_value.GetHostRegister(), RegSize_32, offset);
-                EmitAnd(old_value.GetHostRegister(), old_value.GetHostRegister(), Value::FromConstantU32(~write_mask));
-                OrValueInPlace(masked_value, old_value);
-              }
-
-              if (g_settings.UsingPGXPCPUMode())
-              {
-                EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), masked_value,
-                                 value);
-              }
-              value = std::move(masked_value);
-            }
-            else
-            {
-              if (g_settings.UsingPGXPCPUMode())
-                EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), value, value);
-            }
-
-            if (reg == Cop0Reg::SR)
-              m_speculative_constants.cop0_sr = SpeculativeReadReg(instruction.r.rt);
-
-            // changing SR[Isc] needs to update the fastmem views
-            if (reg == Cop0Reg::SR)
-            {
-              LabelType skip_mem_update;
-              Value old_value = m_register_cache.AllocateScratch(RegSize_32);
-              EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
-              EmitStoreCPUStructField(offset, value);
-              EmitXor(old_value.host_reg, old_value.host_reg, value);
-              EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_mem_update);
-              m_register_cache.InhibitAllocation();
-              EmitFunctionCall(nullptr, &UpdateMemoryPointers, m_register_cache.GetCPUPtr());
-              EmitUpdateFastmemBase();
-              EmitBindLabel(&skip_mem_update);
-              m_register_cache.UninhibitAllocation();
-            }
-            else
-            {
-              EmitStoreCPUStructField(offset, value);
-            }
-          }
-        }
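For reference (not part of the patch), here is a host-side sketch of the two cop0 behaviours this code emits: the masked MTC0 write above, and the interrupt-pending test that is re-checked below after CAUSE/SR change. Field meanings follow the SR/CAUSE layout used elsewhere in this file:

#include <cstdint>

static uint32_t MaskedCop0Write(uint32_t old_value, uint32_t new_value, uint32_t write_mask)
{
  // read-only bits keep their old value; writable bits take the new one
  return (old_value & ~write_mask) | (new_value & write_mask);
}

static bool HasPendingInterrupt(uint32_t sr_bits, uint32_t cause_bits)
{
  // sr.IEc && ((cause.Ip & sr.Im) != 0): bit 0 is the current interrupt enable,
  // bits 8-15 of both registers are the interrupt mask/pending lines.
  return (sr_bits & 1u) != 0 && ((sr_bits & cause_bits & 0xFF00u) != 0);
}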
-
-        if (instruction.cop.CommonOp() == CopCommonInstruction::mtcn)
-        {
-          if (reg == Cop0Reg::CAUSE || reg == Cop0Reg::SR)
-          {
-            // Emit an interrupt check on writes to CAUSE/SR.
-            Value sr_value = m_register_cache.AllocateScratch(RegSize_32);
-            Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
-            m_register_cache.InhibitAllocation();
-
-            // m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0)
-            LabelType no_interrupt;
-            EmitLoadCPUStructField(sr_value.host_reg, sr_value.size, OFFSETOF(State, cop0_regs.sr.bits));
-            EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));
-            EmitBranchIfBitClear(sr_value.host_reg, sr_value.size, 0, &no_interrupt);
-            EmitAnd(sr_value.host_reg, sr_value.host_reg, cause_value);
-            EmitTest(sr_value.host_reg, Value::FromConstantU32(0xFF00));
-            EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
-            m_register_cache.UninhibitAllocation();
-
-            EmitBranch(GetCurrentFarCodePointer());
-            SwitchToFarCode();
-            m_register_cache.PushState();
-            if (!info.is_last_instruction)
-              WriteNewPC(CalculatePC(), false);
-            EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
-            EmitExceptionExit();
-            m_register_cache.PopState();
-            SwitchToNearCode();
-
-            EmitBindLabel(&no_interrupt);
-          }
-          else if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
-          {
-            Value dcic_value = m_register_cache.AllocateScratch(RegSize_32);
-            m_register_cache.InhibitAllocation();
-
-            // if ((dcic & master_enable_bits) != master_enable_bits) goto not_enabled;
-            LabelType not_enabled;
-            EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
-            EmitAnd(dcic_value.GetHostRegister(), dcic_value.GetHostRegister(),
-                    Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS));
-            EmitConditionalBranch(Condition::NotEqual, false, dcic_value.host_reg,
-                                  Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS), &not_enabled);
-
-            // if ((dcic & breakpoint_bits) == 0) goto not_enabled;
-            EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
-            EmitTest(dcic_value.GetHostRegister(),
-                     Value::FromConstantU32(Cop0Registers::DCIC::ANY_BREAKPOINTS_ENABLED_BITS));
-            EmitConditionalBranch(Condition::Zero, false, &not_enabled);
-
-            // update the dispatcher flag; if enabled, exit the block
-            EmitFunctionCall(nullptr, &UpdateDebugDispatcherFlag);
-            EmitLoadCPUStructField(dcic_value.GetHostRegister(), RegSize_8, OFFSETOF(State, using_debug_dispatcher));
-            EmitBranchIfBitClear(dcic_value.GetHostRegister(), RegSize_8, 0, &not_enabled);
-
-            m_register_cache.UninhibitAllocation();
-
-            // exit the block early if enabled
-            EmitBranch(GetCurrentFarCodePointer());
-            SwitchToFarCode();
-            m_register_cache.PushState();
-            WriteNewPC(CalculatePC(), false);
-            EmitExceptionExit();
-            m_register_cache.PopState();
-            SwitchToNearCode();
-
-            EmitBindLabel(&not_enabled);
-          }
-        }
-
-        InstructionEpilogue(instruction, info);
-        return true;
-      }
-
-      // only mfc/mtc for cop0
-      default:
-        return Compile_Fallback(instruction, info);
-    }
-  }
-  else
-  {
-    switch (instruction.cop.Cop0Op())
-    {
-      case Cop0Instruction::rfe:
-      {
-        InstructionPrologue(instruction, info, 1);
-
-        // shift the mode bits right by two, preserving the upper bits
-        static constexpr u32 mode_bits_mask = UINT32_C(0b1111);
-        Value sr = m_register_cache.AllocateScratch(RegSize_32);
-        EmitLoadCPUStructField(sr.host_reg, RegSize_32, OFFSETOF(State, cop0_regs.sr.bits));
-        {
-          Value new_mode_bits = m_register_cache.AllocateScratch(RegSize_32);
-          EmitShr(new_mode_bits.host_reg, sr.host_reg, new_mode_bits.size, Value::FromConstantU32(2));
-          EmitAnd(new_mode_bits.host_reg, new_mode_bits.host_reg, Value::FromConstantU32(mode_bits_mask));
-          EmitAnd(sr.host_reg, sr.host_reg, Value::FromConstantU32(~mode_bits_mask));
-          EmitOr(sr.host_reg, sr.host_reg, new_mode_bits);
-        }
-
-        EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.sr.bits), sr);
-
-        Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
-        EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));
-
-        LabelType no_interrupt;
-        EmitAnd(sr.host_reg, sr.host_reg, cause_value);
-        EmitTest(sr.host_reg, Value::FromConstantU32(0xFF00));
-        EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
-        m_register_cache.InhibitAllocation();
-        EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
-        EmitBindLabel(&no_interrupt);
-        m_register_cache.UninhibitAllocation();
-
-        InstructionEpilogue(instruction, info);
-        return true;
-      }
-
-      default:
-        return Compile_Fallback(instruction, info);
-    }
-  }
-}
-
-Value CodeGenerator::DoGTERegisterRead(u32 index)
-{
-  Value value = m_register_cache.AllocateScratch(RegSize_32);
-
-  // Most GTE registers can be read directly. Handle the special cases here.
-  if (index == 15) // SXY3
-  {
-    // mirror of SXY2
-    index = 14;
-  }
-
-  switch (index)
-  {
-    case 28: // IRGB
-    case 29: // ORGB
-    {
-      EmitFunctionCall(&value, &GTE::ReadRegister, Value::FromConstantU32(index));
-    }
-    break;
-
-    default:
-    {
-      EmitLoadCPUStructField(value.host_reg, RegSize_32, State::GTERegisterOffset(index));
-    }
-    break;
-  }
-
-  return value;
-}
-
-void CodeGenerator::DoGTERegisterWrite(u32 index, const Value& value)
-{
-  switch (index)
-  {
-    case 1:  // V0[z]
-    case 3:  // V1[z]
-    case 5:  // V2[z]
-    case 8:  // IR0
-    case 9:  // IR1
-    case 10: // IR2
-    case 11: // IR3
-    case 36: // RT33
-    case 44: // L33
-    case 52: // LR33
-    case 58: // H - sign-extended on read but zext on use
-    case 59: // DQA
-    case 61: // ZSF3
-    case 62: // ZSF4
-    {
-      // sign-extend the z component of the vector registers
-      Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, true);
-      EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
-      return;
-    }
-    break;
-
-    case 7:  // OTZ
-    case 16: // SZ0
-    case 17: // SZ1
-    case 18: // SZ2
-    case 19: // SZ3
-    {
-      // zero-extend unsigned values
-      Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, false);
-      EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
-      return;
-    }
-    break;
-
-    case 15: // SXY3
-    {
-      // writing to SXYP pushes onto the FIFO
-      Value temp = m_register_cache.AllocateScratch(RegSize_32);
-
-      // SXY0 <- SXY1
-      EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(13));
-      EmitStoreCPUStructField(State::GTERegisterOffset(12), temp);
-
-      // SXY1 <- SXY2
-      EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(14));
-      EmitStoreCPUStructField(State::GTERegisterOffset(13), temp);
-
-      // SXY2 <- SXYP
-      EmitStoreCPUStructField(State::GTERegisterOffset(14), value);
-      return;
-    }
-    break;
-
-    case 28: // IRGB
-    case 30: // LZCS
-    case 63: // FLAG
-    {
-      EmitFunctionCall(nullptr, &GTE::WriteRegister, Value::FromConstantU32(index), value);
-      return;
-    }
-
-    case 29: // ORGB
-    case 31: // LZCR
-    {
-      // read-only registers
-      return;
-    }
-
-    default:
-    {
-      // written as-is, either 2x16 or 1x32 bits
-      EmitStoreCPUStructField(State::GTERegisterOffset(index), value);
-      return;
-    }
-  }
-}
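A host-side sketch (not part of the patch) of the SXYP push emitted above: the GTE keeps a three-entry screen-XY FIFO, and writing SXYP (register 15) shifts the FIFO rather than storing in place:

#include <cstdint>

struct ScreenXYFifo
{
  uint32_t sxy0, sxy1, sxy2;

  void PushSXYP(uint32_t value)
  {
    sxy0 = sxy1;  // oldest entry falls out
    sxy1 = sxy2;
    sxy2 = value; // reads of SXY3/SXYP mirror SXY2 afterwards
  }
};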
-
-bool CodeGenerator::Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info)
-{
-  if (instruction.op == InstructionOp::lwc2 || instruction.op == InstructionOp::swc2)
-  {
-    StallUntilGTEComplete();
-    InstructionPrologue(instruction, info, 1);
-
-    const u32 reg = static_cast<u32>(instruction.i.rt.GetValue());
-    Value address = AddValues(m_register_cache.ReadGuestRegister(instruction.i.rs),
-                              Value::FromConstantU32(instruction.i.imm_sext32()), false);
-    SpeculativeValue spec_address = SpeculativeReadReg(instruction.i.rs);
-    if (spec_address)
-      spec_address = *spec_address + instruction.i.imm_sext32();
-
-    if (instruction.op == InstructionOp::lwc2)
-    {
-      Value value = EmitLoadGuestMemory(instruction, info, address, spec_address, RegSize_32);
-      DoGTERegisterWrite(reg, value);
-
-      if (g_settings.gpu_pgxp_enable)
-        EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(instruction.bits), address, value);
-    }
-    else
-    {
-      Value value = DoGTERegisterRead(reg);
-      EmitStoreGuestMemory(instruction, info, address, spec_address, RegSize_32, value);
-
-      if (g_settings.gpu_pgxp_enable)
-        EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(instruction.bits), address, value);
-
-      if (spec_address)
-        SpeculativeWriteMemory(*spec_address, std::nullopt);
-    }
-
-    InstructionEpilogue(instruction, info);
-    return true;
-  }
-
-  Assert(instruction.op == InstructionOp::cop2);
-
-  if (instruction.cop.IsCommonInstruction())
-  {
-    switch (instruction.cop.CommonOp())
-    {
-      case CopCommonInstruction::mfcn:
-      case CopCommonInstruction::cfcn:
-      {
-        const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
-                        ((instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);
-
-        StallUntilGTEComplete();
-        InstructionPrologue(instruction, info, 1);
-
-        Value value = DoGTERegisterRead(reg);
-
-        // PGXP is done first here, before ownership is transferred.
-        if (g_settings.gpu_pgxp_enable)
-          EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(instruction.bits), value);
-
-        m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));
-        SpeculativeWriteReg(instruction.r.rt, std::nullopt);
-
-        InstructionEpilogue(instruction, info);
-        return true;
-      }
-
-      case CopCommonInstruction::mtcn:
-      case CopCommonInstruction::ctcn:
-      {
-        const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
-                        ((instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);
-
-        StallUntilGTEComplete();
-        InstructionPrologue(instruction, info, 1);
-
-        Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
-        DoGTERegisterWrite(reg, value);
-
-        if (g_settings.gpu_pgxp_enable)
-          EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(instruction.bits), value);
-
-        InstructionEpilogue(instruction, info);
-        return true;
-      }
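For reference (not part of the patch), the "+ 32" in the cases above flattens the GTE's two register spaces into one index range; mfc2/mtc2 address data registers 0-31, and cfc2/ctc2 address control registers, stored at flat indices 32-63:

#include <cstdint>

static uint32_t FlatGTERegIndex(uint32_t rd, bool is_control_op)
{
  return rd + (is_control_op ? 32u : 0u);
}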
-
-      default:
-        return Compile_Fallback(instruction, info);
-    }
-  }
-  else
-  {
-    TickCount func_ticks;
-    GTE::InstructionImpl func = GTE::GetInstructionImpl(instruction.bits, &func_ticks);
-
-    // forward everything else to the GTE.
-    StallUntilGTEComplete();
-    InstructionPrologue(instruction, info, 1);
-
-    Value instruction_bits = Value::FromConstantU32(instruction.bits & GTE::Instruction::REQUIRED_BITS_MASK);
-    EmitFunctionCall(nullptr, func, instruction_bits);
-    AddGTETicks(func_ticks);
-
-    InstructionEpilogue(instruction, info);
-    return true;
-  }
-}
-
-void CodeGenerator::InitSpeculativeRegs()
-{
-  for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
-    m_speculative_constants.regs[i] = g_state.regs.r[i];
-
-  m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
-}
-
-void CodeGenerator::InvalidateSpeculativeValues()
-{
-  m_speculative_constants.regs.fill(std::nullopt);
-  m_speculative_constants.memory.clear();
-  m_speculative_constants.cop0_sr.reset();
-}
-
-CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadReg(Reg reg)
-{
-  return m_speculative_constants.regs[static_cast<u8>(reg)];
-}
-
-void CodeGenerator::SpeculativeWriteReg(Reg reg, SpeculativeValue value)
-{
-  m_speculative_constants.regs[static_cast<u8>(reg)] = value;
-}
-
-CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemoryAddress address)
-{
-  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
-
-  auto it = m_speculative_constants.memory.find(address);
-  if (it != m_speculative_constants.memory.end())
-    return it->second;
-
-  u32 value;
-  if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
-  {
-    u32 scratchpad_offset = phys_addr & SCRATCHPAD_OFFSET_MASK;
-    std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
-    return value;
-  }
-
-  if (Bus::IsRAMAddress(phys_addr))
-  {
-    u32 ram_offset = phys_addr & Bus::g_ram_mask;
-    std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
-    return value;
-  }
-
-  return std::nullopt;
-}
-
-void CodeGenerator::SpeculativeWriteMemory(u32 address, SpeculativeValue value)
-{
-  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
-
-  auto it = m_speculative_constants.memory.find(address);
-  if (it != m_speculative_constants.memory.end())
-  {
-    it->second = value;
-    return;
-  }
-
-  if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr))
-    m_speculative_constants.memory.emplace(address, value);
-}
-
-bool CodeGenerator::SpeculativeIsCacheIsolated()
-{
-  if (!m_speculative_constants.cop0_sr.has_value())
-    return false;
-
-  const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
-  return sr.Isc;
-}
-
-} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h
deleted file mode 100644
index 5c3b931fb..000000000
--- a/src/core/cpu_recompiler_code_generator.h
+++ /dev/null
@@ -1,314 +0,0 @@
-// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
-// SPDX-License-Identifier: CC-BY-NC-ND-4.0
-
-#pragma once
-
-#include "cpu_code_cache_private.h"
-#include "cpu_recompiler_register_cache.h"
-#include "cpu_recompiler_thunks.h"
-#include "cpu_recompiler_types.h"
-#include "cpu_types.h"
-
-#include <array>
-#include <optional>
-
-namespace CPU::Recompiler {
-
-enum class Condition : u8
-{
-  Always,
-  NotEqual,
-  Equal,
-  Overflow,
-  Greater,
-  GreaterEqual,
-  LessEqual,
-  Less,
-  Negative,
-  PositiveOrZero,
-  Above,      // unsigned variant of Greater
-  AboveEqual, // unsigned variant of GreaterEqual
-  Below,      // unsigned variant of Less
-  BelowEqual, // unsigned variant of LessEqual
-
-  NotZero,
-  Zero
-};
-
-class CodeGenerator
-{
-public:
-  using SpeculativeValue = std::optional<u32>;
-
-  struct CodeBlockInstruction
-  {
-    const
Instruction* instruction; - const CodeCache::InstructionInfo* info; - }; - - CodeGenerator(); - ~CodeGenerator(); - - static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize); - - static void BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi); - - const void* CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size); - - ////////////////////////////////////////////////////////////////////////// - // Code Generation - ////////////////////////////////////////////////////////////////////////// - void EmitBeginBlock(bool allocate_registers = true); - void EmitEndBlock(bool free_registers, const void* jump_to); - void EmitExceptionExit(); - void EmitExceptionExitOnBool(const Value& value); - const void* FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size); - - void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size); - void EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size); - void EmitCopyValue(HostReg to_reg, const Value& value); - void EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags); - void EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags); - void EmitCmp(HostReg to_reg, const Value& value); - void EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, bool signed_multiply); - void EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, - bool signed_divide); - void EmitInc(HostReg to_reg, RegSize size); - void EmitDec(HostReg to_reg, RegSize size); - void EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked = true); - void EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked = true); - void EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked = true); - void EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value); - void EmitOr(HostReg to_reg, HostReg from_reg, const Value& value); - void EmitXor(HostReg to_reg, HostReg from_reg, const Value& value); - void EmitTest(HostReg to_reg, const Value& value); - void EmitNot(HostReg to_reg, RegSize size); - void EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition); - - void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg); - void EmitStoreGuestRegister(Reg guest_reg, const Value& value); - void EmitStoreInterpreterLoadDelay(Reg reg, const Value& value); - void EmitFlushInterpreterLoadDelay(); - void EmitMoveNextInterpreterLoadDelay(); - void EmitCancelInterpreterLoadDelayForReg(Reg reg); - void EmitICacheCheckAndUpdate(); - void EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size); - void EmitStallUntilGTEComplete(); - void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset); - void EmitStoreCPUStructField(u32 offset, const Value& value); - void EmitAddCPUStructField(u32 offset, const Value& value); - void EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr); - void EmitStoreGlobal(void* ptr, const Value& value); - void EmitLoadGlobalAddress(HostReg host_reg, const void* ptr); - - // Automatically generates an exception handler. 
-  Value EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
-                            const SpeculativeValue& address_spec, RegSize size);
-  void EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result);
-  void EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
-                                  RegSize size, Value& result);
-  void EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
-                                  RegSize size, Value& result, bool in_far_code);
-  void EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, const Value& address,
-                            const SpeculativeValue& address_spec, RegSize size, const Value& value);
-  void EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                   const Value& address, RegSize size, const Value& value);
-  void EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                   const Value& address, RegSize size, const Value& value, bool in_far_code);
-  void EnsureMembaseLoaded();
-  void EmitUpdateFastmemBase();
-
-  // Unconditional branch to pointer. May allocate a scratch register.
-  void EmitBranch(const void* address, bool allow_scratch = true);
-  void EmitBranch(LabelType* label);
-
-  // Branching, generates two paths.
-  void EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, LabelType* label);
-  void EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, LabelType* label);
-  void EmitConditionalBranch(Condition condition, bool invert, LabelType* label);
-  void EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label);
-  void EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label);
-  void EmitBindLabel(LabelType* label);
-
-  u32 PrepareStackForCall();
-  void RestoreStackAfterCall(u32 adjust_size);
-
-  void EmitCall(const void* ptr);
-  void EmitFunctionCallPtr(Value* return_value, const void* ptr);
-  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1);
-  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2);
-  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
-                           const Value& arg3);
-  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
-                           const Value& arg3, const Value& arg4);
-
-  template<typename FunctionType>
-  void EmitFunctionCall(Value* return_value, const FunctionType ptr)
-  {
-    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr));
-  }
-
-  template<typename FunctionType>
-  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1)
-  {
-    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1);
-  }
-
-  template<typename FunctionType>
-  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2)
-  {
-    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2);
-  }
-
-  template<typename FunctionType>
-  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
-                        const Value& arg3)
-  {
-    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3);
-  }
-
-  template<typename FunctionType>
-  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
-                        const Value& arg3, const Value& arg4)
-  {
-    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3, arg4);
-  }
-
-  // Host register saving.
-  void EmitPushHostReg(HostReg reg, u32 position);
-  void EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position);
-  void EmitPopHostReg(HostReg reg, u32 position);
-  void EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position);
-
-  // Value ops
-  Value AddValues(const Value& lhs, const Value& rhs, bool set_flags);
-  Value SubValues(const Value& lhs, const Value& rhs, bool set_flags);
-  std::pair<Value, Value> MulValues(const Value& lhs, const Value& rhs, bool signed_multiply);
-  Value ShlValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
-  Value ShrValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
-  Value SarValues(const Value& lhs, const Value& rhs, bool assume_amount_masked = true);
-  Value OrValues(const Value& lhs, const Value& rhs);
-  void OrValueInPlace(Value& lhs, const Value& rhs);
-  Value AndValues(const Value& lhs, const Value& rhs);
-  void AndValueInPlace(Value& lhs, const Value& rhs);
-  Value XorValues(const Value& lhs, const Value& rhs);
-  Value NotValue(const Value& val);
-
-  const TickCount* GetFetchMemoryAccessTimePtr() const;
-
-  // Raises an exception if the condition is true.
-  void GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info, Exception excode,
-                             Condition condition = Condition::Always);
-
-private:
-  // Host register setup
-  void InitHostRegs();
-
-  Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
-  void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);
-
-  Value GetValueInHostRegister(const Value& value, bool allow_zero_register = true);
-  Value GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register = true);
-
-  void SwitchToFarCode();
-  void SwitchToNearCode();
-  void* GetStartNearCodePointer() const;
-  void* GetCurrentCodePointer() const;
-  void* GetCurrentNearCodePointer() const;
-  void* GetCurrentFarCodePointer() const;
-
-  //////////////////////////////////////////////////////////////////////////
-  // Code Generation Helpers
-  //////////////////////////////////////////////////////////////////////////
-  // branch target, memory address, etc
-  void BlockPrologue();
-  void BlockEpilogue();
-  void InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info, TickCount cycles,
-                           bool force_sync = false);
-  void InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info);
-  void TruncateBlockAtCurrentInstruction();
-  void AddPendingCycles(bool commit);
-  void AddGTETicks(TickCount ticks);
-  void StallUntilGTEComplete();
-
-  Value CalculatePC(u32 offset = 0);
-  Value GetCurrentInstructionPC(u32 offset = 0);
-  void WriteNewPC(const Value& value, bool commit);
-
-  Value DoGTERegisterRead(u32 index);
-  void DoGTERegisterWrite(u32 index, const Value& value);
-
-  //////////////////////////////////////////////////////////////////////////
-  // Instruction Code Generators
-  //////////////////////////////////////////////////////////////////////////
-  bool CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Nop(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_SignedDivide(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info);
-  bool Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info);
-
-  CodeCache::Block* m_block = nullptr;
-  CodeBlockInstruction m_block_start = {};
-  CodeBlockInstruction m_block_end = {};
-  CodeBlockInstruction m_current_instruction = {};
-  RegisterCache m_register_cache;
-  CodeEmitter m_near_emitter;
-  CodeEmitter m_far_emitter;
-  CodeEmitter* m_emit;
-
-  TickCount m_delayed_cycles_add = 0;
-  TickCount m_gte_done_cycle = 0;
-
-  u32 m_pc = 0;
-  bool m_pc_valid = false;
-  bool m_block_linked = false;
-
-  // whether various flags need to be reset.
-  bool m_current_instruction_in_branch_delay_slot_dirty = false;
-  bool m_branch_was_taken_dirty = false;
-  bool m_current_instruction_was_branch_taken_dirty = false;
-  bool m_load_delay_dirty = false;
-  bool m_next_load_delay_dirty = false;
-  bool m_gte_busy_cycles_dirty = false;
-  bool m_membase_loaded = false;
-
-  //////////////////////////////////////////////////////////////////////////
-  // Speculative Constants
-  //////////////////////////////////////////////////////////////////////////
-  struct SpeculativeConstants
-  {
-    std::array<SpeculativeValue, static_cast<size_t>(Reg::count)> regs;
-    std::unordered_map<u32, SpeculativeValue> memory;
-    SpeculativeValue cop0_sr;
-  };
-
-  void InitSpeculativeRegs();
-  void InvalidateSpeculativeValues();
-  SpeculativeValue SpeculativeReadReg(Reg reg);
-  void SpeculativeWriteReg(Reg reg, SpeculativeValue value);
-  SpeculativeValue SpeculativeReadMemory(u32 address);
-  void SpeculativeWriteMemory(VirtualMemoryAddress address, SpeculativeValue value);
-  bool SpeculativeIsCacheIsolated();
-
-  SpeculativeConstants m_speculative_constants;
-};
-
-} // namespace CPU::Recompiler
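A standalone sketch (not part of the patch) of the speculative-constant scheme declared above: the compiler tracks an optional compile-time value per guest register, and any operation with an unknown input poisons its result. The names here are illustrative stand-ins:

#include <cstdint>
#include <optional>

using SpecValue = std::optional<uint32_t>; // stand-in for SpeculativeValue

static SpecValue SpecAdd(SpecValue lhs, SpecValue rhs)
{
  if (lhs && rhs)
    return *lhs + *rhs; // both inputs known -> result known at compile time
  return std::nullopt;  // any unknown input -> result unknown
}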
diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp
deleted file mode 100644
index 6d7264256..000000000
--- a/src/core/cpu_recompiler_code_generator_aarch32.cpp
+++ /dev/null
@@ -1,2329 +0,0 @@
-// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
-// SPDX-License-Identifier: CC-BY-NC-ND-4.0
-
-#include "cpu_code_cache_private.h"
-#include "cpu_core.h"
-#include "cpu_core_private.h"
-#include "cpu_recompiler_code_generator.h"
-#include "cpu_recompiler_thunks.h"
-#include "settings.h"
-#include "timing_event.h"
-
-#include "common/align.h"
-#include "common/assert.h"
-#include "common/log.h"
-#include "common/memmap.h"
-
-#ifdef CPU_ARCH_ARM32
-
-LOG_CHANNEL(Recompiler);
-
-#ifdef ENABLE_HOST_DISASSEMBLY
-#include "vixl/aarch32/disasm-aarch32.h"
-#include <iostream>
-#endif
-
-namespace a32 = vixl::aarch32;
-
-namespace CPU::Recompiler {
-constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
-constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes total
-constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
-
-static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
-static std::unordered_map<const void*, u32> s_trampoline_targets;
-static u8* s_trampoline_start_ptr = nullptr;
-static u32 s_trampoline_used = 0;
-} // namespace CPU::Recompiler
-
-bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
-{
-  return ((id >= 0 && id <= 3) || // r0-r3
-          (id == 12 || id == 14)); // ip, lr
-}
-
-s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
-{
-  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
-  Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
-  return static_cast<s32>(reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current));
-}
-
-bool CPU::Recompiler::armIsPCDisplacementInImmediateRange(s32 displacement)
-{
-  return (displacement >= -33554432 && displacement <= 33554428);
-}
-
-void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm)
-{
-  if (vixl::IsUintN(16, imm))
-  {
-    armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff);
-    return;
-  }
-
-  armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff);
-  armAsm->movt(vixl::aarch32::al, rd, imm >> 16);
-}
-
-void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                          const void* addr)
-{
-  armEmitMov(armAsm, reg, static_cast<u32>(reinterpret_cast<uintptr_t>(addr)));
-}
-
-void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
-{
-  const void* cur = armAsm->GetCursorAddress<const void*>();
-  s32 displacement = armGetPCDisplacement(cur, ptr);
-  bool use_bx = !armIsPCDisplacementInImmediateRange(displacement);
-  if (use_bx && !force_inline)
-  {
-    if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
-    {
-      displacement = armGetPCDisplacement(cur, trampoline);
-      use_bx = !armIsPCDisplacementInImmediateRange(displacement);
-    }
-  }
-
-  if (use_bx)
-  {
-    armMoveAddressToReg(armAsm, RSCRATCH, ptr);
-    armAsm->bx(RSCRATCH);
-  }
-  else
-  {
-    a32::Label label(displacement + armAsm->GetCursorOffset());
-    armAsm->b(&label);
-  }
-}
-
-void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline)
-{
-  const void* cur = armAsm->GetCursorAddress<const void*>();
-  s32 displacement = armGetPCDisplacement(cur, ptr);
-  bool use_blx = !armIsPCDisplacementInImmediateRange(displacement);
-  if (use_blx && !force_inline)
-  {
-    if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
-    {
-      displacement = armGetPCDisplacement(cur, trampoline);
-      use_blx = !armIsPCDisplacementInImmediateRange(displacement);
-    }
-  }
-
-  if (use_blx)
-  {
-    armMoveAddressToReg(armAsm, RSCRATCH, ptr);
-    armAsm->blx(RSCRATCH);
-  }
-  else
-  {
-    a32::Label label(displacement + armAsm->GetCursorOffset());
-    armAsm->bl(&label);
-  }
-}
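For reference (not part of the patch), the ±32MiB constants checked above come from the A32 B/BL encoding: the instruction carries a signed 24-bit word offset, so the reachable displacement is [-2^25, 2^25 - 4] bytes, and anything further needs an indirect bx/blx through a register or a trampoline:

#include <cstdint>

static bool InA32BranchRange(int64_t displacement)
{
  // -33554432 == -(1 << 25), 33554428 == (1 << 25) - 4
  return displacement >= -(int64_t(1) << 25) && displacement <= (int64_t(1) << 25) - 4;
}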
-
-void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond,
-                                        const void* ptr)
-{
-  const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr);
-  if (!armIsPCDisplacementInImmediateRange(displacement))
-  {
-    armMoveAddressToReg(armAsm, RSCRATCH, ptr);
-    armAsm->blx(cond, RSCRATCH);
-  }
-  else
-  {
-    a32::Label label(displacement + armAsm->GetCursorOffset());
-    armAsm->b(cond, &label);
-  }
-}
-
-void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                     const void* addr)
-{
-  armMoveAddressToReg(armAsm, reg, addr);
-  armAsm->ldr(reg, vixl::aarch32::MemOperand(reg));
-}
-
-void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg,
-                                      const void* addr, const vixl::aarch32::Register& tempreg)
-{
-  armMoveAddressToReg(armAsm, tempreg, addr);
-  armAsm->str(reg, vixl::aarch32::MemOperand(tempreg));
-}
-
-void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
-{
-#ifdef ENABLE_HOST_DISASSEMBLY
-  a32::PrintDisassembler dis(std::cout, 0);
-  dis.SetCodeAddress(reinterpret_cast<u32>(start));
-  dis.DisassembleA32Buffer(static_cast<const u32*>(start), size);
-#else
-  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
-#endif
-}
-
-u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
-{
-  return size / a32::kA32InstructionSizeInBytes;
-}
-
-u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
-{
-  using namespace vixl::aarch32;
-  using namespace CPU::Recompiler;
-
-  const s32 disp = armGetPCDisplacement(code, dst);
-  DebugAssert(armIsPCDisplacementInImmediateRange(disp));
-
-  // A32 jumps are silly.
-  {
-    vixl::aarch32::Assembler emit(static_cast<vixl::byte*>(code), kA32InstructionSizeInBytes, a32::A32);
-    a32::Label label(disp);
-    emit.b(&label);
-  }
-
-  if (flush_icache)
-    MemMap::FlushInstructionCache(code, kA32InstructionSizeInBytes);
-
-  return kA32InstructionSizeInBytes;
-}
-
-u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
-{
-  auto it = s_trampoline_targets.find(target);
-  if (it != s_trampoline_targets.end())
-    return s_trampoline_start_ptr + it->second;
-
-  // align to 16 bytes?
-  const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
-
-  // 4 movs plus a jump
-  if (TRAMPOLINE_AREA_SIZE - offset < 20)
-  {
-    Panic("Ran out of space in constant pool");
-    return nullptr;
-  }
-
-  u8* start = s_trampoline_start_ptr + offset;
-  a32::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
-  armMoveAddressToReg(&armAsm, RSCRATCH, target);
-  armAsm.bx(RSCRATCH);
-
-  const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
-  DebugAssert(size < 20);
-  s_trampoline_targets.emplace(target, offset);
-  s_trampoline_used = offset + static_cast<u32>(size);
-
-  MemMap::FlushInstructionCache(start, size);
-  return start;
-}
-
-u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
-{
-  using namespace vixl::aarch32;
-  using namespace CPU::Recompiler;
-
-#define PTR(x) a32::MemOperand(RSTATE, (s32)(((u8*)(x)) - ((u8*)&g_state)))
-
-  Assembler actual_asm(static_cast<vixl::byte*>(code), code_size);
-  Assembler* armAsm = &actual_asm;
-
-#ifdef VIXL_DEBUG
-  vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
-#endif
-
-  Label dispatch;
-
-  g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>();
-  {
-    // reserve some space for saving caller-saved registers
-    armAsm->sub(sp, sp, FUNCTION_STACK_SIZE);
-
-    // Need the CPU state for basically everything :-)
-    armMoveAddressToReg(armAsm, RSTATE, &g_state);
-  }
-
-  // check for pending events, then dispatch
-  g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
-  {
-    Label skip_event_check;
-    armAsm->ldr(RARG1, PTR(&g_state.pending_ticks));
-    armAsm->ldr(RARG2, PTR(&g_state.downcount));
-    armAsm->cmp(RARG1, RARG2);
-    armAsm->b(lt, &skip_event_check);
-
-    g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
-
-    armAsm->bind(&skip_event_check);
-  }
-
-  // TODO: align?
-  g_dispatcher = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->bind(&dispatch);
-
-    // RARG2 <- s_fast_map[pc >> 16]
-    armAsm->ldr(RARG1, PTR(&g_state.pc));
-    armMoveAddressToReg(armAsm, RARG3, g_code_lut.data());
-    armAsm->lsr(RARG2, RARG1, 16);
-    armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2));
-
-    // blx(RARG2[pc]) (fast_map[pc >> 2])
-    armAsm->ldr(RARG1, MemOperand(RARG2, RARG1));
-    armAsm->blx(RARG1);
-  }
-
-  g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->ldr(RARG1, PTR(&g_state.pc));
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
-    armAsm->b(&dispatch);
-  }
-
-  g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->ldr(RARG1, PTR(&g_state.pc));
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
-    armAsm->b(&dispatch);
-  }
-
-  g_interpret_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
-    armAsm->b(&dispatch);
-  }
-
-  armAsm->FinalizeCode();
-
-#if 0
-  // TODO: align?
-  s_trampoline_targets.clear();
-  s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
-  s_trampoline_used = 0;
-#endif
-
-#undef PTR
-  return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/;
-}
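A standalone sketch (not part of the patch) of the two-level dispatch lookup the stub above performs: the upper 16 bits of the guest PC select a page table, and, judging by the emitted addressing mode ("ldr RARG1, [RARG2, RARG1]"), the page-table pointers are pre-biased so the full PC works directly as a byte offset. The function and parameter names here are illustrative:

#include <cstdint>
#include <cstring>

static const void* LookupBlock(const void* const* code_lut, uint32_t pc)
{
  // first level: 64KB page selected by the top 16 bits of the PC
  const uint8_t* page = static_cast<const uint8_t*>(code_lut[pc >> 16]);

  // second level: the page pointer is assumed pre-biased, so the full pc acts
  // as a byte offset straight into the table of block entry points
  const void* entry;
  std::memcpy(&entry, page + pc, sizeof(entry));
  return entry;
}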
-
-// Macros aren't used with old-rec.
-#undef RRET
-#undef RARG1
-#undef RARG2
-#undef RARG3
-#undef RSCRATCH
-#undef RSTATE
-
-namespace CPU::Recompiler {
-
-constexpr HostReg RCPUPTR = 4;
-constexpr HostReg RMEMBASEPTR = 3;
-constexpr HostReg RRETURN = 0;
-constexpr HostReg RARG1 = 0;
-constexpr HostReg RARG2 = 1;
-constexpr HostReg RARG3 = 2;
-constexpr HostReg RARG4 = 3;
-constexpr HostReg RSCRATCH = 12;
-
-static const a32::Register GetHostReg8(HostReg reg)
-{
-  return a32::Register(reg);
-}
-
-static const a32::Register GetHostReg8(const Value& value)
-{
-  DebugAssert(value.size == RegSize_8 && value.IsInHostRegister());
-  return a32::Register(value.host_reg);
-}
-
-static const a32::Register GetHostReg16(HostReg reg)
-{
-  return a32::Register(reg);
-}
-
-static const a32::Register GetHostReg16(const Value& value)
-{
-  DebugAssert(value.size == RegSize_16 && value.IsInHostRegister());
-  return a32::Register(value.host_reg);
-}
-
-static const a32::Register GetHostReg32(HostReg reg)
-{
-  return a32::Register(reg);
-}
-
-static const a32::Register GetHostReg32(const Value& value)
-{
-  DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
-  return a32::Register(value.host_reg);
-}
-
-static const a32::Register GetCPUPtrReg()
-{
-  return GetHostReg32(RCPUPTR);
-}
-
-static const a32::Register GetFastmemBasePtrReg()
-{
-  return GetHostReg32(RMEMBASEPTR);
-}
-
-CodeGenerator::CodeGenerator()
-  : m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
-                                            CPU::CodeCache::GetFreeCodeSpace(), a32::A32),
-    m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
-                  CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32),
-    m_emit(&m_near_emitter)
-{
-  InitHostRegs();
-}
-
-CodeGenerator::~CodeGenerator() = default;
-
-const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/)
-{
-  static constexpr std::array<const char*, HostReg_Count> reg_names = {
-    {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}};
-  if (reg >= static_cast<HostReg>(HostReg_Count))
-    return "";
-
-  switch (size)
-  {
-    case RegSize_32:
-      return reg_names[reg];
-    default:
-      return "";
-  }
-}
-
-void CodeGenerator::InitHostRegs()
-{
-  // allocate nonvolatile registers before volatile ones
-  // NOTE: vixl also uses r12 for the macro assembler
-  m_register_cache.SetHostRegAllocationOrder({4, 5, 6, 7, 8, 9, 10, 11});
-  m_register_cache.SetCallerSavedHostRegs({0, 1, 2, 3, 12});
-  m_register_cache.SetCalleeSavedHostRegs({4, 5, 6, 7, 8, 9, 10, 11, 13, 14});
-  m_register_cache.SetCPUPtrHostReg(RCPUPTR);
-}
-
-void CodeGenerator::SwitchToFarCode()
-{
-  m_emit = &m_far_emitter;
-}
-
-void CodeGenerator::SwitchToNearCode()
-{
-  m_emit = &m_near_emitter;
-}
-
-void* CodeGenerator::GetStartNearCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer());
-}
-
-void* CodeGenerator::GetCurrentNearCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
-}
-
-void* CodeGenerator::GetCurrentFarCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
-}
-
-Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
-{
-  if (value.IsInHostRegister())
-    return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
-
-  Value new_value = m_register_cache.AllocateScratch(value.size);
-  EmitCopyValue(new_value.host_reg, value);
-  return new_value;
-}
-
-Value
CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */) -{ - if (value.IsInHostRegister()) - return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); - - Value new_value = Value::FromHostReg(&m_register_cache, RSCRATCH, value.size); - EmitCopyValue(new_value.host_reg, value); - return new_value; -} - -void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */) -{ - if (allocate_registers) - { - // Save the link register, since we'll be calling functions. - const bool link_reg_allocated = m_register_cache.AllocateHostReg(14); - DebugAssert(link_reg_allocated); - UNREFERENCED_VARIABLE(link_reg_allocated); - m_register_cache.AssumeCalleeSavedRegistersAreSaved(); - - // Store the CPU struct pointer. TODO: make this better. - const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR); - // m_emit->Mov(GetCPUPtrReg(), reinterpret_cast(&g_state)); - DebugAssert(cpu_reg_allocated); - UNREFERENCED_VARIABLE(cpu_reg_allocated); - } -} - -void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* jump_to) -{ - if (free_registers) - { - m_register_cache.FreeHostReg(RCPUPTR); - m_register_cache.FreeHostReg(14); - m_register_cache.PopCalleeSavedRegisters(true); - } - - if (jump_to) - armEmitJmp(m_emit, jump_to, true); -} - -void CodeGenerator::EmitExceptionExit() -{ - // ensure all unflushed registers are written back - m_register_cache.FlushAllGuestRegisters(false, false); - - // the interpreter load delay might have its own value, but we'll overwrite it here anyway - // technically RaiseException() and FlushPipeline() have already been called, but that should be okay - m_register_cache.FlushLoadDelay(false); - - m_register_cache.PopCalleeSavedRegisters(false); - - armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true); -} - -void CodeGenerator::EmitExceptionExitOnBool(const Value& value) -{ - Assert(!value.IsConstant() && value.IsInHostRegister()); - - m_register_cache.PushState(); - - // TODO: This is... not great. 
-  a32::Label skip_branch;
-  m_emit->tst(GetHostReg32(value.host_reg), 1);
-  m_emit->b(a32::eq, &skip_branch);
-  EmitBranch(GetCurrentFarCodePointer());
-  m_emit->Bind(&skip_branch);
-
-  SwitchToFarCode();
-  EmitExceptionExit();
-  SwitchToNearCode();
-
-  m_register_cache.PopState();
-}
-
-const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
-{
-  m_near_emitter.FinalizeCode();
-  m_far_emitter.FinalizeCode();
-
-  const void* code = CPU::CodeCache::GetFreeCodePointer();
-  *out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
-  *out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());
-
-  CPU::CodeCache::CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
-  CPU::CodeCache::CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
-
-  m_near_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
-                               CPU::CodeCache::GetFreeCodeSpace(), a32::A32);
-  m_far_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
-                              CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32);
-
-  return code;
-}
-
-void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
-{
-  switch (to_size)
-  {
-    case RegSize_16:
-    {
-      switch (from_size)
-      {
-        case RegSize_8:
-          m_emit->sxtb(GetHostReg16(to_reg), GetHostReg8(from_reg));
-          m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF);
-          return;
-      }
-    }
-    break;
-
-    case RegSize_32:
-    {
-      switch (from_size)
-      {
-        case RegSize_8:
-          m_emit->sxtb(GetHostReg32(to_reg), GetHostReg8(from_reg));
-          return;
-        case RegSize_16:
-          m_emit->sxth(GetHostReg32(to_reg), GetHostReg16(from_reg));
-          return;
-      }
-    }
-    break;
-  }
-
-  Panic("Unknown sign-extend combination");
-}
-
-void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
-{
-  switch (to_size)
-  {
-    case RegSize_16:
-    {
-      switch (from_size)
-      {
-        case RegSize_8:
-          m_emit->and_(GetHostReg16(to_reg), GetHostReg8(from_reg), 0xFF);
-          return;
-      }
-    }
-    break;
-
-    case RegSize_32:
-    {
-      switch (from_size)
-      {
-        case RegSize_8:
-          m_emit->and_(GetHostReg32(to_reg), GetHostReg8(from_reg), 0xFF);
-          return;
-        case RegSize_16:
-          m_emit->and_(GetHostReg32(to_reg), GetHostReg16(from_reg), 0xFFFF);
-          return;
-      }
-    }
-    break;
-  }
-
-  Panic("Unknown zero-extend combination");
-}
-
-void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value)
-{
-  // TODO: mov x, 0 -> xor x, x
-  DebugAssert(value.IsConstant() || value.IsInHostRegister());
-
-  switch (value.size)
-  {
-    case RegSize_8:
-    case RegSize_16:
-    case RegSize_32:
-    {
-      if (value.IsConstant())
-        m_emit->Mov(GetHostReg32(to_reg), value.GetS32ConstantValue());
-      else
-        m_emit->Mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
-    }
-    break;
-
-    default:
-      UnreachableCode();
-      break;
-  }
-}
-
-void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
-{
-  Assert(value.IsConstant() || value.IsInHostRegister());
-
-  // if it's in a host register already, this is easy
-  if (value.IsInHostRegister())
-  {
-    if (set_flags)
-      m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
-    else
-      m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
-
-    return;
-  }
-
-  // do we need temporary storage for the constant, if it won't fit in an immediate?
- const s32 constant_value = value.GetS32ConstantValue(); - if (a32::ImmediateA32::IsImmediateA32(static_cast(constant_value))) - { - if (set_flags) - m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - else - m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); - if (set_flags) - m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); - else - m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (set_flags) - m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - const s32 constant_value = value.GetS32ConstantValue(); - if (a32::ImmediateA32::IsImmediateA32(static_cast(constant_value))) - { - if (set_flags) - m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); - if (set_flags) - m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - const s32 constant_value = value.GetS32ConstantValue(); - if (constant_value >= 0) - { - if (a32::ImmediateA32::IsImmediateA32(static_cast(constant_value))) - { - m_emit->cmp(GetHostReg32(to_reg), constant_value); - return; - } - } - else - { - if (a32::ImmediateA32::IsImmediateA32(static_cast(-constant_value))) - { - m_emit->cmn(GetHostReg32(to_reg), -constant_value); - return; - } - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); - m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, - bool signed_multiply) -{ - // We could use GetValueInHostRegister() here, but we run out of registers... - // Value lhs_in_reg = GetValueInHostRegister(lhs); - // Value rhs_in_reg = GetValueInHostRegister(rhs); - const HostReg lhs_in_reg = lhs.IsInHostRegister() ? lhs.GetHostRegister() : (EmitCopyValue(RARG1, lhs), RARG1); - const HostReg rhs_in_reg = rhs.IsInHostRegister() ? 
rhs.GetHostRegister() : (EmitCopyValue(RARG2, rhs), RARG2); - - if (lhs.size < RegSize_64) - { - if (signed_multiply) - { - m_emit->smull(GetHostReg32(to_reg_lo), GetHostReg32(to_reg_hi), GetHostReg32(lhs_in_reg), - GetHostReg32(rhs_in_reg)); - } - else - { - m_emit->umull(GetHostReg32(to_reg_lo), GetHostReg32(to_reg_hi), GetHostReg32(lhs_in_reg), - GetHostReg32(rhs_in_reg)); - } - } - else - { - // TODO: Use mul + smulh - Panic("Not implemented"); - } -} - -void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, - bool signed_divide) -{ - // only 32-bit supported for now.. - Assert(size == RegSize_32); - - Value quotient_value; - if (to_reg_quotient == HostReg_Count) - quotient_value.SetHostReg(&m_register_cache, RSCRATCH, size); - else - quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size); - - if (signed_divide) - { - m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); - if (to_reg_remainder != HostReg_Count) - { - m_emit->mul(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom)); - m_emit->sub(GetHostReg32(to_reg_remainder), GetHostReg32(num), GetHostReg32(to_reg_remainder)); - } - } - else - { - m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); - if (to_reg_remainder != HostReg_Count) - { - m_emit->mul(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom)); - m_emit->sub(GetHostReg32(to_reg_remainder), GetHostReg32(num), GetHostReg32(to_reg_remainder)); - } - } -} - -void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) -{ - Panic("Not implemented"); -#if 0 - switch (size) - { - case RegSize_8: - m_emit->inc(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->inc(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->inc(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -#endif -} - -void CodeGenerator::EmitDec(HostReg to_reg, RegSize size) -{ - Panic("Not implemented"); -#if 0 - switch (size) - { - case RegSize_8: - m_emit->dec(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->dec(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->dec(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -#endif -} - -void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - { - m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), static_cast(amount_value.constant_value & 0x1F)); - } - else if (assume_amount_masked) - { - m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - } - else - { - m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); - m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); - } - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - } -} - -void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - { - m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), 
static_cast(amount_value.constant_value & 0x1F)); - } - else if (assume_amount_masked) - { - m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - } - else - { - m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); - m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); - } - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - } -} - -void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - { - m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), static_cast(amount_value.constant_value & 0x1F)); - } - else if (assume_amount_masked) - { - m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - } - else - { - m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); - m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); - } - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - } -} - -static bool CanFitInBitwiseImmediate(const Value& value) -{ - return a32::ImmediateA32::IsImmediateA32(static_cast(value.constant_value)); -} - -void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - if (CanFitInBitwiseImmediate(value)) - { - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - if (CanFitInBitwiseImmediate(value)) - { - m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); - m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? 
- if (CanFitInBitwiseImmediate(value)) - { - m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); - m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - m_emit->tst(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - if (CanFitInBitwiseImmediate(value)) - { - m_emit->tst(GetHostReg32(to_reg), s32(value.constant_value)); - return; - } - - // need a temporary - m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); - m_emit->tst(GetHostReg32(to_reg), GetHostReg32(RSCRATCH)); -} - -void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) -{ - switch (size) - { - case RegSize_8: - m_emit->mvn(GetHostReg8(to_reg), GetHostReg8(to_reg)); - m_emit->and_(GetHostReg8(to_reg), GetHostReg8(to_reg), 0xFF); - break; - - case RegSize_16: - m_emit->mvn(GetHostReg16(to_reg), GetHostReg16(to_reg)); - m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); - break; - - case RegSize_32: - m_emit->mvn(GetHostReg32(to_reg), GetHostReg32(to_reg)); - break; - - default: - break; - } -} - -void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) -{ - if (condition == Condition::Always) - { - m_emit->Mov(GetHostReg32(to_reg), 1); - return; - } - - a32::Condition acond(a32::Condition::Never()); - switch (condition) - { - case Condition::NotEqual: - acond = a32::ne; - break; - - case Condition::Equal: - acond = a32::eq; - break; - - case Condition::Overflow: - acond = a32::vs; - break; - - case Condition::Greater: - acond = a32::gt; - break; - - case Condition::GreaterEqual: - acond = a32::ge; - break; - - case Condition::Less: - acond = a32::lt; - break; - - case Condition::LessEqual: - acond = a32::le; - break; - - case Condition::Negative: - acond = a32::mi; - break; - - case Condition::PositiveOrZero: - acond = a32::pl; - break; - - case Condition::Above: - acond = a32::hi; - break; - - case Condition::AboveEqual: - acond = a32::cs; - break; - - case Condition::Below: - acond = a32::cc; - break; - - case Condition::BelowEqual: - acond = a32::ls; - break; - - default: - UnreachableCode(); - return; - } - - m_emit->mov(GetHostReg32(to_reg), 0); - m_emit->mov(acond, GetHostReg32(to_reg), 1); -} - -u32 CodeGenerator::PrepareStackForCall() -{ - m_register_cache.PushCallerSavedRegisters(); - m_membase_loaded = false; - return 0; -} - -void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) -{ - m_register_cache.PopCallerSavedRegisters(); -} - -void CodeGenerator::EmitCall(const void* ptr) -{ - armEmitCall(m_emit, ptr, false); -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - 
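All of the two-operand emitters above (EmitAdd, EmitSub, EmitCmp, EmitAnd, EmitOr, EmitXor, EmitTest) share one pattern: try to encode the constant as an A32 modified immediate, otherwise materialize it into RSCRATCH first. The rule that `a32::ImmediateA32::IsImmediateA32()` enforces is "an 8-bit value rotated right by an even amount"; a from-scratch sketch of that check (not vixl's implementation) follows.

```cpp
#include <cstdint>

// A32 data-processing immediates are imm8 rotated right by 2*n (n = 0..15).
// Testing every even left-rotation of v against the 8-bit range is an
// equivalent formulation of the same rule.
static bool IsA32ModifiedImmediate(uint32_t v)
{
  for (uint32_t rot = 0; rot < 32; rot += 2)
  {
    const uint32_t rotated = (v << rot) | (rot ? (v >> (32 - rot)) : 0);
    if (rotated <= 0xFFu)
      return true;
  }
  return false;
}
```

This is also why EmitCmp above additionally tries the negated constant with `cmn`: a value that does not encode may still encode after negation.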
-void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3) -{ - if (return_value) - m_register_cache.DiscardHostReg(return_value->GetHostRegister()); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3, const Value& arg4) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - EmitCopyValue(RARG4, arg4); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position) -{ - const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4)); - m_emit->str(GetHostReg32(reg), addr); -} - -void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - // TODO: Use stm? - EmitPushHostReg(reg, position); - EmitPushHostReg(reg2, position + 1); -} - -void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position) -{ - const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4)); - m_emit->ldr(GetHostReg32(reg), addr); -} - -void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - // TODO: Use ldm? 
- Assert(position > 0); - EmitPopHostReg(reg2, position); - EmitPopHostReg(reg, position - 1); -} - -void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset) -{ - const s32 s_offset = static_cast(offset); - - switch (guest_size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg8(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg16(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value) -{ - const Value hr_value = GetValueInHostOrScratchRegister(value); - const s32 s_offset = static_cast(offset); - - switch (value.size) - { - case RegSize_8: - m_emit->strb(GetHostReg8(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_16: - m_emit->strh(GetHostReg16(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_32: - m_emit->str(GetHostReg32(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) -{ - const s32 s_offset = static_cast(offset); - const a32::MemOperand o_offset(GetCPUPtrReg(), s_offset); - - Value real_value; - if (value.IsInHostRegister()) - { - real_value.SetHostReg(&m_register_cache, value.host_reg, value.size); - } - else - { - // do we need temporary storage for the constant, if it won't fit in an immediate? - Assert(value.IsConstant()); - const s32 constant_value = value.GetS32ConstantValue(); - if (!a32::ImmediateA32::IsImmediateA32(static_cast(constant_value))) - { - real_value.SetHostReg(&m_register_cache, RARG2, value.size); - EmitCopyValue(real_value.host_reg, value); - } - else - { - real_value = value; - } - } - - // Don't need to mask here because we're storing back to memory. 
- switch (value.size) - { - case RegSize_8: - { - m_emit->Ldrb(GetHostReg8(RARG1), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg8(RARG1), GetHostReg8(RARG1), real_value.GetS32ConstantValue()); - else - m_emit->Add(GetHostReg8(RARG1), GetHostReg8(RARG1), GetHostReg8(real_value)); - m_emit->Strb(GetHostReg8(RARG1), o_offset); - } - break; - - case RegSize_16: - { - m_emit->Ldrh(GetHostReg16(RARG1), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg16(RARG1), GetHostReg16(RARG1), real_value.GetS32ConstantValue()); - else - m_emit->Add(GetHostReg16(RARG1), GetHostReg16(RARG1), GetHostReg16(real_value)); - m_emit->Strh(GetHostReg16(RARG1), o_offset); - } - break; - - case RegSize_32: - { - m_emit->Ldr(GetHostReg32(RARG1), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), real_value.GetS32ConstantValue()); - else - m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), GetHostReg32(real_value)); - m_emit->Str(GetHostReg32(RARG1), o_offset); - } - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EnsureMembaseLoaded() -{ - if (m_membase_loaded) - return; - - m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base))); - m_membase_loaded = true; -} - -void CodeGenerator::EmitUpdateFastmemBase() -{ - m_membase_loaded = false; -} - -void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) -{ - EnsureMembaseLoaded(); - - HostReg address_reg; - if (address.IsConstant()) - { - m_emit->Mov(GetHostReg32(RSCRATCH), static_cast(address.constant_value)); - address_reg = RSCRATCH; - } - else - { - address_reg = address.host_reg; - } - - m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg32(RARG1), - a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load - - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, Value& result) -{ - EnsureMembaseLoaded(); - - HostReg address_reg; - if (address.IsConstant()) - { - m_emit->Mov(GetHostReg32(RSCRATCH), static_cast(address.constant_value)); - address_reg = RSCRATCH; - } - else - { - address_reg = address.host_reg; - } - - m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg32(RARG1), - a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load - - m_register_cache.InhibitAllocation(); - - void* host_pc = GetCurrentNearCodePointer(); - - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), 
a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg)));
-      break;
-
-    default:
-      UnreachableCode();
-      break;
-  }
-
-  const u32 host_code_size =
-    static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
-
-  // generate slowmem fallback
-  const void* host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
-
-  EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
-
-  SwitchToNearCode();
-  m_register_cache.UninhibitAllocation();
-
-  CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
-}
-
-void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                               const Value& address, RegSize size, Value& result, bool in_far_code)
-{
-  if (g_settings.cpu_recompiler_memory_exceptions)
-  {
-    // NOTE: This can leave junk in the upper bits
-    switch (size)
-    {
-      case RegSize_8:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address);
-        break;
-
-      case RegSize_16:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address);
-        break;
-
-      case RegSize_32:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address);
-        break;
-
-      default:
-        UnreachableCode();
-        break;
-    }
-
-    m_register_cache.PushState();
-
-    a32::Label load_okay;
-    m_emit->tst(GetHostReg32(1), 1);
-    m_emit->b(a32::ne, &load_okay);
-    EmitBranch(GetCurrentFarCodePointer());
-    m_emit->Bind(&load_okay);
-
-    // load exception path
-    if (!in_far_code)
-      SwitchToFarCode();
-
-    // cause_bits = (-result << 2) | BD | cop_n
-    m_emit->rsb(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 0);
-    m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
-    EmitOr(result.host_reg, result.host_reg,
-           Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
-             static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
-    EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
-
-    EmitExceptionExit();
-
-    if (!in_far_code)
-      SwitchToNearCode();
-
-    m_register_cache.PopState();
-  }
-  else
-  {
-    switch (size)
-    {
-      case RegSize_8:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address);
-        break;
-
-      case RegSize_16:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address);
-        break;
-
-      case RegSize_32:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address);
-        break;
-
-      default:
-        UnreachableCode();
-        break;
-    }
-  }
-}
-
-void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                                const Value& address, RegSize size, const Value& value)
-{
-  EnsureMembaseLoaded();
-
-  Value actual_value = GetValueInHostRegister(value);
-
-  HostReg address_reg;
-  if (address.IsConstant())
-  {
-    m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value));
-    address_reg = RSCRATCH;
-  }
-  else
-  {
-    address_reg = address.host_reg;
-  }
-
-  // TODO: if this gets backpatched, these instructions are wasted
-
-  
m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg32(RARG1), - a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load - - m_register_cache.InhibitAllocation(); - - void* host_pc = GetCurrentNearCodePointer(); - - switch (size) - { - case RegSize_8: - m_emit->strb(GetHostReg32(actual_value.host_reg), - a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->strh(GetHostReg32(actual_value.host_reg), - a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } - - const u32 host_code_size = - static_cast(static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc))); - - // generate slowmem fallback - void* host_slowmem_pc = GetCurrentFarCodePointer(); - SwitchToFarCode(); - - DebugAssert(m_delayed_cycles_add > 0); - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast(m_delayed_cycles_add))); - - EmitStoreGuestMemorySlowmem(instruction, info, address, size, actual_value, true); - - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(-m_delayed_cycles_add))); - - // return to the block code - EmitBranch(GetCurrentNearCodePointer(), false); - - SwitchToNearCode(); - m_register_cache.UninhibitAllocation(); - - CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc); -} - -void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, const Value& value, - bool in_far_code) -{ - Value value_in_hr = GetValueInHostRegister(value); - - if (g_settings.cpu_recompiler_memory_exceptions) - { - Assert(!in_far_code); - - Value result = m_register_cache.AllocateScratch(RegSize_32); - switch (size) - { - case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr); - break; - - case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr); - break; - - case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr); - break; - - default: - UnreachableCode(); - break; - } - - m_register_cache.PushState(); - - a32::Label store_okay; - m_emit->tst(GetHostReg32(result.host_reg), 1); - m_emit->b(a32::eq, &store_okay); - EmitBranch(GetCurrentFarCodePointer()); - m_emit->Bind(&store_okay); - - // store exception path - if (!in_far_code) - SwitchToFarCode(); - - // cause_bits = (result << 2) | BD | cop_n - m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); - EmitOr(result.host_reg, result.host_reg, - Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( - static_cast(0), info.is_branch_delay_slot, false, instruction.cop.cop_n))); - EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); - - if (!in_far_code) - EmitExceptionExit(); - SwitchToNearCode(); - - m_register_cache.PopState(); - } - else - { - switch (size) - { - case RegSize_8: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr); - break; - - case RegSize_16: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr); - break; - - case RegSize_32: - 
EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi) -{ - DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address); - - // turn it into a jump to the slowmem handler - vixl::aarch32::MacroAssembler emit(static_cast(host_pc), lbi.code_size, a32::A32); - - // check jump distance - const s32 displacement = armGetPCDisplacement(host_pc, lbi.thunk_address); - if (!armIsPCDisplacementInImmediateRange(displacement)) - { - armMoveAddressToReg(&emit, GetHostReg32(RSCRATCH), lbi.thunk_address); - emit.bx(GetHostReg32(RSCRATCH)); - } - else - { - a32::Label label(displacement + emit.GetCursorOffset()); - emit.b(&label); - } - - const s32 nops = (static_cast(lbi.code_size) - static_cast(emit.GetCursorOffset())) / 4; - Assert(nops >= 0); - for (s32 i = 0; i < nops; i++) - emit.nop(); - - MemMap::FlushInstructionCache(host_pc, lbi.code_size); -} - -void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) -{ - EmitLoadGlobalAddress(RSCRATCH, ptr); - switch (size) - { - case RegSize_8: - m_emit->Ldrb(GetHostReg8(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - case RegSize_16: - m_emit->Ldrh(GetHostReg16(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - case RegSize_32: - m_emit->Ldr(GetHostReg32(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) -{ - Value value_in_hr = GetValueInHostRegister(value); - - EmitLoadGlobalAddress(RSCRATCH, ptr); - switch (value.size) - { - case RegSize_8: - m_emit->Strb(GetHostReg8(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - case RegSize_16: - m_emit->Strh(GetHostReg16(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - case RegSize_32: - m_emit->Str(GetHostReg32(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitFlushInterpreterLoadDelay() -{ - Value reg = Value::FromHostReg(&m_register_cache, 0, RegSize_32); - Value value = Value::FromHostReg(&m_register_cache, 1, RegSize_32); - - const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - const a32::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); - const a32::MemOperand regs_base(GetCPUPtrReg(), OFFSETOF(State, regs.r[0])); - - a32::Label skip_flush; - - // reg = load_delay_reg - m_emit->Ldrb(GetHostReg32(reg), load_delay_reg); - - // if load_delay_reg == Reg::count goto skip_flush - m_emit->Cmp(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->B(a32::eq, &skip_flush); - - // value = load_delay_value - m_emit->Ldr(GetHostReg32(value), load_delay_value); - - // reg = offset(r[0] + reg << 2) - m_emit->Lsl(GetHostReg32(reg), GetHostReg32(reg), 2); - m_emit->Add(GetHostReg32(reg), GetHostReg32(reg), OFFSETOF(State, regs.r[0])); - - // r[reg] = value - m_emit->Str(GetHostReg32(value), a32::MemOperand(GetCPUPtrReg(), GetHostReg32(reg))); - - // load_delay_reg = Reg::count - m_emit->Mov(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->Strb(GetHostReg32(reg), load_delay_reg); - - m_emit->Bind(&skip_flush); -} - -void CodeGenerator::EmitMoveNextInterpreterLoadDelay() -{ - Value reg = 
Value::FromHostReg(&m_register_cache, 0, RegSize_32); - Value value = Value::FromHostReg(&m_register_cache, 1, RegSize_32); - - const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - const a32::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); - const a32::MemOperand next_load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_reg)); - const a32::MemOperand next_load_delay_value(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_value)); - - m_emit->ldrb(GetHostReg32(reg), next_load_delay_reg); - m_emit->ldr(GetHostReg32(value), next_load_delay_value); - m_emit->strb(GetHostReg32(reg), load_delay_reg); - m_emit->str(GetHostReg32(value), load_delay_value); - m_emit->Mov(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->strb(GetHostReg32(reg), next_load_delay_reg); -} - -void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) -{ - if (!m_load_delay_dirty) - return; - - const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - Value temp = Value::FromHostReg(&m_register_cache, RSCRATCH, RegSize_8); - - a32::Label skip_cancel; - - // if load_delay_reg != reg goto skip_cancel - m_emit->ldrb(GetHostReg8(temp), load_delay_reg); - m_emit->cmp(GetHostReg8(temp), static_cast(reg)); - m_emit->B(a32::ne, &skip_cancel); - - // load_delay_reg = Reg::count - m_emit->Mov(GetHostReg8(temp), static_cast(Reg::count)); - m_emit->strb(GetHostReg8(temp), load_delay_reg); - - m_emit->Bind(&skip_cancel); -} - -void CodeGenerator::EmitICacheCheckAndUpdate() -{ - if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) - { - if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) - { - armEmitFarLoad(m_emit, GetHostReg32(RARG2), GetFetchMemoryAccessTimePtr()); - m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - m_emit->Mov(GetHostReg32(RARG3), m_block->size); - m_emit->mul(GetHostReg32(RARG2), GetHostReg32(RARG2), GetHostReg32(RARG3)); - m_emit->add(GetHostReg32(RARG1), GetHostReg32(RARG1), GetHostReg32(RARG2)); - m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - } - else - { - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(m_block->uncached_fetch_ticks))); - } - } - else if (m_block->icache_line_count > 0) - { - const auto& ticks_reg = a32::r0; - const auto& current_tag_reg = a32::r1; - const auto& existing_tag_reg = a32::r2; - - VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; - m_emit->ldr(ticks_reg, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - m_emit->Mov(current_tag_reg, current_pc); - - for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) - { - const TickCount fill_ticks = GetICacheFillTicks(current_pc); - if (fill_ticks <= 0) - continue; - - const u32 line = GetICacheLine(current_pc); - const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); - - a32::Label cache_hit; - m_emit->ldr(existing_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset)); - m_emit->cmp(existing_tag_reg, current_tag_reg); - m_emit->B(a32::eq, &cache_hit); - - m_emit->str(current_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset)); - EmitAdd(0, 0, Value::FromConstantU32(static_cast(fill_ticks)), false); - m_emit->Bind(&cache_hit); - - if (i != (m_block->icache_line_count - 1)) - m_emit->add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE); - } - - m_emit->str(ticks_reg, 
a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
-  }
-}
-
-void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
-{
-  // load the base addresses up front, so the compares below can use immediate offsets
-  armMoveAddressToReg(m_emit, GetHostReg32(RARG1), ram_ptr);
-  armMoveAddressToReg(m_emit, GetHostReg32(RARG2), shadow_ptr);
-
-  u32 offset = 0;
-  a32::Label block_changed;
-
-#if 0
-  /* TODO: Vectorize
-#include <stdint.h>
-#include <arm_neon.h>
-
-bool foo(const void* a, const void* b)
-{
-  uint8x16_t v1 = vld1q_u8((const uint8_t*)a);
-  uint8x16_t v2 = vld1q_u8((const uint8_t*)b);
-  uint8x16_t v3 = vld1q_u8((const uint8_t*)a + 16);
-  uint8x16_t v4 = vld1q_u8((const uint8_t*)b + 16);
-  uint8x16_t r = vceqq_u8(v1, v2);
-  uint8x16_t r2 = vceqq_u8(v3, v4);
-  uint8x16_t r3 = vandq_u8(r, r2);
-  uint32x2_t rr = vpmin_u32(vget_low_u32(vreinterpretq_u32_u8(r3)), vget_high_u32(vreinterpretq_u32_u8(r3)));
-  if ((vget_lane_u32(rr, 0) & vget_lane_u32(rr, 1)) != 0xFFFFFFFFu)
-    return false;
-  else
-    return true;
-}
-*/
-  bool first = true;
-
-  while (size >= 16)
-  {
-    const a32::VRegister vtmp = a32::v2.V4S();
-    const a32::VRegister dst = first ? a32::v0.V4S() : a32::v1.V4S();
-    m_emit->ldr(dst, a32::MemOperand(RXARG1, offset));
-    m_emit->ldr(vtmp, a32::MemOperand(RXARG2, offset));
-    m_emit->cmeq(dst, dst, vtmp);
-    if (!first)
-      m_emit->and_(dst.V16B(), dst.V16B(), vtmp.V16B());
-    else
-      first = false;
-
-    offset += 16;
-    size -= 16;
-  }
-
-  if (!first)
-  {
-    // TODO: make sure this doesn't choke on ffffffff
-    m_emit->uminv(a32::s0, a32::v0.V4S());
-    m_emit->fcmp(a32::s0, 0.0);
-    m_emit->b(&block_changed, a32::eq);
-  }
-#endif
-
-  while (size >= 4)
-  {
-    m_emit->ldr(GetHostReg32(RARG3), a32::MemOperand(GetHostReg32(RARG1), offset));
-    m_emit->ldr(GetHostReg32(RARG4), a32::MemOperand(GetHostReg32(RARG2), offset));
-    m_emit->cmp(GetHostReg32(RARG3), GetHostReg32(RARG4));
-    m_emit->b(a32::ne, &block_changed);
-    offset += 4;
-    size -= 4;
-  }
-
-  DebugAssert(size == 0);
-
-  a32::Label block_unchanged;
-  m_emit->b(&block_unchanged);
-  m_emit->bind(&block_changed);
-  armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false);
-  m_emit->bind(&block_unchanged);
-}
-
-void CodeGenerator::EmitStallUntilGTEComplete()
-{
-  static_assert(OFFSETOF(State, pending_ticks) + sizeof(u32) == OFFSETOF(State, gte_completion_tick));
-
-  m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
-  m_emit->ldr(GetHostReg32(RARG2), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, gte_completion_tick)));
-
-  if (m_delayed_cycles_add > 0)
-  {
-    m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add));
-    m_delayed_cycles_add = 0;
-  }
-
-  m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1));
-  m_emit->mov(a32::hi, GetHostReg32(RARG1), GetHostReg32(RARG2));
-  m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
-}
-
-void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
-{
-  const s32 displacement = armGetPCDisplacement(GetCurrentCodePointer(), address);
-  if (armIsPCDisplacementInImmediateRange(displacement))
-  {
-    a32::Label label(displacement + m_emit->GetCursorOffset());
-    m_emit->b(&label);
-    return;
-  }
-
-  m_emit->Mov(GetHostReg32(RSCRATCH), reinterpret_cast<u32>(address));
-  m_emit->bx(GetHostReg32(RSCRATCH));
-}
-
-void CodeGenerator::EmitBranch(LabelType* label)
-{
-  m_emit->b(label);
-}
-
-static a32::Condition TranslateCondition(Condition condition, bool invert)
-{
-  switch 
(condition) - { - case Condition::Always: - return a32::Condition::None(); - - case Condition::NotEqual: - case Condition::NotZero: - return invert ? a32::eq : a32::ne; - - case Condition::Equal: - case Condition::Zero: - return invert ? a32::ne : a32::eq; - - case Condition::Overflow: - return invert ? a32::vc : a32::vs; - - case Condition::Greater: - return invert ? a32::le : a32::gt; - - case Condition::GreaterEqual: - return invert ? a32::lt : a32::ge; - - case Condition::Less: - return invert ? a32::ge : a32::lt; - - case Condition::LessEqual: - return invert ? a32::gt : a32::le; - - case Condition::Negative: - return invert ? a32::pl : a32::mi; - - case Condition::PositiveOrZero: - return invert ? a32::mi : a32::pl; - - case Condition::Above: - return invert ? a32::ls : a32::hi; - - case Condition::AboveEqual: - return invert ? a32::cc : a32::cs; - - case Condition::Below: - return invert ? a32::cs : a32::cc; - - case Condition::BelowEqual: - return invert ? a32::hi : a32::ls; - - default: - UnreachableCode(); - return a32::Condition::Never(); - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - Panic("Needs a comparison value"); - return; - - case Condition::Negative: - case Condition::PositiveOrZero: - { - switch (size) - { - case RegSize_8: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - break; - case RegSize_16: - m_emit->tst(GetHostReg16(value), GetHostReg16(value)); - break; - case RegSize_32: - m_emit->tst(GetHostReg32(value), GetHostReg32(value)); - break; - default: - UnreachableCode(); - break; - } - - EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::NotZero: - { - switch (size) - { - case RegSize_8: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::ne, label); - break; - case RegSize_16: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::ne, label); - break; - case RegSize_32: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::ne, label); - break; - default: - UnreachableCode(); - break; - } - - return; - } - - case Condition::Zero: - { - switch (size) - { - case RegSize_8: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::eq, label); - break; - case RegSize_16: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::eq, label); - break; - case RegSize_32: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - m_emit->b(a32::eq, label); - break; - default: - UnreachableCode(); - break; - } - - return; - } - - case Condition::Always: - m_emit->b(label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - { - EmitCmp(lhs, rhs); - 
EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::Negative: - case Condition::PositiveOrZero: - case Condition::NotZero: - case Condition::Zero: - { - Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0)); - EmitConditionalBranch(condition, invert, lhs, rhs.size, label); - return; - } - - case Condition::Always: - m_emit->b(label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label) -{ - if (condition == Condition::Always) - m_emit->b(label); - else - m_emit->b(TranslateCondition(condition, invert), label); -} - -void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - m_emit->tst(GetHostReg32(reg), static_cast(1u << bit)); - m_emit->b(a32::eq, label); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitBindLabel(LabelType* label) -{ - m_emit->Bind(label); -} - -void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr) -{ - m_emit->Mov(GetHostReg32(host_reg), reinterpret_cast(ptr)); -} - -} // namespace CPU::Recompiler - -#endif // CPU_ARCH_ARM32 diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp deleted file mode 100644 index bf207e4e6..000000000 --- a/src/core/cpu_recompiler_code_generator_aarch64.cpp +++ /dev/null @@ -1,2702 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "cpu_code_cache_private.h" -#include "cpu_core.h" -#include "cpu_core_private.h" -#include "cpu_recompiler_code_generator.h" -#include "cpu_recompiler_thunks.h" -#include "settings.h" -#include "timing_event.h" - -#include "common/align.h" -#include "common/assert.h" -#include "common/log.h" -#include "common/memmap.h" - -#ifdef CPU_ARCH_ARM64 - -LOG_CHANNEL(Recompiler); - -#ifdef ENABLE_HOST_DISASSEMBLY -#include "vixl/aarch64/disasm-aarch64.h" -#endif - -namespace a64 = vixl::aarch64; - -namespace CPU::Recompiler { -constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers -constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes -constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE; - -static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024; -static std::unordered_map s_trampoline_targets; -static u8* s_trampoline_start_ptr = nullptr; -static u32 s_trampoline_used = 0; -} // namespace CPU::Recompiler - -bool CPU::Recompiler::armIsCallerSavedRegister(u32 id) -{ - // same on both linux and windows - return (id <= 18); -} - -void CPU::Recompiler::armEmitMov(a64::Assembler* armAsm, const a64::Register& rd, u64 imm) -{ - DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits()); - DebugAssert(rd.GetCode() != a64::sp.GetCode()); - - if (imm == 0) - { - armAsm->mov(rd, a64::Assembler::AppropriateZeroRegFor(rd)); - return; - } - - // The worst case for size is mov 64-bit immediate to sp: - // * up to 4 instructions to materialise the constant - // * 1 instruction to move to sp - - // Immediates on Aarch64 can be produced using an initial value, and zero to - // three move keep operations. - // - // Initial values can be generated with: - // 1. 64-bit move zero (movz). - // 2. 32-bit move inverted (movn). - // 3. 64-bit move inverted. - // 4. 
32-bit orr immediate. - // 5. 64-bit orr immediate. - // Move-keep may then be used to modify each of the 16-bit half words. - // - // The code below supports all five initial value generators, and - // applying move-keep operations to move-zero and move-inverted initial - // values. - - // Try to move the immediate in one instruction, and if that fails, switch to - // using multiple instructions. - const unsigned reg_size = rd.GetSizeInBits(); - - if (a64::Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP()) - { - // Immediate can be represented in a move zero instruction. Movz can't write - // to the stack pointer. - armAsm->movz(rd, imm); - return; - } - else if (a64::Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP()) - { - // Immediate can be represented in a move negative instruction. Movn can't - // write to the stack pointer. - armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & a64::kWRegMask)); - return; - } - else if (a64::Assembler::IsImmLogical(imm, reg_size)) - { - // Immediate can be represented in a logical orr instruction. - DebugAssert(!rd.IsZero()); - armAsm->orr(rd, a64::Assembler::AppropriateZeroRegFor(rd), imm); - return; - } - - // Generic immediate case. Imm will be represented by - // [imm3, imm2, imm1, imm0], where each imm is 16 bits. - // A move-zero or move-inverted is generated for the first non-zero or - // non-0xffff immX, and a move-keep for subsequent non-zero immX. - - uint64_t ignored_halfword = 0; - bool invert_move = false; - // If the number of 0xffff halfwords is greater than the number of 0x0000 - // halfwords, it's more efficient to use move-inverted. - if (vixl::CountClearHalfWords(~imm, reg_size) > vixl::CountClearHalfWords(imm, reg_size)) - { - ignored_halfword = 0xffff; - invert_move = true; - } - - // Iterate through the halfwords. Use movn/movz for the first non-ignored - // halfword, and movk for subsequent halfwords. - DebugAssert((reg_size % 16) == 0); - bool first_mov_done = false; - for (unsigned i = 0; i < (reg_size / 16); i++) - { - uint64_t imm16 = (imm >> (16 * i)) & 0xffff; - if (imm16 != ignored_halfword) - { - if (!first_mov_done) - { - if (invert_move) - armAsm->movn(rd, ~imm16 & 0xffff, 16 * i); - else - armAsm->movz(rd, imm16, 16 * i); - first_mov_done = true; - } - else - { - // Construct a wider constant. 
- armAsm->movk(rd, imm16, 16 * i); - } - } - } - - DebugAssert(first_mov_done); -} - -s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) -{ - // pxAssert(Common::IsAlignedPow2(reinterpret_cast(current), 4)); - // pxAssert(Common::IsAlignedPow2(reinterpret_cast(target), 4)); - return static_cast((reinterpret_cast(target) - reinterpret_cast(current)) >> 2); -} - -bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr) -{ - const void* cur = armAsm->GetCursorAddress(); - const void* current_code_ptr_page = - reinterpret_cast(reinterpret_cast(cur) & ~static_cast(0xFFF)); - const void* ptr_page = - reinterpret_cast(reinterpret_cast(addr) & ~static_cast(0xFFF)); - const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; - const u32 page_offset = static_cast(reinterpret_cast(addr) & 0xFFFu); - - return (vixl::IsInt21(page_displacement) && - (a64::Assembler::IsImmAddSub(page_offset) || a64::Assembler::IsImmLogical(page_offset, 64))); -} - -void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr) -{ - DebugAssert(reg.IsX()); - - const void* cur = armAsm->GetCursorAddress(); - const void* current_code_ptr_page = - reinterpret_cast(reinterpret_cast(cur) & ~static_cast(0xFFF)); - const void* ptr_page = - reinterpret_cast(reinterpret_cast(addr) & ~static_cast(0xFFF)); - const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; - const u32 page_offset = static_cast(reinterpret_cast(addr) & 0xFFFu); - if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmAddSub(page_offset)) - { - armAsm->adrp(reg, page_displacement); - armAsm->add(reg, reg, page_offset); - } - else if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64)) - { - armAsm->adrp(reg, page_displacement); - armAsm->orr(reg, reg, page_offset); - } - else - { - armEmitMov(armAsm, reg, reinterpret_cast(addr)); - } -} -void CPU::Recompiler::armEmitJmp(a64::Assembler* armAsm, const void* ptr, bool force_inline) -{ - const void* cur = armAsm->GetCursorAddress(); - s64 displacement = armGetPCDisplacement(cur, ptr); - bool use_blr = !vixl::IsInt26(displacement); - bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr); - if (use_blr && use_trampoline && !force_inline) - { - if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) - { - displacement = armGetPCDisplacement(cur, trampoline); - use_blr = !vixl::IsInt26(displacement); - } - } - - if (use_blr) - { - armMoveAddressToReg(armAsm, RXSCRATCH, ptr); - armAsm->br(RXSCRATCH); - } - else - { - armAsm->b(displacement); - } -} - -void CPU::Recompiler::armEmitCall(a64::Assembler* armAsm, const void* ptr, bool force_inline) -{ - const void* cur = armAsm->GetCursorAddress(); - s64 displacement = armGetPCDisplacement(cur, ptr); - bool use_blr = !vixl::IsInt26(displacement); - bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr); - if (use_blr && use_trampoline && !force_inline) - { - if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) - { - displacement = armGetPCDisplacement(cur, trampoline); - use_blr = !vixl::IsInt26(displacement); - } - } - - if (use_blr) - { - armMoveAddressToReg(armAsm, RXSCRATCH, ptr); - armAsm->blr(RXSCRATCH); - } - else - { - armAsm->bl(displacement); - } -} - -void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition cond, const void* ptr) -{ - const s64 jump_distance = static_cast(reinterpret_cast(ptr) - - 
-                                             reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
-  // pxAssert(Common::IsAligned(jump_distance, 4));
-
-  if (a64::Instruction::IsValidImmPCOffset(a64::CondBranchType, jump_distance >> 2))
-  {
-    armAsm->b(jump_distance >> 2, cond);
-  }
-  else
-  {
-    a64::Label branch_not_taken;
-    armAsm->b(&branch_not_taken, InvertCondition(cond));
-
-    const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
-                                                   reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
-    armAsm->b(new_jump_distance >> 2);
-    armAsm->bind(&branch_not_taken);
-  }
-}
-
-void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
-                                     const void* addr, bool sign_extend_word)
-{
-  const void* cur = armAsm->GetCursorAddress<const void*>();
-  const void* current_code_ptr_page =
-    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
-  const void* ptr_page =
-    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
-  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
-  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
-  a64::MemOperand memop;
-
-  const vixl::aarch64::Register xreg = reg.X();
-  if (vixl::IsInt21(page_displacement))
-  {
-    armAsm->adrp(xreg, page_displacement);
-    memop = vixl::aarch64::MemOperand(xreg, static_cast<s64>(page_offset));
-  }
-  else
-  {
-    armMoveAddressToReg(armAsm, xreg, addr);
-    memop = vixl::aarch64::MemOperand(xreg);
-  }
-
-  if (sign_extend_word)
-    armAsm->ldrsw(reg, memop);
-  else
-    armAsm->ldr(reg, memop);
-}
-
-void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
-                                      const void* addr, const vixl::aarch64::Register& tempreg)
-{
-  DebugAssert(tempreg.IsX());
-
-  const void* cur = armAsm->GetCursorAddress<const void*>();
-  const void* current_code_ptr_page =
-    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
-  const void* ptr_page =
-    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
-  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
-  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
-
-  if (vixl::IsInt21(page_displacement))
-  {
-    armAsm->adrp(tempreg, page_displacement);
-    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<s64>(page_offset)));
-  }
-  else
-  {
-    armMoveAddressToReg(armAsm, tempreg, addr);
-    armAsm->str(reg, vixl::aarch64::MemOperand(tempreg));
-  }
-}
-
-u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
-{
-  auto it = s_trampoline_targets.find(target);
-  if (it != s_trampoline_targets.end())
-    return s_trampoline_start_ptr + it->second;
-
-  // align to 16 bytes?
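// ---- Editorial aside (sketch, not part of the patch) ----------------------
// Why the trampoline area above exists: an AArch64 `B` stores a signed 26-bit
// *word* displacement, so direct branches only reach +/-128MiB. Targets beyond
// that go through a trampoline (or a mov+br). Encoding per the ARMv8 ISA;
// helper names are illustrative:
#include <cstdint>
static inline bool BranchDisplacementFits(int64_t disp_words)
{
  return disp_words >= -(int64_t(1) << 25) && disp_words < (int64_t(1) << 25);
}
static inline uint32_t EncodeUncondBranch(int64_t disp_words)
{
  return 0x14000000u | (static_cast<uint32_t>(disp_words) & 0x03FFFFFFu); // B imm26
}
// ----------------------------------------------------------------------------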
-  const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
-
-  // 4 movs plus a jump
-  if (TRAMPOLINE_AREA_SIZE - offset < 20)
-  {
-    Panic("Ran out of space in constant pool");
-    return nullptr;
-  }
-
-  u8* start = s_trampoline_start_ptr + offset;
-  a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
-#ifdef VIXL_DEBUG
-  vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset,
-                                         vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
-#endif
-  armMoveAddressToReg(&armAsm, RXSCRATCH, target);
-  armAsm.br(RXSCRATCH);
-  armAsm.FinalizeCode();
-
-  const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
-  DebugAssert(size < 20);
-  s_trampoline_targets.emplace(target, offset);
-  s_trampoline_used = offset + static_cast<u32>(size);
-
-  MemMap::FlushInstructionCache(start, size);
-  return start;
-}
-
-void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
-{
-#ifdef ENABLE_HOST_DISASSEMBLY
-  class MyDisassembler : public a64::Disassembler
-  {
-  protected:
-    void ProcessOutput(const a64::Instruction* instr) override
-    {
-      DEBUG_LOG("0x{:016X} {:08X}\t\t{}", reinterpret_cast<uintptr_t>(instr), instr->GetInstructionBits(), GetOutput());
-    }
-  };
-
-  a64::Decoder decoder;
-  MyDisassembler disas;
-  decoder.AppendVisitor(&disas);
-  decoder.Decode(static_cast<const a64::Instruction*>(start),
-                 reinterpret_cast<const a64::Instruction*>(static_cast<const u8*>(start) + size));
-#else
-  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
-#endif
-}
-
-u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
-{
-  return size / a64::kInstructionSize;
-}
-
-u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
-{
-  using namespace a64;
-  using namespace CPU::Recompiler;
-
-  const s64 disp = armGetPCDisplacement(code, dst);
-  DebugAssert(vixl::IsInt26(disp));
-
-  const u32 new_code = B | Assembler::ImmUncondBranch(disp);
-  std::memcpy(code, &new_code, sizeof(new_code));
-  if (flush_icache)
-    MemMap::FlushInstructionCache(code, kInstructionSize);
-
-  return kInstructionSize;
-}
-
-u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
-{
-  using namespace vixl::aarch64;
-  using namespace CPU::Recompiler;
-
-#define PTR(x) a64::MemOperand(RSTATE, (s64)(((u8*)(x)) - ((u8*)&g_state)))
-
-  Assembler actual_asm(static_cast<u8*>(code), code_size);
-  Assembler* armAsm = &actual_asm;
-
-#ifdef VIXL_DEBUG
-  vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
-#endif
-
-  Label dispatch;
-
-  g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>();
-  {
-    // reserve some space for saving caller-saved registers
-    armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE);
-
-    // Need the CPU state for basically everything :-)
-    armMoveAddressToReg(armAsm, RSTATE, &g_state);
-
-    // Fastmem setup, oldrec doesn't need it
-    if (IsUsingFastmem() && g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler)
-      armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base));
-
-    // Fall through to event dispatcher
-  }
-
-  // check events then for frame done
-  g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
-  {
-    Label skip_event_check;
-    armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
-    armAsm->ldr(RWARG2, PTR(&g_state.downcount));
-    armAsm->cmp(RWARG1, RWARG2);
-    armAsm->b(&skip_event_check, lt);
-
-    g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
-
-    armAsm->bind(&skip_event_check);
-  }
-
-  // TODO: align?
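// ---- Editorial aside (sketch, not part of the patch) ----------------------
// What the dispatcher emitted below computes, as C++. Note that it indexes
// the per-page table with the *full* pc >> 2 scaled by 8; that only works if
// each LUT entry is pre-biased by its page base when the table is built
// (hedged reconstruction; names are illustrative):
#include <cstdint>
using BlockFunction = void (*)();
static inline BlockFunction LookupBlockFunction(BlockFunction** code_lut, uint32_t pc)
{
  BlockFunction* page = code_lut[pc >> 16]; // x2 <- g_code_lut[pc >> 16]
  return page[pc >> 2];                     // blr x1 <- page[pc >> 2]
}
// ----------------------------------------------------------------------------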
-  g_dispatcher = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->bind(&dispatch);
-
-    // x9 <- s_fast_map[pc >> 16]
-    armAsm->ldr(RWARG1, PTR(&g_state.pc));
-    armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
-    armAsm->lsr(RWARG2, RWARG1, 16);
-    armAsm->lsr(RWARG1, RWARG1, 2);
-    armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));
-
-    // blr(x9[pc * 2]) (fast_map[pc >> 2])
-    armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3));
-    armAsm->blr(RXARG1);
-  }
-
-  g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->ldr(RWARG1, PTR(&g_state.pc));
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
-    armAsm->b(&dispatch);
-  }
-
-  g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armAsm->ldr(RWARG1, PTR(&g_state.pc));
-    armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
-    armAsm->b(&dispatch);
-  }
-
-  g_interpret_block = armAsm->GetCursorAddress<const void*>();
-  {
-    armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
-    armAsm->b(&dispatch);
-  }
-
-  armAsm->FinalizeCode();
-
-  // TODO: align?
-  s_trampoline_targets.clear();
-  s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
-  s_trampoline_used = 0;
-
-#undef PTR
-  return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
-}
-
-namespace CPU::Recompiler {
-
-constexpr HostReg RCPUPTR = 19;
-constexpr HostReg RMEMBASEPTR = 20;
-constexpr HostReg RRETURN = 0;
-constexpr HostReg RARG1 = 0;
-constexpr HostReg RARG2 = 1;
-constexpr HostReg RARG3 = 2;
-constexpr HostReg RARG4 = 3;
-constexpr HostReg RSCRATCH = 8;
-
-static const a64::WRegister GetHostReg8(HostReg reg)
-{
-  return a64::WRegister(reg);
-}
-
-static const a64::WRegister GetHostReg8(const Value& value)
-{
-  DebugAssert(value.size == RegSize_8 && value.IsInHostRegister());
-  return a64::WRegister(value.host_reg);
-}
-
-static const a64::WRegister GetHostReg16(HostReg reg)
-{
-  return a64::WRegister(reg);
-}
-
-static const a64::WRegister GetHostReg16(const Value& value)
-{
-  DebugAssert(value.size == RegSize_16 && value.IsInHostRegister());
-  return a64::WRegister(value.host_reg);
-}
-
-static const a64::WRegister GetHostReg32(HostReg reg)
-{
-  return a64::WRegister(reg);
-}
-
-static const a64::WRegister GetHostReg32(const Value& value)
-{
-  DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
-  return a64::WRegister(value.host_reg);
-}
-
-static const a64::XRegister GetHostReg64(HostReg reg)
-{
-  return a64::XRegister(reg);
-}
-
-static const a64::XRegister GetHostReg64(const Value& value)
-{
-  DebugAssert(value.size == RegSize_64 && value.IsInHostRegister());
-  return a64::XRegister(value.host_reg);
-}
-
-static const a64::XRegister GetCPUPtrReg()
-{
-  return GetHostReg64(RCPUPTR);
-}
-
-static const a64::XRegister GetFastmemBasePtrReg()
-{
-  return GetHostReg64(RMEMBASEPTR);
-}
-
-CodeGenerator::CodeGenerator()
-  : m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
-                                            CPU::CodeCache::GetFreeCodeSpace(), a64::PositionDependentCode),
-    m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
-                  CPU::CodeCache::GetFreeFarCodeSpace(), a64::PositionDependentCode),
-    m_emit(&m_near_emitter)
-{
-  // remove the temporaries from vixl's list to prevent it from using them.
-  // eventually we won't use the macro assembler and this won't be a problem...
-  m_near_emitter.GetScratchRegisterList()->Remove(16);
-  m_near_emitter.GetScratchRegisterList()->Remove(17);
-  m_far_emitter.GetScratchRegisterList()->Remove(16);
-  m_far_emitter.GetScratchRegisterList()->Remove(17);
-  InitHostRegs();
-}
-
-CodeGenerator::~CodeGenerator() = default;
-
-const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/)
-{
-  static constexpr std::array<const char*, HostReg_Count> reg32_names = {
-    {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
-     "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "w31"}};
-  static constexpr std::array<const char*, HostReg_Count> reg64_names = {
-    {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
-     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"}};
-  if (reg >= static_cast<HostReg>(HostReg_Count))
-    return "";
-
-  switch (size)
-  {
-    case RegSize_32:
-      return reg32_names[reg];
-    case RegSize_64:
-      return reg64_names[reg];
-    default:
-      return "";
-  }
-}
-
-void CodeGenerator::InitHostRegs()
-{
-  // TODO: function calls mess up the parameter registers if we use them.. fix it
-  // allocate nonvolatile before volatile
-  m_register_cache.SetHostRegAllocationOrder(
-    {19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17});
-  m_register_cache.SetCallerSavedHostRegs({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17});
-  m_register_cache.SetCalleeSavedHostRegs({19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30});
-  m_register_cache.SetCPUPtrHostReg(RCPUPTR);
-}
-
-void CodeGenerator::SwitchToFarCode()
-{
-  m_emit = &m_far_emitter;
-}
-
-void CodeGenerator::SwitchToNearCode()
-{
-  m_emit = &m_near_emitter;
-}
-
-void* CodeGenerator::GetStartNearCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer());
-}
-
-void* CodeGenerator::GetCurrentNearCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
-}
-
-void* CodeGenerator::GetCurrentFarCodePointer() const
-{
-  return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
-}
-
-Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
-{
-  if (value.IsInHostRegister())
-    return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
-
-  if (value.HasConstantValue(0) && allow_zero_register)
-    return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size);
-
-  Value new_value = m_register_cache.AllocateScratch(value.size);
-  EmitCopyValue(new_value.host_reg, value);
-  return new_value;
-}
-
-Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */)
-{
-  if (value.IsInHostRegister())
-    return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
-
-  if (value.HasConstantValue(0) && allow_zero_register)
-    return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size);
-
-  Value new_value = Value::FromHostReg(&m_register_cache, RSCRATCH, value.size);
-  EmitCopyValue(new_value.host_reg, value);
-  return new_value;
-}
-
-void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
-{
-  if (allocate_registers)
-  {
-    // Save the link register, since we'll be calling functions.
- const bool link_reg_allocated = m_register_cache.AllocateHostReg(30); - DebugAssert(link_reg_allocated); - UNREFERENCED_VARIABLE(link_reg_allocated); - - m_register_cache.AssumeCalleeSavedRegistersAreSaved(); - - // Store the CPU struct pointer. TODO: make this better. - const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR); - DebugAssert(cpu_reg_allocated); - UNREFERENCED_VARIABLE(cpu_reg_allocated); - - // If there's loadstore instructions, preload the fastmem base. - if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) - { - const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR); - Assert(fastmem_reg_allocated); - m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base))); - } - } -} - -void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to) -{ - if (free_registers) - { - if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) - m_register_cache.FreeHostReg(RMEMBASEPTR); - - m_register_cache.FreeHostReg(RCPUPTR); - m_register_cache.FreeHostReg(30); // lr - - m_register_cache.PopCalleeSavedRegisters(true); - } - - if (jump_to) - armEmitJmp(m_emit, jump_to, true); -} - -void CodeGenerator::EmitExceptionExit() -{ - // ensure all unflushed registers are written back - m_register_cache.FlushAllGuestRegisters(false, false); - - // the interpreter load delay might have its own value, but we'll overwrite it here anyway - // technically RaiseException() and FlushPipeline() have already been called, but that should be okay - m_register_cache.FlushLoadDelay(false); - - m_register_cache.PopCalleeSavedRegisters(false); - - armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true); -} - -void CodeGenerator::EmitExceptionExitOnBool(const Value& value) -{ - Assert(!value.IsConstant() && value.IsInHostRegister()); - - m_register_cache.PushState(); - - // TODO: This is... not great. 
- a64::Label skip_branch; - m_emit->Cbz(GetHostReg64(value.host_reg), &skip_branch); - EmitBranch(GetCurrentFarCodePointer()); - m_emit->Bind(&skip_branch); - - SwitchToFarCode(); - EmitExceptionExit(); - SwitchToNearCode(); - - m_register_cache.PopState(); -} - -const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size) -{ - m_near_emitter.FinalizeCode(); - m_far_emitter.FinalizeCode(); - - const void* code = CPU::CodeCache::GetFreeCodePointer(); - *out_host_code_size = static_cast(m_near_emitter.GetSizeOfCodeGenerated()); - *out_host_far_code_size = static_cast(m_far_emitter.GetSizeOfCodeGenerated()); - - CPU::CodeCache::CommitCode(static_cast(m_near_emitter.GetSizeOfCodeGenerated())); - CPU::CodeCache::CommitFarCode(static_cast(m_far_emitter.GetSizeOfCodeGenerated())); - - m_near_emitter.Reset(); - m_far_emitter.Reset(); - - return code; -} - -void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) -{ - switch (to_size) - { - case RegSize_16: - { - switch (from_size) - { - case RegSize_8: - m_emit->sxtb(GetHostReg16(to_reg), GetHostReg8(from_reg)); - m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); - return; - - default: - break; - } - } - break; - - case RegSize_32: - { - switch (from_size) - { - case RegSize_8: - m_emit->sxtb(GetHostReg32(to_reg), GetHostReg8(from_reg)); - return; - case RegSize_16: - m_emit->sxth(GetHostReg32(to_reg), GetHostReg16(from_reg)); - return; - - default: - break; - } - } - break; - - default: - break; - } - - Panic("Unknown sign-extend combination"); -} - -void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) -{ - switch (to_size) - { - case RegSize_16: - { - switch (from_size) - { - case RegSize_8: - m_emit->and_(GetHostReg16(to_reg), GetHostReg8(from_reg), 0xFF); - return; - - default: - break; - } - } - break; - - case RegSize_32: - { - switch (from_size) - { - case RegSize_8: - m_emit->and_(GetHostReg32(to_reg), GetHostReg8(from_reg), 0xFF); - return; - case RegSize_16: - m_emit->and_(GetHostReg32(to_reg), GetHostReg16(from_reg), 0xFFFF); - return; - - default: - break; - } - } - break; - - default: - break; - } - - Panic("Unknown sign-extend combination"); -} - -void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value) -{ - // TODO: mov x, 0 -> xor x, x - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - - switch (value.size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (value.IsConstant()) - m_emit->Mov(GetHostReg32(to_reg), value.constant_value); - else - m_emit->Mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - m_emit->Mov(GetHostReg64(to_reg), value.constant_value); - else - m_emit->Mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - { - if (set_flags) - m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - } - else - { - if (set_flags) - m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), 
GetHostReg64(value.host_reg)); - else - m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - } - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - const s64 constant_value = value.GetS64ConstantValue(); - if (a64::Assembler::IsImmAddSub(constant_value)) - { - if (value.size < RegSize_64) - { - if (set_flags) - m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - else - m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - } - else - { - if (set_flags) - m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); - else - m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); - } - - return; - } - - // need a temporary - Assert(from_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); - EmitAdd(to_reg, from_reg, temp_value, set_flags); -} - -void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - { - if (set_flags) - m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - } - else - { - if (set_flags) - m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - else - m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - } - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - const s64 constant_value = value.GetS64ConstantValue(); - if (a64::Assembler::IsImmAddSub(value.constant_value)) - { - if (value.size < RegSize_64) - { - if (set_flags) - m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); - } - else - { - if (set_flags) - m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); - else - m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); - } - - return; - } - - // need a temporary - Assert(from_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); - EmitSub(to_reg, from_reg, temp_value, set_flags); -} - -void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - else - m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? 
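// ---- Editorial aside (sketch, not part of the patch) ----------------------
// What IsImmAddSub accepts, expanding the recurring "fit in an immediate"
// question: AArch64 add/sub/cmp immediates are 12 bits, optionally shifted
// left by 12. EmitCmp below additionally flips negative constants over to
// `cmn` so the magnitude can use the same encoding. Helper name illustrative:
#include <cstdint>
static inline bool FitsAddSubImmediate(uint64_t u)
{
  return (u & ~UINT64_C(0xFFF)) == 0 || (u & ~(UINT64_C(0xFFF) << 12)) == 0;
}
// ----------------------------------------------------------------------------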
- const s64 constant_value = value.GetS64ConstantValue(); - if (constant_value >= 0) - { - if (a64::Assembler::IsImmAddSub(constant_value)) - { - if (value.size < RegSize_64) - m_emit->cmp(GetHostReg32(to_reg), constant_value); - else - m_emit->cmp(GetHostReg64(to_reg), constant_value); - - return; - } - } - else - { - if (a64::Assembler::IsImmAddSub(-constant_value)) - { - if (value.size < RegSize_64) - m_emit->cmn(GetHostReg32(to_reg), -constant_value); - else - m_emit->cmn(GetHostReg64(to_reg), -constant_value); - - return; - } - } - - // need a temporary - Assert(to_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); - EmitCmp(to_reg, temp_value); -} - -void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, - bool signed_multiply) -{ - Value lhs_in_reg = GetValueInHostRegister(lhs); - Value rhs_in_reg = GetValueInHostRegister(rhs); - - if (lhs.size < RegSize_64) - { - if (signed_multiply) - { - m_emit->smull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg)); - m_emit->asr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32); - } - else - { - m_emit->umull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg)); - m_emit->lsr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32); - } - } - else - { - // TODO: Use mul + smulh - Panic("Not implemented"); - } -} - -void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, - bool signed_divide) -{ - // only 32-bit supported for now.. 
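// ---- Editorial aside (sketch, not part of the patch) ----------------------
// The MIPS semantics EmitMul above and EmitDiv below lower to: smull/umull
// give the full 64-bit product in one register, so HI is just the top 32
// bits; sdiv/udiv produce no remainder, so msub rebuilds it. Plain C++
// equivalents, with names chosen for illustration:
#include <cstdint>
static inline void MultSigned(int32_t a, int32_t b, uint32_t* hi, uint32_t* lo)
{
  const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b); // smull
  *lo = static_cast<uint32_t>(product);
  *hi = static_cast<uint32_t>(static_cast<uint64_t>(product) >> 32);         // asr #32
}
static inline void DivSigned(int32_t num, int32_t denom, int32_t* quot, int32_t* rem)
{
  *quot = num / denom;          // sdiv (caller must handle denom == 0 and INT32_MIN / -1)
  *rem = num - (*quot * denom); // msub: rem = num - quot * denom
}
// ----------------------------------------------------------------------------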
- Assert(size == RegSize_32); - - Value quotient_value; - if (to_reg_quotient == HostReg_Count) - { - Assert(to_reg_quotient != RSCRATCH); - quotient_value = Value::FromHostReg(&m_register_cache, RSCRATCH, size); - } - else - { - quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size); - } - - if (signed_divide) - { - m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); - if (to_reg_remainder != HostReg_Count) - { - m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom), - GetHostReg32(num)); - } - } - else - { - m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); - if (to_reg_remainder != HostReg_Count) - { - m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom), - GetHostReg32(num)); - } - } -} - -void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) -{ - Panic("Not implemented"); -#if 0 - switch (size) - { - case RegSize_8: - m_emit->inc(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->inc(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->inc(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -#endif -} - -void CodeGenerator::EmitDec(HostReg to_reg, RegSize size) -{ - Panic("Not implemented"); -#if 0 - switch (size) - { - case RegSize_8: - m_emit->dec(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->dec(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->dec(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -#endif -} - -void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F); - else - m_emit->lslv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - - case RegSize_64: - { - if (amount_value.IsConstant()) - m_emit->lsl(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F); - else - m_emit->lslv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value)); - } - break; - } -} - -void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F); - else - m_emit->lsrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - - case RegSize_64: - { - if (amount_value.IsConstant()) - m_emit->lsr(GetHostReg64(to_reg), GetHostReg64(to_reg), amount_value.constant_value & 0x3F); - else - m_emit->lsrv(GetHostReg64(to_reg), GetHostReg64(to_reg), GetHostReg64(amount_value)); - } - break; - } -} - -void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, 
const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - { - if (amount_value.IsConstant()) - m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F); - else - m_emit->asrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); - - if (size == RegSize_8) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); - else if (size == RegSize_16) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); - } - break; - - case RegSize_64: - { - if (amount_value.IsConstant()) - m_emit->asr(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F); - else - m_emit->asrv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value)); - } - break; - } -} - -static bool CanFitInBitwiseImmediate(const Value& value) -{ - const unsigned reg_size = (value.size < RegSize_64) ? 32 : 64; - unsigned n, imm_s, imm_r; - return a64::Assembler::IsImmLogical(s64(value.constant_value), reg_size, &n, &imm_s, &imm_r); -} - -void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - if (CanFitInBitwiseImmediate(value)) - { - if (value.size < RegSize_64) - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); - else - m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); - - return; - } - - // need a temporary - Assert(from_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); - EmitAnd(to_reg, from_reg, temp_value); -} - -void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? 
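// ---- Editorial aside (sketch, not part of the patch) ----------------------
// The property IsImmLogical checks inside CanFitInBitwiseImmediate above: a
// bitmask immediate is a power-of-two-sized element (2..64 bits) equal to
// some rotation of a contiguous run of ones, replicated across the register.
// Simplified stand-alone checker (not vixl's implementation):
#include <bit>
#include <cstdint>
static bool IsBitmaskImmediate(uint64_t v, unsigned reg_size /* 32 or 64 */)
{
  if (reg_size == 32)
    v = (v & 0xFFFFFFFFu) | (v << 32); // replicate the W pattern into 64 bits
  if (v == 0 || v == ~UINT64_C(0))
    return false; // all-zeros/all-ones are not encodable
  unsigned size = 64; // find the smallest power-of-two repetition period
  while (size > 2)
  {
    const unsigned half = size / 2;
    const uint64_t mask = (UINT64_C(1) << half) - 1;
    if ((v & mask) != ((v >> half) & mask))
      break;
    size = half;
  }
  const uint64_t emask = (size == 64) ? ~UINT64_C(0) : ((UINT64_C(1) << size) - 1);
  const uint64_t elem = v & emask;
  // A rotated run of ones has exactly one 0->1 boundary in the circular bit
  // string, i.e. elem XOR rotr(elem, 1) has exactly two set bits.
  const uint64_t rot = ((elem >> 1) | (elem << (size - 1))) & emask;
  return std::popcount(elem ^ rot) == 2;
}
// ----------------------------------------------------------------------------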
- if (CanFitInBitwiseImmediate(value)) - { - if (value.size < RegSize_64) - m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); - else - m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); - - return; - } - - // need a temporary - Assert(from_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); - EmitOr(to_reg, from_reg, temp_value); -} - -void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); - else - m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? - if (CanFitInBitwiseImmediate(value)) - { - if (value.size < RegSize_64) - m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); - else - m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); - - return; - } - - // need a temporary - Assert(from_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); - EmitXor(to_reg, from_reg, temp_value); -} - -void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) -{ - Assert(value.IsConstant() || value.IsInHostRegister()); - - // if it's in a host register already, this is easy - if (value.IsInHostRegister()) - { - if (value.size < RegSize_64) - m_emit->tst(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - else - m_emit->tst(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - - return; - } - - // do we need temporary storage for the constant, if it won't fit in an immediate? 
- if (CanFitInBitwiseImmediate(value)) - { - if (value.size < RegSize_64) - m_emit->tst(GetHostReg32(to_reg), s64(value.constant_value)); - else - m_emit->tst(GetHostReg64(to_reg), s64(value.constant_value)); - - return; - } - - // need a temporary - Assert(to_reg != RSCRATCH); - Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); - if (value.size < RegSize_64) - m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); - else - m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); - EmitTest(to_reg, temp_value); -} - -void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) -{ - switch (size) - { - case RegSize_8: - m_emit->mvn(GetHostReg8(to_reg), GetHostReg8(to_reg)); - m_emit->and_(GetHostReg8(to_reg), GetHostReg8(to_reg), 0xFF); - break; - - case RegSize_16: - m_emit->mvn(GetHostReg16(to_reg), GetHostReg16(to_reg)); - m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); - break; - - case RegSize_32: - m_emit->mvn(GetHostReg32(to_reg), GetHostReg32(to_reg)); - break; - - case RegSize_64: - m_emit->mvn(GetHostReg64(to_reg), GetHostReg64(to_reg)); - break; - - default: - break; - } -} - -void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) -{ - if (condition == Condition::Always) - { - if (to_size < RegSize_64) - m_emit->Mov(GetHostReg32(to_reg), 1); - else - m_emit->Mov(GetHostReg64(to_reg), 1); - - return; - } - - a64::Condition acond; - switch (condition) - { - case Condition::NotEqual: - acond = a64::ne; - break; - - case Condition::Equal: - acond = a64::eq; - break; - - case Condition::Overflow: - acond = a64::vs; - break; - - case Condition::Greater: - acond = a64::gt; - break; - - case Condition::GreaterEqual: - acond = a64::ge; - break; - - case Condition::Less: - acond = a64::lt; - break; - - case Condition::LessEqual: - acond = a64::le; - break; - - case Condition::Negative: - acond = a64::mi; - break; - - case Condition::PositiveOrZero: - acond = a64::pl; - break; - - case Condition::Above: - acond = a64::hi; - break; - - case Condition::AboveEqual: - acond = a64::cs; - break; - - case Condition::Below: - acond = a64::cc; - break; - - case Condition::BelowEqual: - acond = a64::ls; - break; - - default: - UnreachableCode(); - return; - } - - if (to_size < RegSize_64) - m_emit->cset(GetHostReg32(to_reg), acond); - else - m_emit->cset(GetHostReg64(to_reg), acond); -} - -u32 CodeGenerator::PrepareStackForCall() -{ - m_register_cache.PushCallerSavedRegisters(); - return 0; -} - -void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) -{ - m_register_cache.PopCallerSavedRegisters(); -} - -void CodeGenerator::EmitCall(const void* ptr) -{ - armEmitCall(m_emit, ptr, false); -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - 
EmitCopyValue(RARG1, arg1); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3) -{ - if (return_value) - m_register_cache.DiscardHostReg(return_value->GetHostRegister()); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3, const Value& arg4) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - EmitCopyValue(RARG4, arg4); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position) -{ - const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); - m_emit->str(GetHostReg64(reg), addr); -} - -void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - ((position + 1) * 8)); - m_emit->stp(GetHostReg64(reg2), GetHostReg64(reg), addr); -} - -void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position) -{ - const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); - m_emit->ldr(GetHostReg64(reg), addr); -} - -void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); - m_emit->ldp(GetHostReg64(reg2), GetHostReg64(reg), addr); -} - -void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset) -{ - const s64 s_offset = static_cast(ZeroExtend64(offset)); - - 
switch (guest_size) - { - case RegSize_8: - m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_16: - m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_32: - m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_64: - m_emit->Ldr(GetHostReg64(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value) -{ - const Value hr_value = GetValueInHostRegister(value); - const s64 s_offset = static_cast(ZeroExtend64(offset)); - - switch (value.size) - { - case RegSize_8: - m_emit->Strb(GetHostReg8(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_16: - m_emit->Strh(GetHostReg16(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_32: - m_emit->Str(GetHostReg32(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - case RegSize_64: - m_emit->Str(GetHostReg64(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) -{ - const s64 s_offset = static_cast(ZeroExtend64(offset)); - const a64::MemOperand o_offset(GetCPUPtrReg(), s_offset); - - Value real_value; - if (value.IsInHostRegister()) - { - real_value.SetHostReg(&m_register_cache, value.host_reg, value.size); - } - else - { - // do we need temporary storage for the constant, if it won't fit in an immediate? - Assert(value.IsConstant()); - const s64 constant_value = value.GetS64ConstantValue(); - if (!a64::Assembler::IsImmAddSub(constant_value)) - { - real_value.SetHostReg(&m_register_cache, RARG4, value.size); - EmitCopyValue(real_value.host_reg, value); - } - else - { - real_value = value; - } - } - - // Don't need to mask here because we're storing back to memory. 
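// ---- Editorial aside (sketch, not part of the patch) ----------------------
// The LUT flavour of fastmem used by the guest loads/stores further below:
// instead of one contiguous mapped view, a table is indexed by guest page and
// the *full* guest address is then added on top. For that add to land on the
// right host byte, each entry presumably has its page base pre-subtracted;
// hedged reconstruction, names illustrative:
#include <cstdint>
static inline uint8_t* FastmemLUTHostPointer(uint8_t* const* lut, uint32_t address, unsigned page_shift)
{
  uint8_t* const entry = lut[address >> page_shift]; // ldr x1, [base, page, lsl #3]
  return entry + address;                            // ldrb/strb [x1, address]
}
// ----------------------------------------------------------------------------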
- switch (value.size) - { - case RegSize_8: - { - m_emit->Ldrb(GetHostReg8(RSCRATCH), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), real_value.GetS64ConstantValue()); - else - m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), GetHostReg8(real_value)); - m_emit->Strb(GetHostReg8(RSCRATCH), o_offset); - } - break; - - case RegSize_16: - { - m_emit->Ldrh(GetHostReg16(RSCRATCH), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), real_value.GetS64ConstantValue()); - else - m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), GetHostReg16(real_value)); - m_emit->Strh(GetHostReg16(RSCRATCH), o_offset); - } - break; - - case RegSize_32: - { - m_emit->Ldr(GetHostReg32(RSCRATCH), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), real_value.GetS64ConstantValue()); - else - m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), GetHostReg32(real_value)); - m_emit->Str(GetHostReg32(RSCRATCH), o_offset); - } - break; - - case RegSize_64: - { - m_emit->Ldr(GetHostReg64(RSCRATCH), o_offset); - if (real_value.IsConstant()) - m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), s64(real_value.constant_value)); - else - m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), GetHostReg64(real_value)); - m_emit->Str(GetHostReg64(RSCRATCH), o_offset); - } - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) -{ - HostReg address_reg; - a64::MemOperand actual_address; - if (address.IsConstant()) - { - m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value); - address_reg = result.host_reg; - } - else - { - address_reg = address.host_reg; - } - - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); - } - - const a64::XRegister membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - - switch (size) - { - case RegSize_8: - m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, Value& result) -{ - HostReg address_reg; - if (address.IsConstant()) - { - m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value); - address_reg = result.host_reg; - } - else - { - address_reg = address.host_reg; - } - - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); - } - - const a64::XRegister membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? 
      GetHostReg64(RARG1) : GetFastmemBasePtrReg();
-
-  m_register_cache.InhibitAllocation();
-
-  void* host_pc = GetCurrentNearCodePointer();
-
-  switch (size)
-  {
-    case RegSize_8:
-      m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
-      break;
-
-    case RegSize_16:
-      m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
-      break;
-
-    case RegSize_32:
-      m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
-      break;
-
-    default:
-      UnreachableCode();
-      break;
-  }
-
-  const u32 host_code_size =
-    static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
-
-  // generate slowmem fallback
-  const void* host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
-
-  EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
-
-  SwitchToNearCode();
-  m_register_cache.UninhibitAllocation();
-
-  CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
-}
-
-void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                               const Value& address, RegSize size, Value& result, bool in_far_code)
-{
-  if (g_settings.cpu_recompiler_memory_exceptions)
-  {
-    // NOTE: This can leave junk in the upper bits
-    switch (size)
-    {
-      case RegSize_8:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address);
-        break;
-
-      case RegSize_16:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address);
-        break;
-
-      case RegSize_32:
-        EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address);
-        break;
-
-      default:
-        UnreachableCode();
-        break;
-    }
-
-    m_register_cache.PushState();
-
-    a64::Label load_okay;
-    m_emit->Tbz(GetHostReg64(result.host_reg), 63, &load_okay);
-    EmitBranch(GetCurrentFarCodePointer());
-    m_emit->Bind(&load_okay);
-
-    // load exception path
-    if (!in_far_code)
-      SwitchToFarCode();
-
-    // cause_bits = (-result << 2) | BD | cop_n
-    m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg));
-    m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
-    EmitOr(result.host_reg, result.host_reg,
-           Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
-             static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
-    EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
-
-    EmitExceptionExit();
-
-    if (!in_far_code)
-      SwitchToNearCode();
-
-    m_register_cache.PopState();
-  }
-  else
-  {
-    switch (size)
-    {
-      case RegSize_8:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address);
-        break;
-
-      case RegSize_16:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address);
-        break;
-
-      case RegSize_32:
-        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address);
-        break;
-
-      default:
-        UnreachableCode();
-        break;
-    }
-  }
-}
-
-void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const
CodeCache::InstructionInfo& info, - const Value& address, RegSize size, const Value& value) -{ - Value value_in_hr = GetValueInHostRegister(value); - - HostReg address_reg; - if (address.IsConstant()) - { - m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value); - address_reg = RSCRATCH; - } - else - { - address_reg = address.host_reg; - } - - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); - } - - const a64::XRegister membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - - // fastmem - void* host_pc = GetCurrentNearCodePointer(); - - m_register_cache.InhibitAllocation(); - - switch (size) - { - case RegSize_8: - m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - case RegSize_16: - m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - case RegSize_32: - m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg))); - break; - - default: - UnreachableCode(); - break; - } - - const u32 host_code_size = - static_cast(static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc))); - - // generate slowmem fallback - void* host_slowmem_pc = GetCurrentFarCodePointer(); - SwitchToFarCode(); - - DebugAssert(m_delayed_cycles_add > 0); - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast(m_delayed_cycles_add))); - - EmitStoreGuestMemorySlowmem(instruction, info, address, size, value_in_hr, true); - - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(-m_delayed_cycles_add))); - - // return to the block code - EmitBranch(GetCurrentNearCodePointer(), false); - - SwitchToNearCode(); - m_register_cache.UninhibitAllocation(); - - CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc); -} - -void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, const Value& value, - bool in_far_code) -{ - Value value_in_hr = GetValueInHostRegister(value); - - if (g_settings.cpu_recompiler_memory_exceptions) - { - Assert(!in_far_code); - - Value result = m_register_cache.AllocateScratch(RegSize_32); - switch (size) - { - case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr); - break; - - case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr); - break; - - case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr); - break; - - default: - UnreachableCode(); - break; - } - - m_register_cache.PushState(); - - a64::Label store_okay; - m_emit->Cbz(GetHostReg64(result.host_reg), &store_okay); - EmitBranch(GetCurrentFarCodePointer()); - m_emit->Bind(&store_okay); - - // store exception path - if (!in_far_code) - SwitchToFarCode(); - - // cause_bits = (result << 2) | BD | cop_n - m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); - EmitOr(result.host_reg, result.host_reg, - Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( - static_cast(0), info.is_branch_delay_slot, false, instruction.cop.cop_n))); - EmitFunctionCall(nullptr, 
static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); - - if (!in_far_code) - EmitExceptionExit(); - SwitchToNearCode(); - - m_register_cache.PopState(); - } - else - { - switch (size) - { - case RegSize_8: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr); - break; - - case RegSize_16: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr); - break; - - case RegSize_32: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::EmitUpdateFastmemBase() -{ - m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base))); -} - -void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi) -{ - DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address); - - // check jump distance - const s64 jump_distance = - static_cast(reinterpret_cast(lbi.thunk_address) - reinterpret_cast(host_pc)); - Assert(Common::IsAligned(jump_distance, 4)); - Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2)); - - // turn it into a jump to the slowmem handler - vixl::aarch64::MacroAssembler emit(static_cast(host_pc), lbi.code_size, a64::PositionDependentCode); - emit.b(jump_distance >> 2); - - const s32 nops = (static_cast(lbi.code_size) - static_cast(emit.GetCursorOffset())) / 4; - Assert(nops >= 0); - for (s32 i = 0; i < nops; i++) - emit.nop(); - - MemMap::FlushInstructionCache(host_pc, lbi.code_size); -} - -void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) -{ - EmitLoadGlobalAddress(RSCRATCH, ptr); - switch (size) - { - case RegSize_8: - m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - case RegSize_16: - m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - case RegSize_32: - m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) -{ - Value value_in_hr = GetValueInHostRegister(value); - - EmitLoadGlobalAddress(RSCRATCH, ptr); - switch (value.size) - { - case RegSize_8: - m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - case RegSize_16: - m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - case RegSize_32: - m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH))); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitFlushInterpreterLoadDelay() -{ - Value reg = m_register_cache.AllocateScratch(RegSize_32); - Value value = m_register_cache.AllocateScratch(RegSize_32); - - const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); - const a64::MemOperand regs_base(GetCPUPtrReg(), OFFSETOF(State, regs.r[0])); - - a64::Label skip_flush; - - // reg = load_delay_reg - m_emit->Ldrb(GetHostReg32(reg), load_delay_reg); - - // if load_delay_reg == Reg::count goto skip_flush - m_emit->Cmp(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->B(a64::eq, &skip_flush); - - // value = load_delay_value - m_emit->Ldr(GetHostReg32(value), load_delay_value); - 
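// ---- Editorial aside (sketch, not part of the patch) ----------------------
// The interpreter load-delay flush being emitted here, in plain C++: a
// pending delayed load is a {reg, value} pair, with Reg::count meaning
// "empty". Flushing stores the value into the GPR file and clears the slot.
// Field and enum names follow the accesses above (assumes the engine's
// CPU::State/Reg definitions):
static inline void FlushLoadDelay(CPU::State& state)
{
  if (state.load_delay_reg != Reg::count)
  {
    state.regs.r[static_cast<size_t>(state.load_delay_reg)] = state.load_delay_value;
    state.load_delay_reg = Reg::count; // slot now empty, matching the Strb below
  }
}
// ----------------------------------------------------------------------------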
- // reg = offset(r[0] + reg << 2) - m_emit->Lsl(GetHostReg32(reg), GetHostReg32(reg), 2); - m_emit->Add(GetHostReg32(reg), GetHostReg32(reg), OFFSETOF(State, regs.r[0])); - - // r[reg] = value - m_emit->Str(GetHostReg32(value), a64::MemOperand(GetCPUPtrReg(), GetHostReg32(reg))); - - // load_delay_reg = Reg::count - m_emit->Mov(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->Strb(GetHostReg32(reg), load_delay_reg); - - m_emit->Bind(&skip_flush); -} - -void CodeGenerator::EmitMoveNextInterpreterLoadDelay() -{ - Value reg = m_register_cache.AllocateScratch(RegSize_32); - Value value = m_register_cache.AllocateScratch(RegSize_32); - - const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); - const a64::MemOperand next_load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_reg)); - const a64::MemOperand next_load_delay_value(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_value)); - - m_emit->Ldrb(GetHostReg32(reg), next_load_delay_reg); - m_emit->Ldr(GetHostReg32(value), next_load_delay_value); - m_emit->Strb(GetHostReg32(reg), load_delay_reg); - m_emit->Str(GetHostReg32(value), load_delay_value); - m_emit->Mov(GetHostReg32(reg), static_cast(Reg::count)); - m_emit->Strb(GetHostReg32(reg), next_load_delay_reg); -} - -void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) -{ - if (!m_load_delay_dirty) - return; - - const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); - Value temp = m_register_cache.AllocateScratch(RegSize_8); - - a64::Label skip_cancel; - - // if load_delay_reg != reg goto skip_cancel - m_emit->Ldrb(GetHostReg8(temp), load_delay_reg); - m_emit->Cmp(GetHostReg8(temp), static_cast(reg)); - m_emit->B(a64::ne, &skip_cancel); - - // load_delay_reg = Reg::count - m_emit->Mov(GetHostReg8(temp), static_cast(Reg::count)); - m_emit->Strb(GetHostReg8(temp), load_delay_reg); - - m_emit->Bind(&skip_cancel); -} - -void CodeGenerator::EmitICacheCheckAndUpdate() -{ - if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) - { - if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) - { - armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr()); - m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - m_emit->Mov(RWARG3, m_block->size); - m_emit->Mul(RWARG2, RWARG2, RWARG3); - m_emit->Add(RWARG1, RWARG1, RWARG2); - m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - } - else - { - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(m_block->uncached_fetch_ticks))); - } - } - else if (m_block->icache_line_count > 0) - { - const auto& ticks_reg = a64::w0; - const auto& current_tag_reg = a64::w1; - const auto& existing_tag_reg = a64::w2; - - VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; - m_emit->Ldr(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - m_emit->Mov(current_tag_reg, current_pc); - - for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) - { - const TickCount fill_ticks = GetICacheFillTicks(current_pc); - if (fill_ticks <= 0) - continue; - - const u32 line = GetICacheLine(current_pc); - const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); - - a64::Label cache_hit; - m_emit->Ldr(existing_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset)); - m_emit->Cmp(existing_tag_reg, 
current_tag_reg); - m_emit->B(&cache_hit, a64::eq); - - m_emit->Str(current_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset)); - EmitAdd(0, 0, Value::FromConstantU32(static_cast(fill_ticks)), false); - m_emit->Bind(&cache_hit); - - if (i != (m_block->icache_line_count - 1)) - m_emit->Add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE); - } - - m_emit->Str(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - } -} - -void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) -{ - // store it first to reduce code size, because we can offset - armMoveAddressToReg(m_emit, RXARG1, ram_ptr); - armMoveAddressToReg(m_emit, RXARG2, shadow_ptr); - - bool first = true; - u32 offset = 0; - a64::Label block_changed; - - while (size >= 16) - { - const a64::VRegister vtmp = a64::v2.V4S(); - const a64::VRegister dst = first ? a64::v0.V4S() : a64::v1.V4S(); - m_emit->ldr(dst, a64::MemOperand(RXARG1, offset)); - m_emit->ldr(vtmp, a64::MemOperand(RXARG2, offset)); - m_emit->cmeq(dst, dst, vtmp); - if (!first) - m_emit->and_(a64::v0.V16B(), a64::v0.V16B(), dst.V16B()); - else - first = false; - - offset += 16; - size -= 16; - } - - if (!first) - { - // TODO: make sure this doesn't choke on ffffffff - m_emit->uminv(a64::s0, a64::v0.V4S()); - m_emit->fcmp(a64::s0, 0.0); - m_emit->b(&block_changed, a64::eq); - } - - while (size >= 8) - { - m_emit->ldr(RXARG3, a64::MemOperand(RXARG1, offset)); - m_emit->ldr(RXSCRATCH, a64::MemOperand(RXARG2, offset)); - m_emit->cmp(RXARG3, RXSCRATCH); - m_emit->b(&block_changed, a64::ne); - offset += 8; - size -= 8; - } - - while (size >= 4) - { - m_emit->ldr(RWARG3, a64::MemOperand(RXARG1, offset)); - m_emit->ldr(RWSCRATCH, a64::MemOperand(RXARG2, offset)); - m_emit->cmp(RWARG3, RWSCRATCH); - m_emit->b(&block_changed, a64::ne); - offset += 4; - size -= 4; - } - - DebugAssert(size == 0); - - a64::Label block_unchanged; - m_emit->b(&block_unchanged); - m_emit->bind(&block_changed); - armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false); - m_emit->bind(&block_unchanged); -} - -void CodeGenerator::EmitStallUntilGTEComplete() -{ - static_assert(OFFSETOF(State, pending_ticks) + sizeof(u32) == OFFSETOF(State, gte_completion_tick)); - m_emit->ldp(GetHostReg32(RARG1), GetHostReg32(RARG2), - a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); - - if (m_delayed_cycles_add > 0) - { - m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast(m_delayed_cycles_add)); - m_delayed_cycles_add = 0; - } - - m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1)); - m_emit->csel(GetHostReg32(RARG1), GetHostReg32(RARG2), GetHostReg32(RARG1), a64::Condition::hi); - m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); -} - -void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) -{ - const s64 jump_distance = - static_cast(reinterpret_cast(address) - reinterpret_cast(GetCurrentCodePointer())); - Assert(Common::IsAligned(jump_distance, 4)); - if (a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2)) - { - m_emit->b(jump_distance >> 2); - return; - } - - Assert(allow_scratch); - - m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast(address)); - m_emit->br(GetHostReg64(RSCRATCH)); -} - -void CodeGenerator::EmitBranch(LabelType* label) -{ - m_emit->B(label); -} - -static a64::Condition TranslateCondition(Condition condition, bool invert) -{ - switch (condition) - { - case Condition::Always: - return a64::nv; - - case 
Condition::NotEqual: - case Condition::NotZero: - return invert ? a64::eq : a64::ne; - - case Condition::Equal: - case Condition::Zero: - return invert ? a64::ne : a64::eq; - - case Condition::Overflow: - return invert ? a64::vc : a64::vs; - - case Condition::Greater: - return invert ? a64::le : a64::gt; - - case Condition::GreaterEqual: - return invert ? a64::lt : a64::ge; - - case Condition::Less: - return invert ? a64::ge : a64::lt; - - case Condition::LessEqual: - return invert ? a64::gt : a64::le; - - case Condition::Negative: - return invert ? a64::pl : a64::mi; - - case Condition::PositiveOrZero: - return invert ? a64::mi : a64::pl; - - case Condition::Above: - return invert ? a64::ls : a64::hi; - - case Condition::AboveEqual: - return invert ? a64::cc : a64::cs; - - case Condition::Below: - return invert ? a64::cs : a64::cc; - - case Condition::BelowEqual: - return invert ? a64::hi : a64::ls; - - default: - UnreachableCode(); - return a64::nv; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - Panic("Needs a comparison value"); - return; - - case Condition::Negative: - case Condition::PositiveOrZero: - { - switch (size) - { - case RegSize_8: - m_emit->tst(GetHostReg8(value), GetHostReg8(value)); - break; - case RegSize_16: - m_emit->tst(GetHostReg16(value), GetHostReg16(value)); - break; - case RegSize_32: - m_emit->tst(GetHostReg32(value), GetHostReg32(value)); - break; - case RegSize_64: - m_emit->tst(GetHostReg64(value), GetHostReg64(value)); - break; - default: - UnreachableCode(); - break; - } - - EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::NotZero: - { - switch (size) - { - case RegSize_8: - m_emit->cbnz(GetHostReg8(value), label); - break; - case RegSize_16: - m_emit->cbz(GetHostReg16(value), label); - break; - case RegSize_32: - m_emit->cbnz(GetHostReg32(value), label); - break; - case RegSize_64: - m_emit->cbnz(GetHostReg64(value), label); - break; - default: - UnreachableCode(); - break; - } - - return; - } - - case Condition::Zero: - { - switch (size) - { - case RegSize_8: - m_emit->cbz(GetHostReg8(value), label); - break; - case RegSize_16: - m_emit->cbz(GetHostReg16(value), label); - break; - case RegSize_32: - m_emit->cbz(GetHostReg32(value), label); - break; - case RegSize_64: - m_emit->cbz(GetHostReg64(value), label); - break; - default: - UnreachableCode(); - break; - } - - return; - } - - case Condition::Always: - m_emit->b(label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - { - EmitCmp(lhs, rhs); - EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::Negative: - case Condition::PositiveOrZero: - case 
Condition::NotZero: - case Condition::Zero: - { - Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0)); - EmitConditionalBranch(condition, invert, lhs, rhs.size, label); - return; - } - - case Condition::Always: - m_emit->b(label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label) -{ - if (condition == Condition::Always) - m_emit->b(label); - else - m_emit->b(label, TranslateCondition(condition, invert)); -} - -void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) -{ - switch (size) - { - case RegSize_8: - case RegSize_16: - case RegSize_32: - m_emit->tbz(GetHostReg32(reg), bit, label); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitBindLabel(LabelType* label) -{ - m_emit->Bind(label); -} - -void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr) -{ - const void* current_code_ptr_page = reinterpret_cast( - reinterpret_cast(GetCurrentCodePointer()) & ~static_cast(0xFFF)); - const void* ptr_page = - reinterpret_cast(reinterpret_cast(ptr) & ~static_cast(0xFFF)); - const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; - const u32 page_offset = static_cast(reinterpret_cast(ptr) & 0xFFFu); - if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64)) - { - m_emit->adrp(GetHostReg64(host_reg), page_displacement); - m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset); - } - else - { - m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast(ptr)); - } -} - -} // namespace CPU::Recompiler - -#endif // CPU_ARCH_ARM64 diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp deleted file mode 100644 index 2661e1d13..000000000 --- a/src/core/cpu_recompiler_code_generator_generic.cpp +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "cpu_core.h" -#include "cpu_core_private.h" -#include "cpu_recompiler_code_generator.h" -#include "settings.h" - -#include "common/log.h" - -LOG_CHANNEL(Recompiler); - -namespace CPU::Recompiler { - -void CodeGenerator::EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg) -{ - EmitLoadCPUStructField(host_reg, RegSize_32, State::GPRRegisterOffset(static_cast(guest_reg))); -} - -void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value) -{ - DebugAssert(value.size == RegSize_32); - EmitStoreCPUStructField(State::GPRRegisterOffset(static_cast(guest_reg)), value); -} - -void CodeGenerator::EmitStoreInterpreterLoadDelay(Reg reg, const Value& value) -{ - DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); - EmitStoreCPUStructField(OFFSETOF(State, load_delay_reg), Value::FromConstantU8(static_cast(reg))); - EmitStoreCPUStructField(OFFSETOF(State, load_delay_value), value); - m_load_delay_dirty = true; -} - -Value CodeGenerator::EmitLoadGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, const SpeculativeValue& address_spec, RegSize size) -{ - if (address.IsConstant() && !SpeculativeIsCacheIsolated()) - { - TickCount read_ticks; - void* ptr = GetDirectReadMemoryPointer( - static_cast(address.constant_value), - (size == RegSize_8) ? MemoryAccessSize::Byte : - ((size == RegSize_16) ? 
MemoryAccessSize::HalfWord : MemoryAccessSize::Word),
-      &read_ticks);
-    if (ptr)
-    {
-      Value result = m_register_cache.AllocateScratch(size);
-
-      // TODO: mask off...
-      if (CodeCache::IsUsingFastmem() && Bus::IsRAMAddress(static_cast<u32>(address.constant_value)))
-      {
-        // have to mask away the high bits for mirrors, since we don't map them in fastmem
-        EmitLoadGuestRAMFastmem(Value::FromConstantU32(static_cast<u32>(address.constant_value) & Bus::g_ram_mask),
-                                size, result);
-      }
-      else
-      {
-        EmitLoadGlobal(result.GetHostRegister(), size, ptr);
-      }
-
-      m_delayed_cycles_add += read_ticks;
-      return result;
-    }
-  }
-
-  Value result = m_register_cache.AllocateScratch(HostPointerSize);
-
-  const bool use_fastmem = !g_settings.cpu_recompiler_memory_exceptions &&
-                           (address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) &&
-                           !SpeculativeIsCacheIsolated();
-  if (address_spec)
-  {
-    if (!use_fastmem)
-    {
-      DEBUG_LOG("Non-constant load at 0x{:08X}, speculative address 0x{:08X}, using fastmem = {}", info.pc,
-                *address_spec, use_fastmem ? "yes" : "no");
-    }
-  }
-  else
-  {
-    DEBUG_LOG("Non-constant load at 0x{:08X}, speculative address UNKNOWN, using fastmem = {}", info.pc,
-              use_fastmem ? "yes" : "no");
-  }
-
-  if (CodeCache::IsUsingFastmem() && use_fastmem)
-  {
-    EmitLoadGuestMemoryFastmem(instruction, info, address, size, result);
-  }
-  else
-  {
-    AddPendingCycles(true);
-    m_register_cache.FlushCallerSavedGuestRegisters(true, true);
-    EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, false);
-  }
-
-  // Downcast to ignore upper 56/48/32 bits. This should be a noop.
-  if (result.size != size)
-  {
-    switch (size)
-    {
-      case RegSize_8:
-        ConvertValueSizeInPlace(&result, RegSize_8, false);
-        break;
-
-      case RegSize_16:
-        ConvertValueSizeInPlace(&result, RegSize_16, false);
-        break;
-
-      case RegSize_32:
-        ConvertValueSizeInPlace(&result, RegSize_32, false);
-        break;
-
-      default:
-        UnreachableCode();
-        break;
-    }
-  }
-
-  return result;
-}
-
-void CodeGenerator::EmitStoreGuestMemory(Instruction instruction, const CodeCache::InstructionInfo& info,
-                                         const Value& address, const SpeculativeValue& address_spec, RegSize size,
-                                         const Value& value)
-{
-  if (address.IsConstant() && !SpeculativeIsCacheIsolated())
-  {
-    void* ptr = GetDirectWriteMemoryPointer(
-      static_cast<u32>(address.constant_value),
-      (size == RegSize_8) ? MemoryAccessSize::Byte :
-                            ((size == RegSize_16) ? MemoryAccessSize::HalfWord : MemoryAccessSize::Word));
-    if (ptr)
-    {
-      if (value.size != size)
-        EmitStoreGlobal(ptr, value.ViewAsSize(size));
-      else
-        EmitStoreGlobal(ptr, value);
-
-      return;
-    }
-  }
-
-  const bool use_fastmem = !g_settings.cpu_recompiler_memory_exceptions &&
-                           (address_spec ? Bus::CanUseFastmemForAddress(*address_spec) : true) &&
-                           !SpeculativeIsCacheIsolated();
-  if (address_spec)
-  {
-    if (!use_fastmem)
-    {
-      DEBUG_LOG("Non-constant store at 0x{:08X}, speculative address 0x{:08X}, using fastmem = {}", info.pc,
-                *address_spec, use_fastmem ? "yes" : "no");
-    }
-  }
-  else
-  {
-    DEBUG_LOG("Non-constant store at 0x{:08X}, speculative address UNKNOWN, using fastmem = {}", info.pc,
-              use_fastmem ? "yes" : "no");
-  }
-
-  if (CodeCache::IsUsingFastmem() && use_fastmem)
-  {
-    EmitStoreGuestMemoryFastmem(instruction, info, address, size, value);
-  }
-  else
-  {
-    AddPendingCycles(true);
-    m_register_cache.FlushCallerSavedGuestRegisters(true, true);
-    EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, false);
-  }
-}
-
-#if 0 // Not used
-
-void CodeGenerator::EmitICacheCheckAndUpdate()
-{
-  Value temp = m_register_cache.AllocateScratch(RegSize_32);
-
-  if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
-  {
-    EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
-    EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
-            Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)), false);
-    EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
-  }
-  else
-  {
-    // cached path
-    Value temp2 = m_register_cache.AllocateScratch(RegSize_32);
-
-    m_register_cache.InhibitAllocation();
-
-    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
-    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
-    {
-      const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
-      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
-      if (fill_ticks <= 0)
-        continue;
-
-      const u32 line = GetICacheLine(current_pc);
-      const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
-      LabelType cache_hit;
-
-      EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, offset);
-      EmitCopyValue(temp2.GetHostRegister(), Value::FromConstantU32(current_pc));
-      EmitCmp(temp2.GetHostRegister(), temp);
-      EmitConditionalBranch(Condition::Equal, false, temp.GetHostRegister(), temp2, &cache_hit);
-
-      EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
-      EmitStoreCPUStructField(offset, temp2);
-      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(static_cast<u32>(fill_ticks)),
-              false);
-      EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
-      EmitBindLabel(&cache_hit);
-    }
-
-    m_register_cache.UninhibitAllocation();
-  }
-}
-
-#endif
-
-#if 0 // Not Used
-
-void CodeGenerator::EmitStallUntilGTEComplete()
-{
-  Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
-  Value gte_completion_tick = m_register_cache.AllocateScratch(RegSize_32);
-  EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
-  EmitLoadCPUStructField(gte_completion_tick.GetHostRegister(), RegSize_32, OFFSETOF(State, gte_completion_tick));
-
-  // commit cycles here, should always be nonzero
-  if (m_delayed_cycles_add > 0)
-  {
-    EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(),
-            Value::FromConstantU32(m_delayed_cycles_add), false);
-    m_delayed_cycles_add = 0;
-  }
-
-  LabelType gte_done;
-  EmitSub(gte_completion_tick.GetHostRegister(), gte_completion_tick.GetHostRegister(), pending_ticks, true);
-  EmitConditionalBranch(Condition::Below, false, &gte_done);
-
-  // add stall ticks
-  EmitAdd(pending_ticks.GetHostRegister(), pending_ticks.GetHostRegister(), gte_completion_tick, false);
-
-  // store new ticks
-  EmitBindLabel(&gte_done);
-  EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), pending_ticks);
-}
-
-#endif
-
-} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp
deleted file mode 100644
index 4ceb71cca..000000000
--- a/src/core/cpu_recompiler_code_generator_x64.cpp
+++ /dev/null
@@ -1,3210 +0,0 @@
-// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
-// SPDX-License-Identifier: CC-BY-NC-ND-4.0
-
-#include "cpu_code_cache_private.h"
-#include "cpu_core.h"
-#include "cpu_core_private.h"
-#include "cpu_recompiler_code_generator.h"
-#include "cpu_recompiler_thunks.h"
-#include "settings.h"
-#include "timing_event.h"
-
-#include "common/align.h"
-#include "common/assert.h"
-#include "common/log.h"
-#include "common/memmap.h"
-
-#ifdef CPU_ARCH_X64
-
-LOG_CHANNEL(Recompiler);
-
-#ifdef ENABLE_HOST_DISASSEMBLY
-#include "Zycore/Format.h"
-#include "Zycore/Status.h"
-#include "Zydis/Zydis.h"
-#endif
-
-bool CPU::Recompiler::IsCallerSavedRegister(u32 id)
-{
-#ifdef _WIN32
-  // The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile.
-  return (id <= 2 || (id >= 8 && id <= 11));
-#else
-  // rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
-  return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
-#endif
-}
-
-u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
-{
-  using namespace Xbyak;
-
-#define PTR(x) (cg->rbp + (((u8*)(x)) - ((u8*)&g_state)))
-
-#ifdef _WIN32
-  // Shadow space for Win32
-  constexpr u32 stack_size = 32 + 8;
-#else
-  // Stack still needs to be aligned
-  constexpr u32 stack_size = 8;
-#endif
-
-  DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
-              g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
-
-  CodeGenerator acg(code_size, static_cast<u8*>(code));
-  CodeGenerator* cg = &acg;
-
-  Label dispatch;
-  Label exit_recompiler;
-
-  g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(const_cast<u8*>(cg->getCurr()));
-  {
-    // Don't need to save registers, because we fastjmp out when execution is interrupted.
-    cg->sub(cg->rsp, stack_size);
-
-    // CPU state pointer
-    cg->lea(cg->rbp, cg->qword[cg->rip + &g_state]);
-
-    // newrec preloads fastmem base
-    if (g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler && CodeCache::IsUsingFastmem())
-      cg->mov(cg->rbx, cg->qword[PTR(&g_state.fastmem_base)]);
-
-    // Fall through to event dispatcher
-  }
-
-  // check events then for frame done
-  g_check_events_and_dispatch = cg->getCurr();
-  {
-    Label skip_event_check;
-    cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
-    cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
-    cg->jl(skip_event_check);
-
-    g_run_events_and_dispatch = cg->getCurr();
-    cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
-
-    cg->L(skip_event_check);
-  }
-
-  // TODO: align?
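Before the dispatcher is emitted below, it is worth spelling out the lookup it performs. A rough C++ model (names hypothetical): a first-level table indexed by the top 16 bits of the PC selects a 64KiB page of block pointers, and the low bits index into that page. The "* 2" scaling works because guest instructions are 4 bytes and each entry is 8 bytes, so (pc & 0xFFFF) / 4 * 8 == (pc & 0xFFFF) * 2, with each per-page pointer pre-biased so the full PC can be used as the second index:

#include <cstdint>

using CodePointer = const void*;

static CodePointer LookupBlock(CodePointer* const* fast_map, uint32_t pc)
{
  // fast_map[pc >> 16] points at (biased) storage for one 64KiB guest page.
  const uint8_t* page = reinterpret_cast<const uint8_t*>(fast_map[pc >> 16]);
  // Scale the whole PC by 2; the bias baked into 'page' cancels the high bits.
  return *reinterpret_cast<const CodePointer*>(page + static_cast<uint64_t>(pc) * 2);
}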
- g_dispatcher = cg->getCurr(); - { - cg->L(dispatch); - - // rcx <- s_fast_map[pc >> 16] - cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]); - cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]); - cg->mov(RWARG3, RWARG1); - cg->shr(RWARG3, 16); - cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]); - - // call(rcx[pc * 2]) (fast_map[pc >> 2]) - cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]); - } - - g_compile_or_revalidate_block = cg->getCurr(); - { - cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]); - cg->call(&CompileOrRevalidateBlock); - cg->jmp(dispatch); - } - - g_discard_and_recompile_block = cg->getCurr(); - { - cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]); - cg->call(&DiscardAndRecompileBlock); - cg->jmp(dispatch); - } - - g_interpret_block = cg->getCurr(); - { - cg->call(CodeCache::GetInterpretUncachedBlockFunction()); - cg->jmp(dispatch); - } - -#undef PTR - - return static_cast(cg->getSize()); -} - -u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) -{ - u8* ptr = static_cast(code); - *(ptr++) = 0xE9; // jmp - - const ptrdiff_t disp = (reinterpret_cast(dst) - reinterpret_cast(code)) - 5; - DebugAssert(disp >= static_cast(std::numeric_limits::min()) && - disp <= static_cast(std::numeric_limits::max())); - - const s32 disp32 = static_cast(disp); - std::memcpy(ptr, &disp32, sizeof(disp32)); - return 5; -} - -#ifdef ENABLE_HOST_DISASSEMBLY - -static ZydisFormatterFunc s_old_print_address; - -static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer, - ZydisFormatterContext* context) -{ - using namespace CPU; - - ZyanU64 address; - ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand, context->runtime_address, &address)); - - char buf[128]; - u32 len = 0; - -#define A(x) static_cast(reinterpret_cast(x)) - - if (address >= A(Bus::g_ram) && address < A(Bus::g_ram + Bus::g_ram_size)) - { - len = snprintf(buf, sizeof(buf), "g_ram+0x%08X", static_cast(address - A(Bus::g_ram))); - } - else if (address >= A(&g_state.regs) && - address < A(reinterpret_cast(&g_state.regs) + sizeof(CPU::Registers))) - { - len = snprintf(buf, sizeof(buf), "g_state.regs.%s", - GetRegName(static_cast(((address - A(&g_state.regs.r[0])) / 4u)))); - } - else if (address >= A(&g_state.cop0_regs) && - address < A(reinterpret_cast(&g_state.cop0_regs) + sizeof(CPU::Cop0Registers))) - { - for (const DebuggerRegisterListEntry& rle : g_debugger_register_list) - { - if (address == static_cast(reinterpret_cast(rle.value_ptr))) - { - len = snprintf(buf, sizeof(buf), "g_state.cop0_regs.%s", rle.name); - break; - } - } - } - else if (address >= A(&g_state.gte_regs) && - address < A(reinterpret_cast(&g_state.gte_regs) + sizeof(GTE::Regs))) - { - for (const DebuggerRegisterListEntry& rle : g_debugger_register_list) - { - if (address == static_cast(reinterpret_cast(rle.value_ptr))) - { - len = snprintf(buf, sizeof(buf), "g_state.gte_regs.%s", rle.name); - break; - } - } - } - else if (address == A(&g_state.load_delay_reg)) - { - len = snprintf(buf, sizeof(buf), "g_state.load_delay_reg"); - } - else if (address == A(&g_state.next_load_delay_reg)) - { - len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_reg"); - } - else if (address == A(&g_state.load_delay_value)) - { - len = snprintf(buf, sizeof(buf), "g_state.load_delay_value"); - } - else if (address == A(&g_state.next_load_delay_value)) - { - len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_value"); - } - else if (address == A(&g_state.pending_ticks)) - { - len = snprintf(buf, 
sizeof(buf), "g_state.pending_ticks"); - } - else if (address == A(&g_state.downcount)) - { - len = snprintf(buf, sizeof(buf), "g_state.downcount"); - } - -#undef A - - if (len > 0) - { - ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL)); - ZyanString* string; - ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string)); - return ZyanStringAppendFormat(string, "&%s", buf); - } - - return s_old_print_address(formatter, buffer, context); -} - -void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) -{ - ZydisDecoder disas_decoder; - ZydisFormatter disas_formatter; - ZydisDecodedInstruction disas_instruction; - ZydisDecodedOperand disas_operands[ZYDIS_MAX_OPERAND_COUNT]; - ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL); - s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute; - ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address); - - const u8* ptr = static_cast(start); - TinyString hex; - ZyanUSize remaining = size; - while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&disas_decoder, ptr, remaining, &disas_instruction, disas_operands))) - { - char buffer[256]; - if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, disas_operands, - ZYDIS_MAX_OPERAND_COUNT, buffer, sizeof(buffer), - static_cast(reinterpret_cast(ptr)), nullptr))) - { - hex.clear(); - for (u32 i = 0; i < 10; i++) - { - if (i < disas_instruction.length) - hex.append_format(" {:02X}", ptr[i]); - else - hex.append(" "); - } - DEBUG_LOG(" {:016X} {} {}", static_cast(reinterpret_cast(ptr)), hex, buffer); - } - - ptr += disas_instruction.length; - remaining -= disas_instruction.length; - } -} - -u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) -{ - ZydisDecoder disas_decoder; - ZydisDecodedInstruction disas_instruction; - ZydisDecoderContext disas_context; - ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64); - - const u8* ptr = static_cast(start); - ZyanUSize remaining = size; - u32 inst_count = 0; - while ( - ZYAN_SUCCESS(ZydisDecoderDecodeInstruction(&disas_decoder, &disas_context, ptr, remaining, &disas_instruction))) - { - ptr += disas_instruction.length; - remaining -= disas_instruction.length; - inst_count++; - } - - return inst_count; -} - -#else - -void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) -{ - ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY."); -} - -u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) -{ - ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY."); - return 0; -} - -#endif // ENABLE_HOST_DISASSEMBLY - -namespace CPU::Recompiler { - -static constexpr HostReg RCPUPTR = Xbyak::Operand::RBP; -static constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX; -static constexpr HostReg RRETURN = RXRET.getIdx(); -static constexpr HostReg RARG1 = RXARG1.getIdx(); -static constexpr HostReg RARG2 = RXARG2.getIdx(); -static constexpr HostReg RARG3 = RXARG3.getIdx(); -static constexpr HostReg RARG4 = RXARG4.getIdx(); - -static const Xbyak::Reg8 GetHostReg8(HostReg reg) -{ - return Xbyak::Reg8(reg, reg >= Xbyak::Operand::SPL); -} - -static const Xbyak::Reg8 GetHostReg8(const Value& value) -{ - DebugAssert(value.size == RegSize_8 && value.IsInHostRegister()); - return Xbyak::Reg8(value.host_reg, value.host_reg >= Xbyak::Operand::SPL); -} - -static const Xbyak::Reg16 
GetHostReg16(HostReg reg) -{ - return Xbyak::Reg16(reg); -} - -static const Xbyak::Reg16 GetHostReg16(const Value& value) -{ - DebugAssert(value.size == RegSize_16 && value.IsInHostRegister()); - return Xbyak::Reg16(value.host_reg); -} - -static const Xbyak::Reg32 GetHostReg32(HostReg reg) -{ - return Xbyak::Reg32(reg); -} - -static const Xbyak::Reg32 GetHostReg32(const Value& value) -{ - DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); - return Xbyak::Reg32(value.host_reg); -} - -static const Xbyak::Reg64 GetHostReg64(HostReg reg) -{ - return Xbyak::Reg64(reg); -} - -static const Xbyak::Reg64 GetHostReg64(const Value& value) -{ - DebugAssert(value.size == RegSize_64 && value.IsInHostRegister()); - return Xbyak::Reg64(value.host_reg); -} - -static const Xbyak::Reg64 GetCPUPtrReg() -{ - return Xbyak::Reg64(RCPUPTR); -} - -static const Xbyak::Reg64 GetFastmemBasePtrReg() -{ - return GetHostReg64(RMEMBASEPTR); -} - -CodeGenerator::CodeGenerator() - : m_register_cache(*this), m_near_emitter(CPU::CodeCache::GetFreeCodeSpace(), CPU::CodeCache::GetFreeCodePointer()), - m_far_emitter(CPU::CodeCache::GetFreeFarCodeSpace(), CPU::CodeCache::GetFreeFarCodePointer()), - m_emit(&m_near_emitter) -{ - InitHostRegs(); -} - -CodeGenerator::~CodeGenerator() = default; - -const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/) -{ - static constexpr std::array reg8_names = { - {"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"}}; - static constexpr std::array reg16_names = { - {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"}}; - static constexpr std::array reg32_names = {{"eax", "ecx", "edx", "ebx", "esp", "ebp", - "esi", "edi", "r8d", "r9d", "r10d", "r11d", - "r12d", "r13d", "r14d", "r15d"}}; - static constexpr std::array reg64_names = { - {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}}; - if (reg >= static_cast(HostReg_Count)) - return ""; - - switch (size) - { - case RegSize_8: - return reg8_names[reg]; - case RegSize_16: - return reg16_names[reg]; - case RegSize_32: - return reg32_names[reg]; - case RegSize_64: - return reg64_names[reg]; - default: - return ""; - } -} - -void CodeGenerator::InitHostRegs() -{ -#if defined(ABI_WIN64) - // TODO: function calls mess up the parameter registers if we use them.. 
fix it - // allocate nonvolatile before volatile - m_register_cache.SetHostRegAllocationOrder( - {Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI, /*Xbyak::Operand::RSP, */ - Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, /*Xbyak::Operand::RCX, - Xbyak::Operand::RDX, Xbyak::Operand::R8, Xbyak::Operand::R9, */ - Xbyak::Operand::R10, Xbyak::Operand::R11, - /*Xbyak::Operand::RAX*/}); - m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RCX, Xbyak::Operand::RDX, - Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10, - Xbyak::Operand::R11}); - m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, - Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12, - Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15}); -#elif defined(ABI_SYSV) - m_register_cache.SetHostRegAllocationOrder( - {Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13, - Xbyak::Operand::R14, Xbyak::Operand::R15, - /*Xbyak::Operand::RAX, */ /*Xbyak::Operand::RDI, */ /*Xbyak::Operand::RSI, */ - /*Xbyak::Operand::RDX, */ /*Xbyak::Operand::RCX, */ Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10, - Xbyak::Operand::R11}); - m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RDI, Xbyak::Operand::RSI, - Xbyak::Operand::RDX, Xbyak::Operand::RCX, Xbyak::Operand::R8, - Xbyak::Operand::R9, Xbyak::Operand::R10, Xbyak::Operand::R11}); - m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP, - Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, - Xbyak::Operand::R15}); -#endif - - m_register_cache.SetCPUPtrHostReg(RCPUPTR); -} - -void CodeGenerator::SwitchToFarCode() -{ - m_emit = &m_far_emitter; -} - -void CodeGenerator::SwitchToNearCode() -{ - m_emit = &m_near_emitter; -} - -void* CodeGenerator::GetStartNearCodePointer() const -{ - return m_near_emitter.getCode(); -} - -void* CodeGenerator::GetCurrentNearCodePointer() const -{ - return m_near_emitter.getCurr(); -} - -void* CodeGenerator::GetCurrentFarCodePointer() const -{ - return m_far_emitter.getCurr(); -} - -Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */) -{ - if (value.IsInHostRegister()) - return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister); - - Value new_value = m_register_cache.AllocateScratch(value.size); - EmitCopyValue(new_value.host_reg, value); - return new_value; -} - -Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */) -{ - if (value.IsInHostRegister()) - return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister); - - Value new_value = m_register_cache.AllocateScratch(value.size); - EmitCopyValue(new_value.host_reg, value); - return new_value; -} - -void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */) -{ - if (allocate_registers) - { - m_register_cache.AssumeCalleeSavedRegistersAreSaved(); - - // Store the CPU struct pointer. - const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR); - DebugAssert(cpu_reg_allocated); - UNREFERENCED_VARIABLE(cpu_reg_allocated); - - // If there's loadstore instructions, preload the fastmem base. 
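Sketch of why the allocation order above reserves RBP for the CPU state block and RBX for the fastmem base (simplified State, illustration only): with the state pointer pinned in a callee-saved register for the whole block, every guest-register access is a single [base + displacement] operand, and the value survives helper calls without being reloaded.

#include <cstdint>

struct State
{
  uint32_t regs[32];
  uint8_t* fastmem_base;
};

// With 'state' kept in RBP, this is one mov (e.g. mov eax, [rbp + index*4]);
// without pinning, the emitter would first have to rematerialize the 64-bit
// address of the global state before every access.
static inline uint32_t LoadGuestReg(const State* state, uint32_t index)
{
  return state->regs[index];
}

The fastmem-base preload referred to by the comment above follows.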
- if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) - { - const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR); - DebugAssert(fastmem_reg_allocated); - UNREFERENCED_VARIABLE(fastmem_reg_allocated); - m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + OFFSETOF(CPU::State, fastmem_base)]); - } - } -} - -void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to) -{ - if (free_registers) - { - m_register_cache.FreeHostReg(RCPUPTR); - if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) - m_register_cache.FreeHostReg(RMEMBASEPTR); - - m_register_cache.PopCalleeSavedRegisters(true); - } - - if (jump_to) - m_emit->jmp(jump_to); -} - -void CodeGenerator::EmitExceptionExit() -{ - AddPendingCycles(false); - - // ensure all unflushed registers are written back - m_register_cache.FlushAllGuestRegisters(false, false); - - // the interpreter load delay might have its own value, but we'll overwrite it here anyway - // technically RaiseException() and FlushPipeline() have already been called, but that should be okay - m_register_cache.FlushLoadDelay(false); - - m_register_cache.PopCalleeSavedRegisters(false); - m_emit->jmp(CodeCache::g_check_events_and_dispatch); -} - -void CodeGenerator::EmitExceptionExitOnBool(const Value& value) -{ - Assert(!value.IsConstant() && value.IsInHostRegister()); - - m_emit->test(GetHostReg8(value), GetHostReg8(value)); - m_emit->jnz(GetCurrentFarCodePointer()); - - m_register_cache.PushState(); - - SwitchToFarCode(); - EmitExceptionExit(); - SwitchToNearCode(); - - m_register_cache.PopState(); -} - -const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size) -{ - m_near_emitter.ready(); - m_far_emitter.ready(); - - const u32 near_size = static_cast(m_near_emitter.getSize()); - const u32 far_size = static_cast(m_far_emitter.getSize()); - const void* code = m_near_emitter.getCode(); - *out_host_code_size = near_size; - *out_host_far_code_size = far_size; - CPU::CodeCache::CommitCode(near_size); - CPU::CodeCache::CommitFarCode(far_size); - - m_near_emitter.reset(); - m_far_emitter.reset(); - - return code; -} - -void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) -{ - switch (to_size) - { - case RegSize_16: - { - switch (from_size) - { - case RegSize_8: - m_emit->movsx(GetHostReg16(to_reg), GetHostReg8(from_reg)); - return; - default: - break; - } - } - break; - - case RegSize_32: - { - switch (from_size) - { - case RegSize_8: - m_emit->movsx(GetHostReg32(to_reg), GetHostReg8(from_reg)); - return; - case RegSize_16: - m_emit->movsx(GetHostReg32(to_reg), GetHostReg16(from_reg)); - return; - default: - break; - } - } - break; - - default: - break; - } - - Panic("Unknown sign-extend combination"); -} - -void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) -{ - switch (to_size) - { - case RegSize_16: - { - switch (from_size) - { - case RegSize_8: - m_emit->movzx(GetHostReg16(to_reg), GetHostReg8(from_reg)); - return; - default: - break; - } - } - break; - - case RegSize_32: - { - switch (from_size) - { - case RegSize_8: - m_emit->movzx(GetHostReg32(to_reg), GetHostReg8(from_reg)); - return; - case RegSize_16: - m_emit->movzx(GetHostReg32(to_reg), GetHostReg16(from_reg)); - return; - default: - break; - } - } - break; - - default: - break; - } - - Panic("Unknown sign-extend combination"); -} - -void CodeGenerator::EmitCopyValue(HostReg 
to_reg, const Value& value) -{ - // TODO: mov x, 0 -> xor x, x - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - - switch (value.size) - { - case RegSize_8: - { - if (value.HasConstantValue(0)) - m_emit->xor_(GetHostReg8(to_reg), GetHostReg8(to_reg)); - else if (value.IsConstant()) - m_emit->mov(GetHostReg8(to_reg), value.constant_value); - else - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.HasConstantValue(0)) - m_emit->xor_(GetHostReg16(to_reg), GetHostReg16(to_reg)); - else if (value.IsConstant()) - m_emit->mov(GetHostReg16(to_reg), value.constant_value); - else - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.HasConstantValue(0)) - m_emit->xor_(GetHostReg32(to_reg), GetHostReg32(to_reg)); - else if (value.IsConstant()) - m_emit->mov(GetHostReg32(to_reg), value.constant_value); - else - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.HasConstantValue(0)) - m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(to_reg)); - else if (value.IsConstant()) - m_emit->mov(GetHostReg64(to_reg), value.constant_value); - else - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - - switch (value.size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (value.IsConstant()) - m_emit->add(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); - else - m_emit->add(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (value.IsConstant()) - m_emit->add(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); - else - m_emit->add(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (value.IsConstant()) - m_emit->add(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->add(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value); - m_emit->add(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); - } - else - { - m_emit->add(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->add(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - - switch (value.size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (value.IsConstant()) - m_emit->sub(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); - else - 
m_emit->sub(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (value.IsConstant()) - m_emit->sub(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); - else - m_emit->sub(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (value.IsConstant()) - m_emit->sub(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->sub(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value); - m_emit->sub(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); - } - else - { - m_emit->sub(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->sub(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant()) - m_emit->cmp(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); - else - m_emit->cmp(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.IsConstant()) - m_emit->cmp(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); - else - m_emit->cmp(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.IsConstant()) - m_emit->cmp(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value); - m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); - } - else - { - m_emit->cmp(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); - } - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, - bool signed_multiply) -{ - const bool save_eax = (to_reg_hi != Xbyak::Operand::RAX && to_reg_lo != Xbyak::Operand::RAX); - const bool save_edx = (to_reg_hi != Xbyak::Operand::RDX && to_reg_lo != Xbyak::Operand::RDX); - - if (save_eax) - m_emit->push(m_emit->rax); - - if (save_edx) - m_emit->push(m_emit->rdx); - -#define DO_MUL(src) \ - if (lhs.size == RegSize_8) \ - signed_multiply ? m_emit->imul(src.changeBit(8)) : m_emit->mul(src.changeBit(8)); \ - else if (lhs.size == RegSize_16) \ - signed_multiply ? m_emit->imul(src.changeBit(16)) : m_emit->mul(src.changeBit(16)); \ - else if (lhs.size == RegSize_32) \ - signed_multiply ? m_emit->imul(src.changeBit(32)) : m_emit->mul(src.changeBit(32)); \ - else \ - signed_multiply ? 
m_emit->imul(src.changeBit(64)) : m_emit->mul(src.changeBit(64)); - - // x*x - if (lhs.IsInHostRegister() && rhs.IsInHostRegister() && lhs.GetHostRegister() == rhs.GetHostRegister()) - { - if (lhs.GetHostRegister() != Xbyak::Operand::RAX) - EmitCopyValue(Xbyak::Operand::RAX, lhs); - - DO_MUL(m_emit->rax); - } - else if (lhs.IsInHostRegister() && lhs.GetHostRegister() == Xbyak::Operand::RAX) - { - if (!rhs.IsInHostRegister()) - { - EmitCopyValue(Xbyak::Operand::RDX, rhs); - DO_MUL(m_emit->rdx); - } - else - { - DO_MUL(GetHostReg64(rhs)); - } - } - else if (rhs.IsInHostRegister() && rhs.GetHostRegister() == Xbyak::Operand::RAX) - { - if (!lhs.IsInHostRegister()) - { - EmitCopyValue(Xbyak::Operand::RDX, lhs); - DO_MUL(m_emit->rdx); - } - else - { - DO_MUL(GetHostReg64(lhs)); - } - } - else - { - if (lhs.IsInHostRegister()) - { - EmitCopyValue(Xbyak::Operand::RAX, rhs); - if (lhs.size == RegSize_8) - signed_multiply ? m_emit->imul(GetHostReg8(lhs)) : m_emit->mul(GetHostReg8(lhs)); - else if (lhs.size == RegSize_16) - signed_multiply ? m_emit->imul(GetHostReg16(lhs)) : m_emit->mul(GetHostReg16(lhs)); - else if (lhs.size == RegSize_32) - signed_multiply ? m_emit->imul(GetHostReg32(lhs)) : m_emit->mul(GetHostReg32(lhs)); - else - signed_multiply ? m_emit->imul(GetHostReg64(lhs)) : m_emit->mul(GetHostReg64(lhs)); - } - else if (rhs.IsInHostRegister()) - { - EmitCopyValue(Xbyak::Operand::RAX, lhs); - if (lhs.size == RegSize_8) - signed_multiply ? m_emit->imul(GetHostReg8(rhs)) : m_emit->mul(GetHostReg8(rhs)); - else if (lhs.size == RegSize_16) - signed_multiply ? m_emit->imul(GetHostReg16(rhs)) : m_emit->mul(GetHostReg16(rhs)); - else if (lhs.size == RegSize_32) - signed_multiply ? m_emit->imul(GetHostReg32(rhs)) : m_emit->mul(GetHostReg32(rhs)); - else - signed_multiply ? m_emit->imul(GetHostReg64(rhs)) : m_emit->mul(GetHostReg64(rhs)); - } - else - { - EmitCopyValue(Xbyak::Operand::RAX, lhs); - EmitCopyValue(Xbyak::Operand::RDX, rhs); - DO_MUL(m_emit->rdx); - } - } - -#undef DO_MUL - - if (to_reg_hi == Xbyak::Operand::RDX && to_reg_lo == Xbyak::Operand::RAX) - { - // ideal case: registers are the ones we want: don't have to do anything - } - else if (to_reg_hi == Xbyak::Operand::RAX && to_reg_lo == Xbyak::Operand::RDX) - { - // what we want, but swapped, so exchange them - m_emit->xchg(m_emit->rax, m_emit->rdx); - } - else - { - // store to the registers we want.. this could be optimized better - m_emit->push(m_emit->rdx); - m_emit->push(m_emit->rax); - m_emit->pop(GetHostReg64(to_reg_lo)); - m_emit->pop(GetHostReg64(to_reg_hi)); - } - - // restore original contents - if (save_edx) - m_emit->pop(m_emit->rdx); - - if (save_eax) - m_emit->pop(m_emit->rax); -} - -void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, - bool signed_divide) -{ - const bool save_eax = (to_reg_quotient != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RAX); - const bool save_edx = (to_reg_quotient != Xbyak::Operand::RDX && to_reg_remainder != Xbyak::Operand::RDX); - - if (save_eax) - m_emit->push(m_emit->rax); - - if (save_edx) - m_emit->push(m_emit->rdx); - - // unsupported cases.. 
for now - Assert(num != Xbyak::Operand::RDX && num != Xbyak::Operand::RAX); - if (num != Xbyak::Operand::RAX) - EmitCopyValue(Xbyak::Operand::RAX, Value::FromHostReg(&m_register_cache, num, size)); - - if (size == RegSize_8) - { - if (signed_divide) - { - m_emit->cbw(); - m_emit->idiv(GetHostReg8(denom)); - } - else - { - m_emit->xor_(m_emit->dx, m_emit->dx); - m_emit->div(GetHostReg8(denom)); - } - } - else if (size == RegSize_16) - { - if (signed_divide) - { - m_emit->cwd(); - m_emit->idiv(GetHostReg16(denom)); - } - else - { - m_emit->xor_(m_emit->edx, m_emit->edx); - m_emit->div(GetHostReg16(denom)); - } - } - else if (size == RegSize_32) - { - if (signed_divide) - { - m_emit->cdq(); - m_emit->idiv(GetHostReg32(denom)); - } - else - { - m_emit->xor_(m_emit->rdx, m_emit->edx); - m_emit->div(GetHostReg32(denom)); - } - } - else - { - if (signed_divide) - m_emit->idiv(GetHostReg64(denom)); - else - m_emit->div(GetHostReg64(denom)); - } - - if (to_reg_quotient == Xbyak::Operand::RAX && to_reg_remainder == Xbyak::Operand::RDX) - { - // ideal case: registers are the ones we want: don't have to do anything - } - else if (to_reg_quotient == Xbyak::Operand::RDX && to_reg_remainder == Xbyak::Operand::RAX) - { - // what we want, but swapped, so exchange them - m_emit->xchg(m_emit->rax, m_emit->rdx); - } - else if (to_reg_quotient != Xbyak::Operand::RAX && to_reg_quotient != Xbyak::Operand::RDX && - to_reg_remainder != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RDX) - { - // store to the registers we want.. this could be optimized better - if (static_cast(to_reg_quotient) != HostReg_Count) - m_emit->mov(GetHostReg64(to_reg_quotient), m_emit->rax); - if (static_cast(to_reg_remainder) != HostReg_Count) - m_emit->mov(GetHostReg64(to_reg_remainder), m_emit->rdx); - } - else - { - // store to the registers we want.. 
this could be optimized better - if (static_cast(to_reg_quotient) != HostReg_Count) - { - m_emit->push(m_emit->rax); - m_emit->pop(GetHostReg64(to_reg_quotient)); - } - if (static_cast(to_reg_remainder) != HostReg_Count) - { - m_emit->push(m_emit->rdx); - m_emit->pop(GetHostReg64(to_reg_remainder)); - } - } - - // restore original contents - if (save_edx) - m_emit->pop(m_emit->rdx); - - if (save_eax) - m_emit->pop(m_emit->rax); -} - -void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) -{ - switch (size) - { - case RegSize_8: - m_emit->inc(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->inc(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->inc(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitDec(HostReg to_reg, RegSize size) -{ - switch (size) - { - case RegSize_8: - m_emit->dec(GetHostReg8(to_reg)); - break; - case RegSize_16: - m_emit->dec(GetHostReg16(to_reg)); - break; - case RegSize_32: - m_emit->dec(GetHostReg32(to_reg)); - break; - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); - - // We have to use CL for the shift amount :( - const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && - (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); - if (save_cl) - m_emit->push(m_emit->rcx); - - if (!amount_value.IsConstant()) - m_emit->mov(m_emit->cl, GetHostReg8(amount_value.host_reg)); - - switch (size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shl(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->shl(GetHostReg8(to_reg), m_emit->cl); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shl(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->shl(GetHostReg16(to_reg), m_emit->cl); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shl(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->shl(GetHostReg32(to_reg), m_emit->cl); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shl(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->shl(GetHostReg64(to_reg), m_emit->cl); - } - break; - } - - if (save_cl) - m_emit->pop(m_emit->rcx); -} - -void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); - - // We have to use CL for the shift amount :( - const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && - (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); - if (save_cl) - m_emit->push(m_emit->rcx); - - if (!amount_value.IsConstant()) - m_emit->mov(m_emit->cl, 
GetHostReg8(amount_value.host_reg)); - - switch (size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shr(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->shr(GetHostReg8(to_reg), m_emit->cl); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shr(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->shr(GetHostReg16(to_reg), m_emit->cl); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shr(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->shr(GetHostReg32(to_reg), m_emit->cl); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (amount_value.IsConstant()) - m_emit->shr(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->shr(GetHostReg64(to_reg), m_emit->cl); - } - break; - } - - if (save_cl) - m_emit->pop(m_emit->rcx); -} - -void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, - bool assume_amount_masked /* = true */) -{ - DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); - - // We have to use CL for the shift amount :( - const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && - (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); - if (save_cl) - m_emit->push(m_emit->rcx); - - if (!amount_value.IsConstant()) - m_emit->mov(m_emit->cl, GetHostReg8(amount_value.host_reg)); - - switch (size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (amount_value.IsConstant()) - m_emit->sar(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->sar(GetHostReg8(to_reg), m_emit->cl); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (amount_value.IsConstant()) - m_emit->sar(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); - else - m_emit->sar(GetHostReg16(to_reg), m_emit->cl); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (amount_value.IsConstant()) - m_emit->sar(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->sar(GetHostReg32(to_reg), m_emit->cl); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (amount_value.IsConstant()) - m_emit->sar(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); - else - m_emit->sar(GetHostReg64(to_reg), m_emit->cl); - } - break; - } - - if (save_cl) - m_emit->pop(m_emit->rcx); -} - -void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - switch (value.size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (value.IsConstant()) - m_emit->and_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); - else - 
m_emit->and_(GetHostReg8(to_reg), GetHostReg8(value)); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (value.IsConstant()) - m_emit->and_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); - else - m_emit->and_(GetHostReg16(to_reg), GetHostReg16(value)); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (value.IsConstant()) - m_emit->and_(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->and_(GetHostReg32(to_reg), GetHostReg32(value)); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), value.constant_value); - m_emit->and_(GetHostReg64(to_reg), GetHostReg64(temp)); - } - else - { - m_emit->and_(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->and_(GetHostReg64(to_reg), GetHostReg64(value)); - } - } - break; - } -} - -void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - switch (value.size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (value.IsConstant()) - m_emit->or_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); - else - m_emit->or_(GetHostReg8(to_reg), GetHostReg8(value)); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (value.IsConstant()) - m_emit->or_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); - else - m_emit->or_(GetHostReg16(to_reg), GetHostReg16(value)); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (value.IsConstant()) - m_emit->or_(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->or_(GetHostReg32(to_reg), GetHostReg32(value)); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), value.constant_value); - m_emit->or_(GetHostReg64(to_reg), GetHostReg64(temp)); - } - else - { - m_emit->or_(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->or_(GetHostReg64(to_reg), GetHostReg64(value)); - } - } - break; - } -} - -void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - switch (value.size) - { - case RegSize_8: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg)); - - if (value.IsConstant()) - m_emit->xor_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); - else - m_emit->xor_(GetHostReg8(to_reg), GetHostReg8(value)); - } - break; - - case RegSize_16: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg)); - - if (value.IsConstant()) - m_emit->xor_(GetHostReg16(to_reg), 
Truncate32(value.constant_value & UINT32_C(0xFFFF))); - else - m_emit->xor_(GetHostReg16(to_reg), GetHostReg16(value)); - } - break; - - case RegSize_32: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg)); - - if (value.IsConstant()) - m_emit->xor_(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->xor_(GetHostReg32(to_reg), GetHostReg32(value)); - } - break; - - case RegSize_64: - { - if (to_reg != from_reg) - m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg)); - - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), value.constant_value); - m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(temp)); - } - else - { - m_emit->xor_(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(value)); - } - } - break; - } -} - -void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) -{ - DebugAssert(value.IsConstant() || value.IsInHostRegister()); - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant()) - m_emit->test(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); - else - m_emit->test(GetHostReg8(to_reg), GetHostReg8(value)); - } - break; - - case RegSize_16: - { - if (value.IsConstant()) - m_emit->test(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); - else - m_emit->test(GetHostReg16(to_reg), GetHostReg16(value)); - } - break; - - case RegSize_32: - { - if (value.IsConstant()) - m_emit->test(GetHostReg32(to_reg), Truncate32(value.constant_value)); - else - m_emit->test(GetHostReg32(to_reg), GetHostReg32(value)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - { - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), value.constant_value); - m_emit->test(GetHostReg64(to_reg), GetHostReg64(temp)); - } - else - { - m_emit->test(GetHostReg64(to_reg), Truncate32(value.constant_value)); - } - } - else - { - m_emit->test(GetHostReg64(to_reg), GetHostReg64(value)); - } - } - break; - } -} - -void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) -{ - switch (size) - { - case RegSize_8: - m_emit->not_(GetHostReg8(to_reg)); - break; - - case RegSize_16: - m_emit->not_(GetHostReg16(to_reg)); - break; - - case RegSize_32: - m_emit->not_(GetHostReg32(to_reg)); - break; - - case RegSize_64: - m_emit->not_(GetHostReg64(to_reg)); - break; - - default: - break; - } -} - -void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) -{ - switch (condition) - { - case Condition::Always: - m_emit->mov(GetHostReg8(to_reg), 1); - break; - - case Condition::NotEqual: - m_emit->setne(GetHostReg8(to_reg)); - break; - - case Condition::Equal: - m_emit->sete(GetHostReg8(to_reg)); - break; - - case Condition::Overflow: - m_emit->seto(GetHostReg8(to_reg)); - break; - - case Condition::Greater: - m_emit->setg(GetHostReg8(to_reg)); - break; - - case Condition::GreaterEqual: - m_emit->setge(GetHostReg8(to_reg)); - break; - - case Condition::Less: - m_emit->setl(GetHostReg8(to_reg)); - break; - - case Condition::LessEqual: - m_emit->setle(GetHostReg8(to_reg)); - break; - - case Condition::Negative: - m_emit->sets(GetHostReg8(to_reg)); - break; - - case Condition::PositiveOrZero: - m_emit->setns(GetHostReg8(to_reg)); - break; - - case 
Condition::Above: - m_emit->seta(GetHostReg8(to_reg)); - break; - - case Condition::AboveEqual: - m_emit->setae(GetHostReg8(to_reg)); - break; - - case Condition::Below: - m_emit->setb(GetHostReg8(to_reg)); - break; - - case Condition::BelowEqual: - m_emit->setbe(GetHostReg8(to_reg)); - break; - - default: - UnreachableCode(); - break; - } - - if (to_size != RegSize_8) - EmitZeroExtend(to_reg, to_size, to_reg, RegSize_8); -} - -u32 CodeGenerator::PrepareStackForCall() -{ - // we assume that the stack is unaligned at this point - const u32 num_callee_saved = m_register_cache.GetActiveCalleeSavedRegisterCount(); - const u32 num_caller_saved = m_register_cache.PushCallerSavedRegisters(); - const u32 current_offset = (num_callee_saved + num_caller_saved) * 8; - const u32 aligned_offset = - (current_offset == 0) ? 0 : Common::AlignUp(current_offset + FUNCTION_CALL_SHADOW_SPACE, 16); - const u32 adjust_size = aligned_offset - current_offset; - if (adjust_size > 0) - m_emit->sub(m_emit->rsp, adjust_size); - - return adjust_size; -} - -void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) -{ - if (adjust_size > 0) - m_emit->add(m_emit->rsp, adjust_size); - - m_register_cache.PopCallerSavedRegisters(); -} - -void CodeGenerator::EmitCall(const void* ptr) -{ - DebugAssert(Xbyak::inner::IsInInt32(reinterpret_cast(ptr) - reinterpret_cast(m_emit->getCurr()))); - m_emit->call(ptr); -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3) -{ - if (return_value) - m_register_cache.DiscardHostReg(return_value->GetHostRegister()); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push 
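// ---------------------------------------------------------------------------
// [Editorial sketch] PrepareStackForCall above keeps RSP 16-byte aligned at
// the call site and reserves FUNCTION_CALL_SHADOW_SPACE (32 bytes on Win64,
// zero on SysV). The adjustment it emits as a single "sub rsp, N" is:
#include <cstdint>

uint32_t call_stack_adjust(uint32_t pushed_regs, uint32_t shadow_space)
{
  const uint32_t current = pushed_regs * 8; // callee- and caller-saved pushes
  if (current == 0)
    return 0;
  const uint32_t aligned = (current + shadow_space + 15) & ~15u;
  return aligned - current;
}
// ---------------------------------------------------------------------------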
arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, - const Value& arg3, const Value& arg4) -{ - if (return_value) - return_value->Discard(); - - // shadow space allocate - const u32 adjust_size = PrepareStackForCall(); - - // push arguments - EmitCopyValue(RARG1, arg1); - EmitCopyValue(RARG2, arg2); - EmitCopyValue(RARG3, arg3); - EmitCopyValue(RARG4, arg4); - - // actually call the function - EmitCall(ptr); - - // shadow space release - RestoreStackAfterCall(adjust_size); - - // copy out return value if requested - if (return_value) - { - return_value->Undiscard(); - EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); - } -} - -void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position) -{ - m_emit->push(GetHostReg64(reg)); -} - -void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - m_emit->push(GetHostReg64(reg)); - m_emit->push(GetHostReg64(reg2)); -} - -void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position) -{ - m_emit->pop(GetHostReg64(reg)); -} - -void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position) -{ - m_emit->pop(GetHostReg64(reg2)); - m_emit->pop(GetHostReg64(reg)); -} - -void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset) -{ - switch (guest_size) - { - case RegSize_8: - m_emit->mov(GetHostReg8(host_reg), m_emit->byte[GetCPUPtrReg() + offset]); - break; - - case RegSize_16: - m_emit->mov(GetHostReg16(host_reg), m_emit->word[GetCPUPtrReg() + offset]); - break; - - case RegSize_32: - m_emit->mov(GetHostReg32(host_reg), m_emit->dword[GetCPUPtrReg() + offset]); - break; - - case RegSize_64: - m_emit->mov(GetHostReg64(host_reg), m_emit->qword[GetCPUPtrReg() + offset]); - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value) -{ - DebugAssert(value.IsInHostRegister() || value.IsConstant()); - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant()) - m_emit->mov(m_emit->byte[GetCPUPtrReg() + offset], value.constant_value); - else - m_emit->mov(m_emit->byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.IsConstant()) - m_emit->mov(m_emit->word[GetCPUPtrReg() + offset], value.constant_value); - else - m_emit->mov(m_emit->word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.IsConstant()) - m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], value.constant_value); - else - m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - { - // we need a temporary to load the value if it doesn't fit in 32-bits - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - EmitCopyValue(temp.host_reg, value); - m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], 
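// ---------------------------------------------------------------------------
// [Editorial sketch] Guest CPU state is reached through a host register that
// stays pinned for the whole block (GetCPUPtrReg()), so every access in
// EmitLoadCPUStructField/EmitStoreCPUStructField is a single mov with a fixed
// displacement. Scalar equivalent (field layout illustrative):
#include <cstdint>
#include <cstring>

struct State { uint32_t pending_ticks; uint32_t regs[32]; };

uint32_t load_state_u32(const State* cpu, uint32_t offset)
{
  uint32_t value;
  std::memcpy(&value, reinterpret_cast<const uint8_t*>(cpu) + offset, sizeof(value));
  return value; // mov eax, dword [cpu_reg + offset]
}
// ---------------------------------------------------------------------------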
GetHostReg64(temp.host_reg)); - } - else - { - m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], value.constant_value); - } - } - else - { - m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg)); - } - } - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) -{ - DebugAssert(value.IsInHostRegister() || value.IsConstant()); - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant() && value.constant_value == 1) - m_emit->inc(m_emit->byte[GetCPUPtrReg() + offset]); - else if (value.IsConstant()) - m_emit->add(m_emit->byte[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); - else - m_emit->add(m_emit->byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.IsConstant() && value.constant_value == 1) - m_emit->inc(m_emit->word[GetCPUPtrReg() + offset]); - else if (value.IsConstant()) - m_emit->add(m_emit->word[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); - else - m_emit->add(m_emit->word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.IsConstant() && value.constant_value == 1) - m_emit->inc(m_emit->dword[GetCPUPtrReg() + offset]); - else if (value.IsConstant()) - m_emit->add(m_emit->dword[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); - else - m_emit->add(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant() && value.constant_value == 1) - { - m_emit->inc(m_emit->qword[GetCPUPtrReg() + offset]); - } - else if (value.IsConstant()) - { - // we need a temporary to load the value if it doesn't fit in 32-bits - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - EmitCopyValue(temp.host_reg, value); - m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(temp.host_reg)); - } - else - { - m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); - } - } - else - { - m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg)); - } - } - break; - - default: - { - UnreachableCode(); - } - break; - } -} - -void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) -{ - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - EmitCopyValue(RARG1, address); - m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - } - - const Xbyak::Reg64 membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? 
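// ---------------------------------------------------------------------------
// [Editorial sketch] In LUT fastmem mode the emitted "shr addr, PAGE_SHIFT ;
// mov base, [lut + idx*8]" pair above looks up a per-page host base; the
// subsequent access uses [base + full_address], which implies the table
// entries are pre-biased by the page's guest address. The lookup itself:
#include <cstdint>

uint8_t* fastmem_page_base(uint8_t* const* lut, uint32_t address, uint32_t page_shift)
{
  return lut[address >> page_shift]; // entry for the page containing 'address'
}
// ---------------------------------------------------------------------------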
GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) - { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); - } - - // TODO: movsx/zx inline here - switch (size) - { - case RegSize_8: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, Value& result) -{ - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - EmitCopyValue(RARG1, address); - m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - } - - const Xbyak::Reg64 membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - if (address.IsConstant() && address.constant_value >= 0x80000000) - { - actual_address = &result; - m_emit->mov(GetHostReg32(result.host_reg), address.constant_value); - } - - void* host_pc = GetCurrentNearCodePointer(); - - m_register_cache.InhibitAllocation(); - - switch (size) - { - case RegSize_8: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]); - else - m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]); - } - break; - - default: - UnreachableCode(); - break; - } - - // insert nops, we need at least 5 bytes for a relative jump - const u32 fastmem_size = static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc)); - const u32 nops = (fastmem_size < 5 ? 
5 - fastmem_size : 0); - for (u32 i = 0; i < nops; i++) - m_emit->nop(); - - const u32 host_code_size = - static_cast(static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc))); - - // generate slowmem fallback - m_far_emitter.align(16); - void* thunk_host_pc = GetCurrentFarCodePointer(); - SwitchToFarCode(); - - // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below - DebugAssert(m_delayed_cycles_add > 0); - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast(m_delayed_cycles_add))); - m_delayed_cycles_add += Bus::RAM_READ_TICKS; - - EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true); - - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(-m_delayed_cycles_add))); - - // return to the block code - m_emit->jmp(GetCurrentNearCodePointer()); - - SwitchToNearCode(); - m_register_cache.UninhibitAllocation(); - - CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, thunk_host_pc); -} - -void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, Value& result, bool in_far_code) -{ - if (g_settings.cpu_recompiler_memory_exceptions) - { - // NOTE: This can leave junk in the upper bits - switch (size) - { - case RegSize_8: - EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address); - break; - - case RegSize_16: - EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address); - break; - - case RegSize_32: - EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address); - break; - - default: - UnreachableCode(); - break; - } - - m_emit->test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg)); - m_emit->js(GetCurrentFarCodePointer()); - - m_register_cache.PushState(); - - // load exception path - if (!in_far_code) - SwitchToFarCode(); - - // cause_bits = (-result << 2) | BD | cop_n - m_emit->neg(GetHostReg32(result.host_reg)); - m_emit->shl(GetHostReg32(result.host_reg), 2); - m_emit->or_(GetHostReg32(result.host_reg), - Cop0Registers::CAUSE::MakeValueForException(static_cast(0), info.is_branch_delay_slot, false, - instruction.cop.cop_n)); - EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); - - EmitExceptionExit(); - - if (!in_far_code) - SwitchToNearCode(); - - m_register_cache.PopState(); - } - else - { - switch (size) - { - case RegSize_8: - EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address); - break; - - case RegSize_16: - EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address); - break; - - case RegSize_32: - EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, const Value& value) -{ - if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) - { - EmitCopyValue(RARG1, address); - m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT); - m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]); - } - - // can't store displacements > 0x80000000 in-line - const Value* actual_address = &address; - Value temp_address; - if (address.IsConstant() && address.constant_value >= 0x80000000) - { - temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32); - 
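// ---------------------------------------------------------------------------
// [Editorial sketch] The checked read thunks above return the loaded value,
// or a negative number whose negation is the exception code; "test ; js far"
// branches on the sign bit, and the far path rebuilds the CAUSE bits with the
// emitted neg/shl/or sequence. Equivalent (BD/cop_n bits passed in):
#include <cstdint>

uint32_t read_fault_cause_bits(int32_t thunk_result, uint32_t bd_copn_bits)
{
  const uint32_t excode = static_cast<uint32_t>(-thunk_result); // neg
  return (excode << 2) | bd_copn_bits;                          // shl 2 ; or
}
// ---------------------------------------------------------------------------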
actual_address = &temp_address; - m_emit->mov(GetHostReg32(temp_address), address.constant_value); - } - - const Xbyak::Reg64 membase = - (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg(); - - // fastmem - void* host_pc = GetCurrentNearCodePointer(); - - m_register_cache.InhibitAllocation(); - - switch (size) - { - case RegSize_8: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - m_emit->mov(m_emit->byte[membase + actual_address->constant_value], value.constant_value & 0xFFu); - else - m_emit->mov(m_emit->byte[membase + actual_address->constant_value], GetHostReg8(value.host_reg)); - } - else - { - if (value.IsConstant()) - m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFu); - else - m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], GetHostReg8(value.host_reg)); - } - } - break; - - case RegSize_16: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - m_emit->mov(m_emit->word[membase + actual_address->constant_value], value.constant_value & 0xFFFFu); - else - m_emit->mov(m_emit->word[membase + actual_address->constant_value], GetHostReg16(value.host_reg)); - } - else - { - if (value.IsConstant()) - m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFFFu); - else - m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], GetHostReg16(value.host_reg)); - } - } - break; - - case RegSize_32: - { - if (actual_address->IsConstant()) - { - if (value.IsConstant()) - m_emit->mov(m_emit->dword[membase + actual_address->constant_value], value.constant_value); - else - m_emit->mov(m_emit->dword[membase + actual_address->constant_value], GetHostReg32(value.host_reg)); - } - else - { - if (value.IsConstant()) - m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], value.constant_value); - else - m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], GetHostReg32(value.host_reg)); - } - } - break; - - default: - UnreachableCode(); - break; - } - - // insert nops, we need at least 5 bytes for a relative jump - const u32 fastmem_size = static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc)); - const u32 nops = (fastmem_size < 5 ? 
5 - fastmem_size : 0); - for (u32 i = 0; i < nops; i++) - m_emit->nop(); - - const u32 host_code_size = - static_cast(static_cast(static_cast(GetCurrentNearCodePointer()) - static_cast(host_pc))); - - // generate slowmem fallback - m_far_emitter.align(); - const void* host_thunk_pc = GetCurrentFarCodePointer(); - SwitchToFarCode(); - - DebugAssert(m_delayed_cycles_add > 0); - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast(m_delayed_cycles_add))); - - EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, true); - - EmitAddCPUStructField(OFFSETOF(State, pending_ticks), - Value::FromConstantU32(static_cast(-m_delayed_cycles_add))); - - // return to the block code - m_emit->jmp(GetCurrentNearCodePointer()); - - SwitchToNearCode(); - m_register_cache.UninhibitAllocation(); - - CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_thunk_pc); -} - -void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, - const Value& address, RegSize size, const Value& value, - bool in_far_code) -{ - if (g_settings.cpu_recompiler_memory_exceptions) - { - Assert(!in_far_code); - - Value result = m_register_cache.AllocateScratch(RegSize_32); - switch (size) - { - case RegSize_8: - EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value); - break; - - case RegSize_16: - EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value); - break; - - case RegSize_32: - EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value); - break; - - default: - UnreachableCode(); - break; - } - - m_register_cache.PushState(); - - m_emit->test(GetHostReg32(result), GetHostReg32(result)); - m_emit->jnz(GetCurrentFarCodePointer()); - - // store exception path - if (!in_far_code) - SwitchToFarCode(); - - // cause_bits = (result << 2) | BD | cop_n - m_emit->shl(GetHostReg32(result), 2); - m_emit->or_(GetHostReg32(result), - Cop0Registers::CAUSE::MakeValueForException(static_cast(0), info.is_branch_delay_slot, false, - instruction.cop.cop_n)); - EmitFunctionCall(nullptr, static_cast(&CPU::RaiseException), result, GetCurrentInstructionPC()); - - EmitExceptionExit(); - if (!in_far_code) - SwitchToNearCode(); - - m_register_cache.PopState(); - } - else - { - switch (size) - { - case RegSize_8: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value); - break; - - case RegSize_16: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value); - break; - - case RegSize_32: - EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::EmitUpdateFastmemBase() -{ - m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + OFFSETOF(CPU::State, fastmem_base)]); -} - -void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi) -{ - DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem", host_pc, lbi.guest_pc); - - // turn it into a jump to the slowmem handler - Xbyak::CodeGenerator cg(lbi.code_size, host_pc); - cg.jmp(lbi.thunk_address); - - const s32 nops = static_cast(lbi.code_size) - - static_cast(static_cast(cg.getCurr() - static_cast(host_pc))); - Assert(nops >= 0); - for (s32 i = 0; i < nops; i++) - cg.nop(); - - MemMap::FlushInstructionCache(host_pc, lbi.code_size); -} - -void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) -{ - const s64 displacement = - 
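// ---------------------------------------------------------------------------
// [Editorial sketch] BackpatchLoadStore above rewrites a faulting fastmem
// access in place into a jmp to its slowmem thunk, nop-filling the rest of
// the recorded span; this is why every fastmem access is padded to at least
// the 5 bytes a rel32 jmp needs. Assuming code_size >= 5:
#include <cstdint>
#include <cstring>

void patch_to_thunk_jump(uint8_t* code, uint32_t code_size, const uint8_t* thunk)
{
  const int32_t rel = static_cast<int32_t>(thunk - (code + 5)); // rel32 is relative to the next insn
  code[0] = 0xE9;                                               // jmp rel32 opcode
  std::memcpy(&code[1], &rel, sizeof(rel));
  std::memset(code + 5, 0x90, code_size - 5);                   // nop the remainder
}
// ---------------------------------------------------------------------------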
static_cast(reinterpret_cast(ptr) - reinterpret_cast(m_emit->getCurr())) + 2; - if (Xbyak::inner::IsInInt32(static_cast(displacement))) - { - switch (size) - { - case RegSize_8: - m_emit->mov(GetHostReg8(host_reg), m_emit->byte[m_emit->rip + ptr]); - break; - - case RegSize_16: - m_emit->mov(GetHostReg16(host_reg), m_emit->word[m_emit->rip + ptr]); - break; - - case RegSize_32: - m_emit->mov(GetHostReg32(host_reg), m_emit->dword[m_emit->rip + ptr]); - break; - - case RegSize_64: - m_emit->mov(GetHostReg64(host_reg), m_emit->qword[m_emit->rip + ptr]); - break; - - default: - { - UnreachableCode(); - } - break; - } - } - else - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), reinterpret_cast(ptr)); - switch (size) - { - case RegSize_8: - m_emit->mov(GetHostReg8(host_reg), m_emit->byte[GetHostReg64(temp)]); - break; - - case RegSize_16: - m_emit->mov(GetHostReg16(host_reg), m_emit->word[GetHostReg64(temp)]); - break; - - case RegSize_32: - m_emit->mov(GetHostReg32(host_reg), m_emit->dword[GetHostReg64(temp)]); - break; - - case RegSize_64: - m_emit->mov(GetHostReg64(host_reg), m_emit->qword[GetHostReg64(temp)]); - break; - - default: - { - UnreachableCode(); - } - break; - } - } -} - -void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) -{ - DebugAssert(value.IsInHostRegister() || value.IsConstant()); - - const s64 displacement = - static_cast(reinterpret_cast(ptr) - reinterpret_cast(m_emit->getCurr())); - if (Xbyak::inner::IsInInt32(static_cast(displacement))) - { - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant()) - m_emit->mov(m_emit->byte[m_emit->rip + ptr], value.constant_value); - else - m_emit->mov(m_emit->byte[m_emit->rip + ptr], GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.IsConstant()) - m_emit->mov(m_emit->word[m_emit->rip + ptr], value.constant_value); - else - m_emit->mov(m_emit->word[m_emit->rip + ptr], GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.IsConstant()) - m_emit->mov(m_emit->dword[m_emit->rip + ptr], value.constant_value); - else - m_emit->mov(m_emit->dword[m_emit->rip + ptr], GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - { - // we need a temporary to load the value if it doesn't fit in 32-bits - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - EmitCopyValue(temp.host_reg, value); - m_emit->mov(m_emit->qword[m_emit->rip + ptr], GetHostReg64(temp.host_reg)); - } - else - { - m_emit->mov(m_emit->qword[m_emit->rip + ptr], value.constant_value); - } - } - else - { - m_emit->mov(m_emit->qword[m_emit->rip + ptr], GetHostReg64(value.host_reg)); - } - } - break; - - default: - { - UnreachableCode(); - } - break; - } - } - else - { - Value address_temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(address_temp), reinterpret_cast(ptr)); - switch (value.size) - { - case RegSize_8: - { - if (value.IsConstant()) - m_emit->mov(m_emit->byte[GetHostReg64(address_temp)], value.constant_value); - else - m_emit->mov(m_emit->byte[GetHostReg64(address_temp)], GetHostReg8(value.host_reg)); - } - break; - - case RegSize_16: - { - if (value.IsConstant()) - m_emit->mov(m_emit->word[GetHostReg64(address_temp)], value.constant_value); - else - m_emit->mov(m_emit->word[GetHostReg64(address_temp)], GetHostReg16(value.host_reg)); - } - break; - - case RegSize_32: - { - if (value.IsConstant()) - 
m_emit->mov(m_emit->dword[GetHostReg64(address_temp)], value.constant_value); - else - m_emit->mov(m_emit->dword[GetHostReg64(address_temp)], GetHostReg32(value.host_reg)); - } - break; - - case RegSize_64: - { - if (value.IsConstant()) - { - // we need a temporary to load the value if it doesn't fit in 32-bits - if (!Xbyak::inner::IsInInt32(value.constant_value)) - { - Value temp = m_register_cache.AllocateScratch(RegSize_64); - EmitCopyValue(temp.host_reg, value); - m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], GetHostReg64(temp.host_reg)); - } - else - { - m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], value.constant_value); - } - } - else - { - m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], GetHostReg64(value.host_reg)); - } - } - break; - - default: - { - UnreachableCode(); - } - break; - } - } -} - -void CodeGenerator::EmitFlushInterpreterLoadDelay() -{ - Value reg = m_register_cache.AllocateScratch(RegSize_8); - Value value = m_register_cache.AllocateScratch(RegSize_32); - - auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)]; - auto load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, load_delay_value)]; - auto reg_ptr = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, regs.r[0]) + GetHostReg64(reg.host_reg) * 4]; - - Xbyak::Label skip_flush; - - // reg = load_delay_reg - m_emit->movzx(GetHostReg32(reg.host_reg), load_delay_reg); - - // if load_delay_reg == Reg::count goto skip_flush - m_emit->cmp(GetHostReg32(reg.host_reg), static_cast(Reg::count)); - m_emit->je(skip_flush); - - // r[reg] = load_delay_value - m_emit->mov(GetHostReg32(value), load_delay_value); - m_emit->mov(reg_ptr, GetHostReg32(value)); - - // load_delay_reg = Reg::count - m_emit->mov(load_delay_reg, static_cast(Reg::count)); - - m_emit->L(skip_flush); -} - -void CodeGenerator::EmitMoveNextInterpreterLoadDelay() -{ - Value reg = m_register_cache.AllocateScratch(RegSize_8); - Value value = m_register_cache.AllocateScratch(RegSize_32); - - auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)]; - auto load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, load_delay_value)]; - auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, next_load_delay_reg)]; - auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, next_load_delay_value)]; - - m_emit->mov(GetHostReg32(value), next_load_delay_value); - m_emit->mov(GetHostReg8(reg), next_load_delay_reg); - m_emit->mov(load_delay_value, GetHostReg32(value)); - m_emit->mov(load_delay_reg, GetHostReg8(reg)); - m_emit->mov(next_load_delay_reg, static_cast(Reg::count)); -} - -void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) -{ - if (!m_load_delay_dirty) - return; - - auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)]; - - Xbyak::Label skip_cancel; - - // if load_delay_reg != reg goto skip_cancel - m_emit->cmp(load_delay_reg, static_cast(reg)); - m_emit->jne(skip_cancel); - - // load_delay_reg = Reg::count - m_emit->mov(load_delay_reg, static_cast(Reg::count)); - - m_emit->L(skip_cancel); -} - -void CodeGenerator::EmitICacheCheckAndUpdate() -{ - if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) - { - if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) - { - m_emit->mov(m_emit->eax, m_block->size); - m_emit->mul(m_emit->dword[m_emit->rip + GetFetchMemoryAccessTimePtr()]); - m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], 
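// ---------------------------------------------------------------------------
// [Editorial sketch] EmitFlushInterpreterLoadDelay above models the MIPS load
// delay slot as a pending (reg, value) pair, with Reg::count acting as the
// "nothing pending" sentinel. Scalar equivalent (sentinel value illustrative):
#include <cstdint>

constexpr uint8_t kNoPendingLoad = 0xFF;

void flush_load_delay(uint32_t* regs, uint8_t& delay_reg, uint32_t delay_value)
{
  if (delay_reg == kNoPendingLoad) // cmp ; je skip_flush
    return;
  regs[delay_reg] = delay_value;   // r[reg] = load_delay_value
  delay_reg = kNoPendingLoad;      // load_delay_reg = Reg::count
}
// ---------------------------------------------------------------------------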
m_emit->eax); - } - else - { - m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], - static_cast(m_block->uncached_fetch_ticks)); - } - } - else if (m_block->icache_line_count > 0) - { - VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; - for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) - { - const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc); - const TickCount fill_ticks = GetICacheFillTicks(current_pc); - if (fill_ticks <= 0) - continue; - - const u32 line = GetICacheLine(current_pc); - const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); - Xbyak::Label cache_hit; - - m_emit->cmp(m_emit->dword[GetCPUPtrReg() + offset], tag); - m_emit->je(cache_hit); - m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], tag); - m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], static_cast(fill_ticks)); - m_emit->L(cache_hit); - } - } -} - -void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) -{ - const auto ram_ptr_reg = GetHostReg64(RARG1); - const auto shadow_ptr_reg = GetHostReg64(RARG2); - const auto temp_reg = GetHostReg64(RARG3); - const auto temp_reg32 = GetHostReg32(RARG3); - - // store it first to reduce code size, because we can offset - m_emit->mov(ram_ptr_reg, static_cast(reinterpret_cast(ram_ptr))); - m_emit->mov(shadow_ptr_reg, static_cast(reinterpret_cast(shadow_ptr))); - - bool first = true; - u32 offset = 0; - while (size >= 16) - { - const Xbyak::Xmm& dst = first ? m_emit->xmm0 : m_emit->xmm1; - m_emit->movups(dst, m_emit->xword[ram_ptr_reg + offset]); - m_emit->pcmpeqd(dst, m_emit->xword[shadow_ptr_reg + offset]); - if (!first) - m_emit->pand(m_emit->xmm0, dst); - else - first = false; - - offset += 16; - size -= 16; - } - - // TODO: better codegen for 16 byte aligned blocks - if (!first) - { - m_emit->movmskps(temp_reg32, m_emit->xmm0); - m_emit->cmp(temp_reg32, 0xf); - m_emit->jne(CodeCache::g_discard_and_recompile_block); - } - - while (size >= 8) - { - m_emit->mov(temp_reg, m_emit->qword[ram_ptr_reg + offset]); - m_emit->cmp(temp_reg, m_emit->qword[shadow_ptr_reg + offset]); - m_emit->jne(CodeCache::g_discard_and_recompile_block); - offset += 8; - size -= 8; - } - - while (size >= 4) - { - m_emit->mov(temp_reg32, m_emit->dword[ram_ptr_reg + offset]); - m_emit->cmp(temp_reg32, m_emit->dword[shadow_ptr_reg + offset]); - m_emit->jne(CodeCache::g_discard_and_recompile_block); - offset += 4; - size -= 4; - } - - DebugAssert(size == 0); -} - -void CodeGenerator::EmitStallUntilGTEComplete() -{ - m_emit->mov(GetHostReg32(RRETURN), m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)]); - m_emit->mov(GetHostReg32(RARG1), m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, gte_completion_tick)]); - - if (m_delayed_cycles_add > 0) - { - m_emit->add(GetHostReg32(RRETURN), static_cast(m_delayed_cycles_add)); - m_delayed_cycles_add = 0; - } - - m_emit->cmp(GetHostReg32(RARG1), GetHostReg32(RRETURN)); - m_emit->cmova(GetHostReg32(RRETURN), GetHostReg32(RARG1)); - m_emit->mov(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], GetHostReg32(RRETURN)); -} - -void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) -{ - const s64 jump_distance = - static_cast(reinterpret_cast(address) - reinterpret_cast(GetCurrentCodePointer())); - if (Xbyak::inner::IsInInt32(static_cast(jump_distance))) - { - m_emit->jmp(address, Xbyak::CodeGenerator::T_NEAR); - return; - } - - Assert(allow_scratch); - - Value 
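// ---------------------------------------------------------------------------
// [Editorial sketch] EmitBlockProtectCheck above verifies the block's RAM
// still matches a shadow copy, 16 bytes at a time with movups/pcmpeqd/pand
// and a final movmskps == 0xF test, falling back to 8/4-byte compares for the
// tail. The whole emitted sequence is equivalent to:
#include <cstdint>
#include <cstring>

bool block_unmodified(const uint8_t* ram, const uint8_t* shadow, uint32_t size)
{
  return std::memcmp(ram, shadow, size) == 0; // mismatch => discard-and-recompile
}
// ---------------------------------------------------------------------------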
temp = m_register_cache.AllocateScratch(RegSize_64); - m_emit->mov(GetHostReg64(temp), reinterpret_cast(address)); - m_emit->jmp(GetHostReg64(temp)); -} - -void CodeGenerator::EmitBranch(LabelType* label) -{ - m_emit->jmp(*label); -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - Panic("Needs a comparison value"); - return; - - case Condition::Negative: - case Condition::PositiveOrZero: - case Condition::NotZero: - case Condition::Zero: - { - switch (size) - { - case RegSize_8: - m_emit->test(GetHostReg8(value), GetHostReg8(value)); - break; - case RegSize_16: - m_emit->test(GetHostReg16(value), GetHostReg16(value)); - break; - case RegSize_32: - m_emit->test(GetHostReg32(value), GetHostReg32(value)); - break; - case RegSize_64: - m_emit->test(GetHostReg64(value), GetHostReg64(value)); - break; - default: - UnreachableCode(); - break; - } - - EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::Always: - m_emit->jmp(*label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, - LabelType* label) -{ - switch (condition) - { - case Condition::NotEqual: - case Condition::Equal: - case Condition::Overflow: - case Condition::Greater: - case Condition::GreaterEqual: - case Condition::LessEqual: - case Condition::Less: - case Condition::Above: - case Condition::AboveEqual: - case Condition::Below: - case Condition::BelowEqual: - { - EmitCmp(lhs, rhs); - EmitConditionalBranch(condition, invert, label); - return; - } - - case Condition::Negative: - case Condition::PositiveOrZero: - case Condition::NotZero: - case Condition::Zero: - { - Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0)); - EmitConditionalBranch(condition, invert, lhs, rhs.size, label); - return; - } - - case Condition::Always: - m_emit->jmp(*label); - return; - - default: - UnreachableCode(); - return; - } -} - -void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label) -{ - switch (condition) - { - case Condition::Always: - m_emit->jmp(*label); - break; - - case Condition::NotEqual: - invert ? m_emit->je(*label) : m_emit->jne(*label); - break; - - case Condition::Equal: - invert ? m_emit->jne(*label) : m_emit->je(*label); - break; - - case Condition::Overflow: - invert ? m_emit->jno(*label) : m_emit->jo(*label); - break; - - case Condition::Greater: - invert ? m_emit->jng(*label) : m_emit->jg(*label); - break; - - case Condition::GreaterEqual: - invert ? m_emit->jnge(*label) : m_emit->jge(*label); - break; - - case Condition::Less: - invert ? m_emit->jnl(*label) : m_emit->jl(*label); - break; - - case Condition::LessEqual: - invert ? m_emit->jnle(*label) : m_emit->jle(*label); - break; - - case Condition::Negative: - invert ? m_emit->jns(*label) : m_emit->js(*label); - break; - - case Condition::PositiveOrZero: - invert ? m_emit->js(*label) : m_emit->jns(*label); - break; - - case Condition::Above: - invert ? m_emit->jna(*label) : m_emit->ja(*label); - break; - - case Condition::AboveEqual: - invert ? 
m_emit->jnae(*label) : m_emit->jae(*label); - break; - - case Condition::Below: - invert ? m_emit->jnb(*label) : m_emit->jb(*label); - break; - - case Condition::BelowEqual: - invert ? m_emit->jnbe(*label) : m_emit->jbe(*label); - break; - - case Condition::NotZero: - invert ? m_emit->jz(*label) : m_emit->jnz(*label); - break; - - case Condition::Zero: - invert ? m_emit->jnz(*label) : m_emit->jz(*label); - break; - - default: - UnreachableCode(); - break; - } -} - -void CodeGenerator::EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label) -{ - if (bit < 8) - { - // same size, probably faster - switch (size) - { - case RegSize_8: - m_emit->test(GetHostReg8(reg), (1u << bit)); - m_emit->jnz(*label); - break; - - case RegSize_16: - m_emit->test(GetHostReg16(reg), (1u << bit)); - m_emit->jnz(*label); - break; - - case RegSize_32: - m_emit->test(GetHostReg32(reg), (1u << bit)); - m_emit->jnz(*label); - break; - - default: - UnreachableCode(); - break; - } - } - else - { - switch (size) - { - case RegSize_8: - m_emit->bt(GetHostReg8(reg), bit); - m_emit->jc(*label); - break; - - case RegSize_16: - m_emit->bt(GetHostReg16(reg), bit); - m_emit->jc(*label); - break; - - case RegSize_32: - m_emit->bt(GetHostReg32(reg), bit); - m_emit->jc(*label); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) -{ - if (bit < 8) - { - // same size, probably faster - switch (size) - { - case RegSize_8: - m_emit->test(GetHostReg8(reg), (1u << bit)); - m_emit->jz(*label); - break; - - case RegSize_16: - m_emit->test(GetHostReg16(reg), (1u << bit)); - m_emit->jz(*label); - break; - - case RegSize_32: - m_emit->test(GetHostReg32(reg), (1u << bit)); - m_emit->jz(*label); - break; - - default: - UnreachableCode(); - break; - } - } - else - { - switch (size) - { - case RegSize_8: - m_emit->bt(GetHostReg8(reg), bit); - m_emit->jnc(*label); - break; - - case RegSize_16: - m_emit->bt(GetHostReg16(reg), bit); - m_emit->jnc(*label); - break; - - case RegSize_32: - m_emit->bt(GetHostReg32(reg), bit); - m_emit->jnc(*label); - break; - - default: - UnreachableCode(); - break; - } - } -} - -void CodeGenerator::EmitBindLabel(LabelType* label) -{ - m_emit->L(*label); -} - -void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr) -{ - const s64 displacement = - static_cast(reinterpret_cast(ptr) - reinterpret_cast(m_emit->getCurr())) + 2; - if (Xbyak::inner::IsInInt32(static_cast(displacement))) - m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]); - else - m_emit->mov(GetHostReg64(host_reg), reinterpret_cast(ptr)); -} -} // namespace CPU::Recompiler - -#endif // CPU_ARCH_X64 diff --git a/src/core/cpu_recompiler_register_cache.cpp b/src/core/cpu_recompiler_register_cache.cpp deleted file mode 100644 index 7e5329015..000000000 --- a/src/core/cpu_recompiler_register_cache.cpp +++ /dev/null @@ -1,945 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "cpu_recompiler_register_cache.h" -#include "cpu_recompiler_code_generator.h" - -#include "common/log.h" - -#include - -LOG_CHANNEL(Recompiler); - -namespace CPU::Recompiler { - -Value::Value() = default; - -Value::Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_) - : regcache(regcache_), constant_value(constant_), size(size_), flags(flags_) -{ -} - -Value::Value(const Value& other) - : regcache(other.regcache), 
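// ---------------------------------------------------------------------------
// [Editorial sketch] EmitBranchIfBitSet/Clear above pick "test reg, imm" for
// bits 0..7 (result in ZF) and "bt reg, imm" for higher bits (result in CF);
// both instruction choices compute the same predicate:
#include <cstdint>

bool bit_is_set(uint32_t value, uint8_t bit)
{
  return (value & (1u << bit)) != 0; // test+jnz/jz below bit 8, bt+jc/jnc above
}
// ---------------------------------------------------------------------------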
constant_value(other.constant_value), host_reg(other.host_reg), size(other.size), - flags(other.flags) -{ - AssertMsg(!other.IsScratch(), "Can't copy a temporary register"); -} - -Value::Value(Value&& other) - : regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size), - flags(other.flags) -{ - other.Clear(); -} - -Value::Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_) - : regcache(regcache_), host_reg(reg_), size(size_), flags(flags_) -{ -} - -Value::~Value() -{ - Release(); -} - -Value& Value::operator=(const Value& other) -{ - AssertMsg(!other.IsScratch(), "Can't copy a temporary register"); - - Release(); - regcache = other.regcache; - constant_value = other.constant_value; - host_reg = other.host_reg; - size = other.size; - flags = other.flags; - - return *this; -} - -Value& Value::operator=(Value&& other) -{ - Release(); - regcache = other.regcache; - constant_value = other.constant_value; - host_reg = other.host_reg; - size = other.size; - flags = other.flags; - other.Clear(); - return *this; -} - -void Value::Clear() -{ - regcache = nullptr; - constant_value = 0; - host_reg = {}; - size = RegSize_8; - flags = ValueFlags::None; -} - -void Value::Release() -{ - if (IsScratch()) - { - DebugAssert(IsInHostRegister() && regcache); - regcache->FreeHostReg(host_reg); - } -} - -void Value::ReleaseAndClear() -{ - Release(); - Clear(); -} - -void Value::Discard() -{ - DebugAssert(IsInHostRegister()); - regcache->DiscardHostReg(host_reg); -} - -void Value::Undiscard() -{ - DebugAssert(IsInHostRegister()); - regcache->UndiscardHostReg(host_reg); -} - -RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator) -{ - m_state.guest_reg_order.fill(Reg::count); -} - -RegisterCache::~RegisterCache() -{ - Assert(m_state_stack.empty()); -} - -void RegisterCache::SetHostRegAllocationOrder(std::initializer_list regs) -{ - size_t index = 0; - for (HostReg reg : regs) - { - m_state.host_reg_state[reg] = HostRegState::Usable; - m_host_register_allocation_order[index++] = reg; - } - m_state.available_count = static_cast(index); -} - -void RegisterCache::SetCallerSavedHostRegs(std::initializer_list regs) -{ - for (HostReg reg : regs) - m_state.host_reg_state[reg] |= HostRegState::CallerSaved; -} - -void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list regs) -{ - for (HostReg reg : regs) - m_state.host_reg_state[reg] |= HostRegState::CalleeSaved; -} - -void RegisterCache::SetCPUPtrHostReg(HostReg reg) -{ - m_cpu_ptr_host_register = reg; -} - -bool RegisterCache::IsUsableHostReg(HostReg reg) const -{ - return (m_state.host_reg_state[reg] & HostRegState::Usable) != HostRegState::None; -} - -bool RegisterCache::IsHostRegInUse(HostReg reg) const -{ - return (m_state.host_reg_state[reg] & HostRegState::InUse) != HostRegState::None; -} - -bool RegisterCache::HasFreeHostRegister() const -{ - for (const HostRegState state : m_state.host_reg_state) - { - if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) - return true; - } - - return false; -} - -u32 RegisterCache::GetUsedHostRegisters() const -{ - u32 count = 0; - for (const HostRegState state : m_state.host_reg_state) - { - if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable | HostRegState::InUse)) - count++; - } - - return count; -} - -u32 RegisterCache::GetFreeHostRegisters() const -{ - u32 count = 0; - for (const HostRegState state : m_state.host_reg_state) - { - if 
((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable)) - count++; - } - - return count; -} - -HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */) -{ - if (m_state.allocator_inhibit_count > 0) - Panic("Allocating when inhibited"); - - // try for a free register in allocation order - for (u32 i = 0; i < m_state.available_count; i++) - { - const HostReg reg = m_host_register_allocation_order[i]; - if ((m_state.host_reg_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable) - { - if (AllocateHostReg(reg, state)) - return reg; - } - } - - // evict one of the cached guest registers - if (!EvictOneGuestRegister()) - Panic("Failed to evict guest register for new allocation"); - - return AllocateHostReg(state); -} - -bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/) -{ - if ((m_state.host_reg_state[reg] & HostRegState::InUse) == HostRegState::InUse) - return false; - - m_state.host_reg_state[reg] |= state; - - if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - HostRegState::CalleeSaved) - { - // new register we need to save.. - DebugAssert(m_state.callee_saved_order_count < HostReg_Count); - m_code_generator.EmitPushHostReg(reg, GetActiveCalleeSavedRegisterCount()); - m_state.callee_saved_order[m_state.callee_saved_order_count++] = reg; - m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated; - } - - return reg; -} - -void RegisterCache::DiscardHostReg(HostReg reg) -{ - DebugAssert(IsHostRegInUse(reg)); - DEBUG_LOG("Discarding host register {}", m_code_generator.GetHostRegName(reg)); - m_state.host_reg_state[reg] |= HostRegState::Discarded; -} - -void RegisterCache::UndiscardHostReg(HostReg reg) -{ - DebugAssert(IsHostRegInUse(reg)); - DEBUG_LOG("Undiscarding host register {}", m_code_generator.GetHostRegName(reg)); - m_state.host_reg_state[reg] &= ~HostRegState::Discarded; -} - -void RegisterCache::FreeHostReg(HostReg reg) -{ - DebugAssert(IsHostRegInUse(reg)); - DEBUG_LOG("Freeing host register {}", m_code_generator.GetHostRegName(reg)); - m_state.host_reg_state[reg] &= ~HostRegState::InUse; -} - -void RegisterCache::EnsureHostRegFree(HostReg reg) -{ - if (!IsHostRegInUse(reg)) - return; - - for (u8 i = 0; i < static_cast(Reg::count); i++) - { - if (m_state.guest_reg_state[i].IsInHostRegister() && m_state.guest_reg_state[i].GetHostRegister() == reg) - FlushGuestRegister(static_cast(i), true, true); - } -} - -Value RegisterCache::GetCPUPtr() -{ - return Value::FromHostReg(this, m_cpu_ptr_host_register, HostPointerSize); -} - -Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Invalid */) -{ - if (reg == HostReg_Invalid) - { - reg = AllocateHostReg(); - } - else - { - Assert(!IsHostRegInUse(reg)); - if (!AllocateHostReg(reg)) - Panic("Failed to allocate specific host register"); - } - - DEBUG_LOG("Allocating host register {} as scratch", m_code_generator.GetHostRegName(reg)); - return Value::FromScratch(this, reg, size); -} - -void RegisterCache::ReserveCallerSavedRegisters() -{ - for (u32 reg = 0; reg < HostReg_Count; reg++) - { - if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - HostRegState::CalleeSaved) - { - DebugAssert(m_state.callee_saved_order_count < HostReg_Count); - m_code_generator.EmitPushHostReg(static_cast(reg), GetActiveCalleeSavedRegisterCount()); - 
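// ---------------------------------------------------------------------------
// [Editorial sketch] AllocateHostReg above scans a fixed allocation order for
// a usable, not-in-use register, evicting a cached guest register only when
// none is free; callee-saved registers are pushed lazily on first allocation.
// The scan reduces to:
#include <array>
#include <cstdint>
#include <optional>

std::optional<uint32_t> pick_free_host_reg(const std::array<bool, 16>& in_use,
                                           const std::array<uint32_t, 16>& order,
                                           uint32_t usable_count)
{
  for (uint32_t i = 0; i < usable_count; i++)
  {
    if (!in_use[order[i]])
      return order[i]; // otherwise the caller evicts a guest register and retries
  }
  return std::nullopt;
}
// ---------------------------------------------------------------------------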
m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast(reg); - m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated; - } - } -} - -u32 RegisterCache::PushCallerSavedRegisters() const -{ - u32 position = GetActiveCalleeSavedRegisterCount(); - u32 count = 0; - for (u32 i = 0; i < HostReg_Count; i++) - { - if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == - (HostRegState::CallerSaved | HostRegState::InUse)) - { - m_code_generator.EmitPushHostReg(static_cast(i), position + count); - count++; - } - } - - return count; -} - -u32 RegisterCache::PopCallerSavedRegisters() const -{ - u32 count = 0; - for (u32 i = 0; i < HostReg_Count; i++) - { - if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == - (HostRegState::CallerSaved | HostRegState::InUse)) - { - count++; - } - } - if (count == 0) - return 0; - - u32 position = GetActiveCalleeSavedRegisterCount() + count - 1; - u32 i = (HostReg_Count - 1); - do - { - if ((m_state.host_reg_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == - (HostRegState::CallerSaved | HostRegState::InUse)) - { - u32 reg_pair; - for (reg_pair = (i - 1); reg_pair > 0 && reg_pair < HostReg_Count; reg_pair--) - { - if ((m_state.host_reg_state[reg_pair] & - (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) == - (HostRegState::CallerSaved | HostRegState::InUse)) - { - m_code_generator.EmitPopHostRegPair(static_cast(reg_pair), static_cast(i), position); - position -= 2; - i = reg_pair; - break; - } - } - - if (reg_pair == 0) - { - m_code_generator.EmitPopHostReg(static_cast(i), position); - position--; - } - } - i--; - } while (i > 0); - return count; -} - -u32 RegisterCache::PopCalleeSavedRegisters(bool commit) -{ - if (m_state.callee_saved_order_count == 0) - return 0; - - u32 count = 0; - u32 i = m_state.callee_saved_order_count; - do - { - const HostReg reg = m_state.callee_saved_order[i - 1]; - DebugAssert((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)); - - if (i > 1) - { - const HostReg reg2 = m_state.callee_saved_order[i - 2]; - DebugAssert((m_state.host_reg_state[reg2] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)); - - m_code_generator.EmitPopHostRegPair(reg2, reg, i - 1); - i -= 2; - count += 2; - - if (commit) - { - m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated; - m_state.host_reg_state[reg2] &= ~HostRegState::CalleeSavedAllocated; - } - } - else - { - m_code_generator.EmitPopHostReg(reg, i - 1); - if (commit) - m_state.host_reg_state[reg] &= ~HostRegState::CalleeSavedAllocated; - count++; - i--; - } - } while (i > 0); - if (commit) - m_state.callee_saved_order_count = 0; - - return count; -} - -void RegisterCache::ReserveCalleeSavedRegisters() -{ - for (u32 reg = 0; reg < HostReg_Count; reg++) - { - if ((m_state.host_reg_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - HostRegState::CalleeSaved) - { - DebugAssert(m_state.callee_saved_order_count < HostReg_Count); - - // can we find a paired register? 
(mainly for ARM) - u32 reg_pair; - for (reg_pair = reg + 1; reg_pair < HostReg_Count; reg_pair++) - { - if ((m_state.host_reg_state[reg_pair] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - HostRegState::CalleeSaved) - { - m_code_generator.EmitPushHostRegPair(static_cast(reg), static_cast(reg_pair), - GetActiveCalleeSavedRegisterCount()); - - m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast(reg); - m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated; - m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast(reg_pair); - m_state.host_reg_state[reg_pair] |= HostRegState::CalleeSavedAllocated; - reg = reg_pair; - break; - } - } - - if (reg_pair == HostReg_Count) - { - m_code_generator.EmitPushHostReg(static_cast(reg), GetActiveCalleeSavedRegisterCount()); - m_state.callee_saved_order[m_state.callee_saved_order_count++] = static_cast(reg); - m_state.host_reg_state[reg] |= HostRegState::CalleeSavedAllocated; - } - } - } -} - -void RegisterCache::AssumeCalleeSavedRegistersAreSaved() -{ - for (u32 i = 0; i < HostReg_Count; i++) - { - if ((m_state.host_reg_state[i] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) == - HostRegState::CalleeSaved) - { - m_state.host_reg_state[i] &= ~HostRegState::CalleeSaved; - } - } -} - -void RegisterCache::PushState() -{ - // need to copy this manually because of the load delay values - RegAllocState save_state; - save_state.host_reg_state = m_state.host_reg_state; - save_state.callee_saved_order = m_state.callee_saved_order; - save_state.guest_reg_state = m_state.guest_reg_state; - save_state.guest_reg_order = m_state.guest_reg_order; - save_state.available_count = m_state.available_count; - save_state.callee_saved_order_count = m_state.callee_saved_order_count; - save_state.guest_reg_order_count = m_state.guest_reg_order_count; - save_state.allocator_inhibit_count = m_state.allocator_inhibit_count; - save_state.load_delay_register = m_state.load_delay_register; - save_state.load_delay_value.regcache = m_state.load_delay_value.regcache; - save_state.load_delay_value.host_reg = m_state.load_delay_value.host_reg; - save_state.load_delay_value.size = m_state.load_delay_value.size; - save_state.load_delay_value.flags = m_state.load_delay_value.flags; - save_state.next_load_delay_register = m_state.next_load_delay_register; - save_state.next_load_delay_value.regcache = m_state.next_load_delay_value.regcache; - save_state.next_load_delay_value.host_reg = m_state.next_load_delay_value.host_reg; - save_state.next_load_delay_value.size = m_state.next_load_delay_value.size; - save_state.next_load_delay_value.flags = m_state.next_load_delay_value.flags; - m_state_stack.push(std::move(save_state)); -} - -void RegisterCache::PopState() -{ - Assert(!m_state_stack.empty()); - - // prevent destructor -> freeing of host reg - m_state.load_delay_value.Clear(); - m_state.next_load_delay_value.Clear(); - - m_state = std::move(m_state_stack.top()); - m_state_stack.pop(); -} - -Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */, - HostReg forced_host_reg /* = HostReg_Invalid */) -{ - // register zero is always zero - if (guest_reg == Reg::zero) - { - // return a scratch value of zero if it's forced - if (force_host_register) - { - Value temp = AllocateScratch(RegSize_32, forced_host_reg); - m_code_generator.EmitXor(temp.host_reg, temp.host_reg, temp); - return temp; - } - - return Value::FromConstantU32(0); - } - 
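// ---------------------------------------------------------------------------
// [Editorial sketch] ReadGuestRegister above folds the architecturally-zero
// MIPS r0 to a constant, only materializing it (via xor reg, reg) when a
// caller forces a host register. Scalar equivalent:
#include <cstdint>

uint32_t read_gpr(const uint32_t* regs, uint8_t index)
{
  return (index == 0) ? 0u : regs[index]; // r0 always reads as zero
}
// ---------------------------------------------------------------------------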
- Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; - if (cache_value.IsValid()) - { - if (cache_value.IsInHostRegister()) - { - PushRegisterToOrder(guest_reg); - - // if it's in the wrong register, return it as scratch - if (forced_host_reg == HostReg_Invalid || cache_value.GetHostRegister() == forced_host_reg) - return cache_value; - - Value temp = AllocateScratch(RegSize_32, forced_host_reg); - m_code_generator.EmitCopyValue(forced_host_reg, cache_value); - return temp; - } - else if (force_host_register) - { - // if it's not in a register, it should be constant - DebugAssert(cache_value.IsConstant()); - - HostReg host_reg; - if (forced_host_reg == HostReg_Invalid) - { - host_reg = AllocateHostReg(); - } - else - { - Assert(!IsHostRegInUse(forced_host_reg)); - if (!AllocateHostReg(forced_host_reg)) - Panic("Failed to allocate specific host register"); - host_reg = forced_host_reg; - } - - DEBUG_LOG("Allocated host register {} for constant guest register {} (0x{:X})", - m_code_generator.GetHostRegName(host_reg), GetRegName(guest_reg), cache_value.constant_value); - - m_code_generator.EmitCopyValue(host_reg, cache_value); - cache_value.AddHostReg(this, host_reg); - AppendRegisterToOrder(guest_reg); - - // if we're forcing a host register, we're probably going to be changing the value, - // in which case the constant won't be correct anyway. so just drop it. - cache_value.ClearConstant(); - return cache_value; - } - else - { - // constant - return cache_value; - } - } - - HostReg host_reg; - if (forced_host_reg == HostReg_Invalid) - { - host_reg = AllocateHostReg(); - } - else - { - Assert(!IsHostRegInUse(forced_host_reg)); - if (!AllocateHostReg(forced_host_reg)) - Panic("Failed to allocate specific host register"); - host_reg = forced_host_reg; - } - - m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg); - - DEBUG_LOG("Loading guest register {} to host register {}{}", GetRegName(guest_reg), - m_code_generator.GetHostRegName(host_reg, RegSize_32), cache ? " (cached)" : ""); - - if (cache) - { - // Now in cache. - cache_value.SetHostReg(this, host_reg, RegSize_32); - AppendRegisterToOrder(guest_reg); - return cache_value; - } - else - { - // Skip caching, return the register as a value. 
- return Value::FromScratch(this, host_reg, RegSize_32); - } -} - -Value RegisterCache::ReadGuestRegisterToScratch(Reg guest_reg) -{ - HostReg host_reg = AllocateHostReg(); - - Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; - if (cache_value.IsValid()) - { - m_code_generator.EmitCopyValue(host_reg, cache_value); - - if (cache_value.IsConstant()) - { - DEBUG_LOG("Copying guest register {} from constant 0x{:08X} to scratch host register {}", GetRegName(guest_reg), - static_cast(cache_value.constant_value), m_code_generator.GetHostRegName(host_reg, RegSize_32)); - } - else - { - DEBUG_LOG("Copying guest register {} from {} to scratch host register {}", GetRegName(guest_reg), - m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32), - m_code_generator.GetHostRegName(host_reg, RegSize_32)); - } - } - else - { - m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg); - - DEBUG_LOG("Loading guest register {} to scratch host register {}", GetRegName(guest_reg), - m_code_generator.GetHostRegName(host_reg, RegSize_32)); - } - - return Value::FromScratch(this, host_reg, RegSize_32); -} - -Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value) -{ - // ignore writes to register zero - DebugAssert(value.size == RegSize_32); - if (guest_reg == Reg::zero) - return std::move(value); - - // cancel any load delay delay - if (m_state.load_delay_register == guest_reg) - { - DEBUG_LOG("Cancelling load delay of register {} because of non-delayed write", GetRegName(guest_reg)); - m_state.load_delay_register = Reg::count; - m_state.load_delay_value.ReleaseAndClear(); - } - - Value& cache_value = m_state.guest_reg_state[static_cast(guest_reg)]; - if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg) - { - // updating the register value. - DEBUG_LOG("Updating guest register {} (in host register {})", GetRegName(guest_reg), - m_code_generator.GetHostRegName(value.host_reg, RegSize_32)); - cache_value = std::move(value); - cache_value.SetDirty(); - return cache_value; - } - - InvalidateGuestRegister(guest_reg); - DebugAssert(!cache_value.IsValid()); - - if (value.IsConstant()) - { - // No need to allocate a host register, and we can defer the store. - cache_value = value; - cache_value.SetDirty(); - return cache_value; - } - - AppendRegisterToOrder(guest_reg); - - // If it's a temporary, we can bind that to the guest register. - if (value.IsScratch()) - { - DEBUG_LOG("Binding scratch register {} to guest register {}", - m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg)); - - cache_value = std::move(value); - cache_value.flags &= ~ValueFlags::Scratch; - cache_value.SetDirty(); - return Value::FromHostReg(this, cache_value.host_reg, RegSize_32); - } - - // Allocate host register, and copy value to it. 
- HostReg host_reg = AllocateHostReg(); - m_code_generator.EmitCopyValue(host_reg, value); - cache_value.SetHostReg(this, host_reg, RegSize_32); - cache_value.SetDirty(); - - DEBUG_LOG("Copying non-scratch register {} to {} to guest register {}", - m_code_generator.GetHostRegName(value.host_reg, RegSize_32), - m_code_generator.GetHostRegName(host_reg, RegSize_32), GetRegName(guest_reg)); - - return Value::FromHostReg(this, cache_value.host_reg, RegSize_32); -} - -void RegisterCache::WriteGuestRegisterDelayed(Reg guest_reg, Value&& value) -{ - // ignore writes to register zero - DebugAssert(value.size == RegSize_32); - if (guest_reg == Reg::zero) - return; - - // two load delays in a row? cancel the first one. - if (guest_reg == m_state.load_delay_register) - { - DEBUG_LOG("Cancelling load delay of register {} due to new load delay", GetRegName(guest_reg)); - m_state.load_delay_register = Reg::count; - m_state.load_delay_value.ReleaseAndClear(); - } - - // two load delay case with interpreter load delay - m_code_generator.EmitCancelInterpreterLoadDelayForReg(guest_reg); - - // set up the load delay at the end of this instruction - Value& cache_value = m_state.next_load_delay_value; - Assert(m_state.next_load_delay_register == Reg::count); - m_state.next_load_delay_register = guest_reg; - - // If it's a temporary, we can bind that to the guest register. - if (value.IsScratch()) - { - DEBUG_LOG("Binding scratch register {} to load-delayed guest register {}", - m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg)); - - cache_value = std::move(value); - return; - } - - // Allocate host register, and copy value to it. - cache_value = AllocateScratch(RegSize_32); - m_code_generator.EmitCopyValue(cache_value.host_reg, value); - - DEBUG_LOG("Copying non-scratch register {} to {} to load-delayed guest register {}", - m_code_generator.GetHostRegName(value.host_reg, RegSize_32), - m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32), GetRegName(guest_reg)); -} - -void RegisterCache::UpdateLoadDelay() -{ - // flush current load delay - if (m_state.load_delay_register != Reg::count) - { - // have to clear first because otherwise it'll release the value - Reg reg = m_state.load_delay_register; - Value value = std::move(m_state.load_delay_value); - m_state.load_delay_register = Reg::count; - WriteGuestRegister(reg, std::move(value)); - } - - // next load delay -> load delay - if (m_state.next_load_delay_register != Reg::count) - { - m_state.load_delay_register = m_state.next_load_delay_register; - m_state.load_delay_value = std::move(m_state.next_load_delay_value); - m_state.next_load_delay_register = Reg::count; - } -} - -void RegisterCache::CancelLoadDelay() -{ - if (m_state.load_delay_register == Reg::count) - return; - - DEBUG_LOG("Cancelling load delay of register {}", GetRegName(m_state.load_delay_register)); - m_state.load_delay_register = Reg::count; - m_state.load_delay_value.ReleaseAndClear(); -} - -void RegisterCache::WriteLoadDelayToCPU(bool clear) -{ - // There shouldn't be a flush at the same time as there's a new load delay. 
-  Assert(m_state.next_load_delay_register == Reg::count);
-  if (m_state.load_delay_register != Reg::count)
-  {
-    DEBUG_LOG("Flushing pending load delay of {}", GetRegName(m_state.load_delay_register));
-    m_code_generator.EmitStoreInterpreterLoadDelay(m_state.load_delay_register, m_state.load_delay_value);
-    if (clear)
-    {
-      m_state.load_delay_register = Reg::count;
-      m_state.load_delay_value.ReleaseAndClear();
-    }
-  }
-}
-
-void RegisterCache::FlushLoadDelay(bool clear)
-{
-  Assert(m_state.next_load_delay_register == Reg::count);
-
-  if (m_state.load_delay_register != Reg::count)
-  {
-    // if this is an exception exit, write the new value to the CPU register file, but keep it tracked for the next
-    // non-exception-raised path. TODO: push/pop whole state would avoid this issue
-    m_code_generator.EmitStoreGuestRegister(m_state.load_delay_register, m_state.load_delay_value);
-
-    if (clear)
-    {
-      m_state.load_delay_register = Reg::count;
-      m_state.load_delay_value.ReleaseAndClear();
-    }
-  }
-}
-
-void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty)
-{
-  Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
-  if (cache_value.IsDirty())
-  {
-    if (cache_value.IsInHostRegister())
-    {
-      DEBUG_LOG("Flushing guest register {} from host register {}", GetRegName(guest_reg),
-                m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32));
-    }
-    else if (cache_value.IsConstant())
-    {
-      DEBUG_LOG("Flushing guest register {} from constant 0x{:X}", GetRegName(guest_reg), cache_value.constant_value);
-    }
-    m_code_generator.EmitStoreGuestRegister(guest_reg, cache_value);
-    if (clear_dirty)
-      cache_value.ClearDirty();
-  }
-
-  if (invalidate)
-    InvalidateGuestRegister(guest_reg);
-}
-
-void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
-{
-  Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
-  if (!cache_value.IsValid())
-    return;
-
-  if (cache_value.IsInHostRegister())
-  {
-    FreeHostReg(cache_value.host_reg);
-    ClearRegisterFromOrder(guest_reg);
-  }
-
-  DEBUG_LOG("Invalidating guest register {}", GetRegName(guest_reg));
-  cache_value.Clear();
-}
-
-void RegisterCache::InvalidateAllNonDirtyGuestRegisters()
-{
-  for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
-  {
-    Value& cache_value = m_state.guest_reg_state[reg];
-    if (cache_value.IsValid() && !cache_value.IsDirty())
-      InvalidateGuestRegister(static_cast<Reg>(reg));
-  }
-}
-
-void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
-{
-  for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
-    FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
-}
-
-void RegisterCache::FlushCallerSavedGuestRegisters(bool invalidate, bool clear_dirty)
-{
-  for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
-  {
-    const Value& gr = m_state.guest_reg_state[reg];
-    if (!gr.IsInHostRegister() ||
-        (m_state.host_reg_state[gr.GetHostRegister()] & HostRegState::CallerSaved) != HostRegState::CallerSaved)
-    {
-      continue;
-    }
-
-    FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
-  }
-}
-
-bool RegisterCache::EvictOneGuestRegister()
-{
-  if (m_state.guest_reg_order_count == 0)
-    return false;
-
-  // evict the register used the longest time ago
-  Reg evict_reg = m_state.guest_reg_order[m_state.guest_reg_order_count - 1];
-  DEBUG_LOG("Evicting guest register {}", GetRegName(evict_reg));
-  FlushGuestRegister(evict_reg, true, true);
-
-  return HasFreeHostRegister();
-}
-
-void RegisterCache::ClearRegisterFromOrder(Reg reg)
-{
-  for (u32 i = 0; i < m_state.guest_reg_order_count; i++)
-  {
-    if (m_state.guest_reg_order[i] == reg)
-    {
-      // move the registers after backwards into this spot
-      const u32 count_after = m_state.guest_reg_order_count - i - 1;
-      if (count_after > 0)
-        std::memmove(&m_state.guest_reg_order[i], &m_state.guest_reg_order[i + 1], sizeof(Reg) * count_after);
-      else
-        m_state.guest_reg_order[i] = Reg::count;
-
-      m_state.guest_reg_order_count--;
-      return;
-    }
-  }
-
-  Panic("Clearing register from order not in order");
-}
-
-void RegisterCache::PushRegisterToOrder(Reg reg)
-{
-  for (u32 i = 0; i < m_state.guest_reg_order_count; i++)
-  {
-    if (m_state.guest_reg_order[i] == reg)
-    {
-      // move the registers after backwards into this spot
-      const u32 count_before = i;
-      if (count_before > 0)
-        std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * count_before);
-
-      m_state.guest_reg_order[0] = reg;
-      return;
-    }
-  }
-
-  Panic("Attempt to push register which is not ordered");
-}
-
-void RegisterCache::AppendRegisterToOrder(Reg reg)
-{
-  DebugAssert(m_state.guest_reg_order_count < HostReg_Count);
-  if (m_state.guest_reg_order_count > 0)
-    std::memmove(&m_state.guest_reg_order[1], &m_state.guest_reg_order[0], sizeof(Reg) * m_state.guest_reg_order_count);
-  m_state.guest_reg_order[0] = reg;
-  m_state.guest_reg_order_count++;
-}
-
-void RegisterCache::InhibitAllocation()
-{
-  m_state.allocator_inhibit_count++;
-}
-
-void RegisterCache::UninhibitAllocation()
-{
-  Assert(m_state.allocator_inhibit_count > 0);
-  m_state.allocator_inhibit_count--;
-}
-
-} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h
deleted file mode 100644
index 717e55a60..000000000
--- a/src/core/cpu_recompiler_register_cache.h
+++ /dev/null
@@ -1,449 +0,0 @@
-// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
-// SPDX-License-Identifier: CC-BY-NC-ND-4.0
-
-#pragma once
-
-#include "cpu_recompiler_types.h"
-#include "cpu_types.h"
-
-#include "common/assert.h"
-
-#if defined(CPU_ARCH_ARM32)
-#include "vixl/aarch32/macro-assembler-aarch32.h"
-#elif defined(CPU_ARCH_ARM64)
-#include "vixl/aarch64/macro-assembler-aarch64.h"
-#endif
-
-#include <array>
-#include <initializer_list>
-#include <optional>
-#include <stack>
-
-namespace CPU::Recompiler {
-
-enum RegSize : u8
-{
-  RegSize_8,
-  RegSize_16,
-  RegSize_32,
-  RegSize_64,
-};
-
-#if defined(CPU_ARCH_X64)
-
-using HostReg = unsigned;
-using CodeEmitter = Xbyak::CodeGenerator;
-using LabelType = Xbyak::Label;
-enum : u32
-{
-  HostReg_Count = 16
-};
-constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
-constexpr RegSize HostPointerSize = RegSize_64;
-
-#elif defined(CPU_ARCH_ARM32)
-
-using HostReg = unsigned;
-using CodeEmitter = vixl::aarch32::MacroAssembler;
-using LabelType = vixl::aarch32::Label;
-enum : u32
-{
-  HostReg_Count = vixl::aarch32::kNumberOfRegisters
-};
-constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
-constexpr RegSize HostPointerSize = RegSize_32;
-
-#elif defined(CPU_ARCH_ARM64)
-
-using HostReg = unsigned;
-using CodeEmitter = vixl::aarch64::MacroAssembler;
-using LabelType = vixl::aarch64::Label;
-enum : u32
-{
-  HostReg_Count = vixl::aarch64::kNumberOfRegisters
-};
-constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
-constexpr RegSize HostPointerSize = RegSize_64;
-
-#else
-
-#error Unknown architecture.
-
-#endif
-
-class CodeGenerator;
-class RegisterCache;
-
-enum class HostRegState : u8
-{
-  None = 0,
-  Usable = (1 << 1),               // Can be allocated
-  CallerSaved = (1 << 2),          // Register is caller-saved, and should be saved/restored after calling a function.
-  CalleeSaved = (1 << 3),          // Register is callee-saved, and should be restored after leaving the block.
-  InUse = (1 << 4),                // In-use, must be saved/restored across function call.
-  CalleeSavedAllocated = (1 << 5), // Register was callee-saved and allocated, so should be restored before returning.
-  Discarded = (1 << 6),            // Register contents is not used, so do not preserve across function calls.
-};
-IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(HostRegState);
-
-enum class ValueFlags : u8
-{
-  None = 0,
-  Valid = (1 << 0),
-  Constant = (1 << 1),       // The value itself is constant, and not in a register.
-  InHostRegister = (1 << 2), // The value itself is located in a host register.
-  Scratch = (1 << 3),        // The value is temporary, and will be released after the Value is destroyed.
-  Dirty = (1 << 4),          // For register cache values, the value needs to be written back to the CPU struct.
-};
-IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(ValueFlags);
-
-struct Value
-{
-  RegisterCache* regcache = nullptr;
-  u64 constant_value = 0;
-  HostReg host_reg = {};
-
-  RegSize size = RegSize_8;
-  ValueFlags flags = ValueFlags::None;
-
-  Value();
-  Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_);
-  Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_);
-  Value(const Value& other);
-  Value(Value&& other);
-  ~Value();
-
-  Value& operator=(const Value& other);
-  Value& operator=(Value&& other);
-
-  bool IsConstant() const { return (flags & ValueFlags::Constant) != ValueFlags::None; }
-  bool IsValid() const { return (flags & ValueFlags::Valid) != ValueFlags::None; }
-  bool IsInHostRegister() const { return (flags & ValueFlags::InHostRegister) != ValueFlags::None; }
-  bool IsScratch() const { return (flags & ValueFlags::Scratch) != ValueFlags::None; }
-
-  /// Returns the host register this value is bound to.
-  HostReg GetHostRegister() const
-  {
-    DebugAssert(IsInHostRegister());
-    return host_reg;
-  }
-
-  /// Returns true if this value is constant and has the specified value.
-  bool HasConstantValue(u64 cv) const
-  {
-    return (((flags & ValueFlags::Constant) != ValueFlags::None) && constant_value == cv);
-  }
-
-  /// Removes the contents of this value. Use with care, as scratch/temporaries are not released.
-  void Clear();
-
-  /// Releases the host register if needed, and clears the contents.
-  void ReleaseAndClear();
-
-  /// Flags the value is being discarded. Call Undiscard() to track again.
-  void Discard();
-  void Undiscard();
-
-  void AddHostReg(RegisterCache* regcache_, HostReg hr)
-  {
-    DebugAssert(IsValid());
-    regcache = regcache_;
-    host_reg = hr;
-    flags |= ValueFlags::InHostRegister;
-  }
-
-  void SetHostReg(RegisterCache* regcache_, HostReg hr, RegSize size_)
-  {
-    regcache = regcache_;
-    constant_value = 0;
-    host_reg = hr;
-    size = size_;
-    flags = ValueFlags::Valid | ValueFlags::InHostRegister;
-  }
-
-  void ClearConstant()
-  {
-    // By clearing the constant bit, we should already be in a host register.
-    DebugAssert(IsInHostRegister());
-    flags &= ~ValueFlags::Constant;
-  }
-
-  bool IsDirty() const { return (flags & ValueFlags::Dirty) != ValueFlags::None; }
-  void SetDirty() { flags |= ValueFlags::Dirty; }
-  void ClearDirty() { flags &= ~ValueFlags::Dirty; }
-
-  /// Returns the same register viewed as a different size.
-  Value ViewAsSize(RegSize view_size) const
-  {
-    if (view_size == size)
-      return *this;
-
-    if (IsConstant())
-    {
-      // truncate to size
-      switch (view_size)
-      {
-        case RegSize_8:
-          return Value::FromConstant(constant_value & UINT64_C(0xFF), RegSize_8);
-        case RegSize_16:
-          return Value::FromConstant(constant_value & UINT64_C(0xFFFF), RegSize_16);
-        case RegSize_32:
-          return Value::FromConstant(constant_value & UINT64_C(0xFFFFFFFF), RegSize_32);
-        case RegSize_64:
-        default:
-          return Value::FromConstant(constant_value, view_size);
-      }
-    }
-
-    if (IsInHostRegister())
-      return Value::FromHostReg(regcache, host_reg, view_size);
-
-    // invalid?
-    return Value();
-  }
-
-  /// Returns the constant value as a signed 32-bit integer, suitable as an immediate.
-  s32 GetS32ConstantValue() const
-  {
-    switch (size)
-    {
-      case RegSize_8:
-        return static_cast<s32>(SignExtend32(Truncate8(constant_value)));
-
-      case RegSize_16:
-        return static_cast<s32>(SignExtend32(Truncate16(constant_value)));
-
-      case RegSize_32:
-      case RegSize_64:
-      default:
-        return static_cast<s32>(constant_value);
-    }
-  }
-
-  /// Returns the constant value as a signed 64-bit integer, suitable as an immediate.
-  s64 GetS64ConstantValue() const
-  {
-    switch (size)
-    {
-      case RegSize_8:
-        return static_cast<s64>(SignExtend64(Truncate8(constant_value)));
-
-      case RegSize_16:
-        return static_cast<s64>(SignExtend64(Truncate16(constant_value)));
-
-      case RegSize_32:
-        return static_cast<s64>(SignExtend64(Truncate32(constant_value)));
-
-      case RegSize_64:
-      default:
-        return static_cast<s64>(constant_value);
-    }
-  }
-
-  static Value FromHostReg(RegisterCache* regcache, HostReg reg, RegSize size)
-  {
-    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister);
-  }
-  static Value FromScratch(RegisterCache* regcache, HostReg reg, RegSize size)
-  {
-    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister | ValueFlags::Scratch);
-  }
-  static Value FromConstant(u64 cv, RegSize size)
-  {
-    return Value(nullptr, cv, size, ValueFlags::Valid | ValueFlags::Constant);
-  }
-  static Value FromConstantU8(u8 value) { return FromConstant(ZeroExtend64(value), RegSize_8); }
-  static Value FromConstantU16(u16 value) { return FromConstant(ZeroExtend64(value), RegSize_16); }
-  static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); }
-  static Value FromConstantS32(s32 value) { return FromConstant(ZeroExtend64(static_cast<u32>(value)), RegSize_32); }
-  static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); }
-  static Value FromConstantPtr(const void* pointer)
-  {
-#if defined(CPU_ARCH_ARM64) || defined(CPU_ARCH_X64)
-    return FromConstant(static_cast<u64>(reinterpret_cast<uintptr_t>(pointer)), RegSize_64);
-#elif defined(CPU_ARCH_ARM32)
-    return FromConstant(static_cast<u32>(reinterpret_cast<uintptr_t>(pointer)), RegSize_32);
-#else
-    return FromConstant(0, RegSize_32);
-#endif
-  }
-
-private:
-  void Release();
-};
-
-class RegisterCache
-{
-public:
-  RegisterCache(CodeGenerator& code_generator);
-  ~RegisterCache();
-
-  u32 GetActiveCalleeSavedRegisterCount() const { return m_state.callee_saved_order_count; }
-
-  //////////////////////////////////////////////////////////////////////////
-  // Register Allocation
-  //////////////////////////////////////////////////////////////////////////
-  void SetHostRegAllocationOrder(std::initializer_list<HostReg> regs);
-  void SetCallerSavedHostRegs(std::initializer_list<HostReg> regs);
-  void SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs);
-  void SetCPUPtrHostReg(HostReg reg);
-
-  /// Returns true if the register is permitted to be used in the register cache.
-  bool IsUsableHostReg(HostReg reg) const;
-  bool IsHostRegInUse(HostReg reg) const;
-  bool HasFreeHostRegister() const;
-  u32 GetUsedHostRegisters() const;
-  u32 GetFreeHostRegisters() const;
-
-  /// Allocates a new host register. If there are no free registers, the guest register which was accessed the longest
-  /// time ago will be evicted.
-  HostReg AllocateHostReg(HostRegState state = HostRegState::InUse);
-
-  /// Allocates a specific host register. If this register is not free, returns false.
-  bool AllocateHostReg(HostReg reg, HostRegState state = HostRegState::InUse);
-
-  /// Flags the host register as discard-able. This means that the contents is no longer required, and will not be
-  /// pushed when saving caller-saved registers.
-  void DiscardHostReg(HostReg reg);
-
-  /// Clears the discard-able flag on a host register, so that the contents will be preserved across function calls.
-  void UndiscardHostReg(HostReg reg);
-
-  /// Frees a host register, making it usable in future allocations.
-  void FreeHostReg(HostReg reg);
-
-  /// Ensures a host register is free, removing any value cached.
-  void EnsureHostRegFree(HostReg reg);
-
-  /// Preallocates caller saved registers, enabling later use without stack pushes.
-  void ReserveCallerSavedRegisters();
-
-  /// Push/pop volatile host registers. Returns the number of registers pushed/popped.
-  u32 PushCallerSavedRegisters() const;
-  u32 PopCallerSavedRegisters() const;
-
-  /// Restore callee-saved registers. Call at the end of the function.
-  u32 PopCalleeSavedRegisters(bool commit);
-
-  /// Preallocates caller saved registers, enabling later use without stack pushes.
-  void ReserveCalleeSavedRegisters();
-
-  /// Removes the callee-saved register flag from all registers. Call when compiling code blocks.
-  void AssumeCalleeSavedRegistersAreSaved();
-
-  /// Pushes the register allocator state, use when entering branched code.
-  void PushState();
-
-  /// Pops the register allocator state, use when leaving branched code.
-  void PopState();
-
-  //////////////////////////////////////////////////////////////////////////
-  // Scratch Register Allocation
-  //////////////////////////////////////////////////////////////////////////
-  Value GetCPUPtr();
-  Value AllocateScratch(RegSize size, HostReg reg = HostReg_Invalid);
-
-  //////////////////////////////////////////////////////////////////////////
-  // Guest Register Caching
-  //////////////////////////////////////////////////////////////////////////
-
-  /// Returns true if the specified guest register is cached.
-  bool IsGuestRegisterCached(Reg guest_reg) const
-  {
-    const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
-    return cache_value.IsConstant() || cache_value.IsInHostRegister();
-  }
-
-  /// Returns true if the specified guest register is cached and in a host register.
-  bool IsGuestRegisterInHostRegister(Reg guest_reg) const
-  {
-    const Value& cache_value = m_state.guest_reg_state[static_cast<u8>(guest_reg)];
-    return cache_value.IsInHostRegister();
-  }
-
-  /// Returns the host register if the guest register is cached.
-  std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
-  {
-    if (!m_state.guest_reg_state[static_cast<u8>(guest_reg)].IsInHostRegister())
-      return std::nullopt;
-    return m_state.guest_reg_state[static_cast<u8>(guest_reg)].GetHostRegister();
-  }
-
-  /// Returns true if there is a load delay which will be stored at the end of the instruction.
-  bool HasLoadDelay() const { return m_state.load_delay_register != Reg::count; }
-
-  Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false,
-                          HostReg forced_host_reg = HostReg_Invalid);
-
-  /// Reads the guest register to a caller-owned scratch register. This will ensure the cache won't invalidate the value
-  /// from some other write.
-  Value ReadGuestRegisterToScratch(Reg guest_reg);
-
-  /// Creates a copy of value, and stores it to guest_reg.
-  Value WriteGuestRegister(Reg guest_reg, Value&& value);
-
-  /// Stores the specified value to the guest register after the next instruction (load delay).
-  void WriteGuestRegisterDelayed(Reg guest_reg, Value&& value);
-
-  /// Returns the current target for a load delay, or Reg::count.
-  Reg GetLoadDelayRegister() const { return m_state.load_delay_register; }
-  const Value& GetLoadDelayValue() const { return m_state.load_delay_value; }
-
-  /// Moves load delay to the next load delay, and writes any previous load delay to the destination register.
-  void UpdateLoadDelay();
-
-  /// Cancels any present load delay.
-  void CancelLoadDelay();
-
-  /// Writes the load delay to the CPU structure, so it is synced up with the interpreter.
-  void WriteLoadDelayToCPU(bool clear);
-
-  /// Flushes the load delay, i.e. writes it to the destination register.
-  void FlushLoadDelay(bool clear);
-
-  void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
-  void InvalidateGuestRegister(Reg guest_reg);
-
-  void InvalidateAllNonDirtyGuestRegisters();
-  void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
-  void FlushCallerSavedGuestRegisters(bool invalidate, bool clear_dirty);
-  bool EvictOneGuestRegister();
-
-  /// Temporarily prevents register allocation.
-  void InhibitAllocation();
-  void UninhibitAllocation();
-
-private:
-  void ClearRegisterFromOrder(Reg reg);
-  void PushRegisterToOrder(Reg reg);
-  void AppendRegisterToOrder(Reg reg);
-
-  CodeGenerator& m_code_generator;
-
-  std::array<HostReg, HostReg_Count> m_host_register_allocation_order{};
-
-  HostReg m_cpu_ptr_host_register = {};
-
-  struct RegAllocState
-  {
-    std::array<HostRegState, HostReg_Count> host_reg_state{};
-    std::array<HostReg, HostReg_Count> callee_saved_order{};
-    std::array<Value, static_cast<u8>(Reg::count)> guest_reg_state{};
-    std::array<Reg, HostReg_Count> guest_reg_order{};
-
-    u32 available_count = 0;
-    u32 callee_saved_order_count = 0;
-    u32 guest_reg_order_count = 0;
-    u32 allocator_inhibit_count = 0;
-
-    Reg load_delay_register = Reg::count;
-    Value load_delay_value{};
-
-    Reg next_load_delay_register = Reg::count;
-    Value next_load_delay_value{};
-  } m_state;
-
-  std::stack<RegAllocState> m_state_stack;
-};
-
-} // namespace CPU::Recompiler
\ No newline at end of file
diff --git a/src/core/cpu_newrec_compiler_riscv64.cpp b/src/core/cpu_recompiler_riscv64.cpp
similarity index 84%
rename from src/core/cpu_newrec_compiler_riscv64.cpp
rename to src/core/cpu_recompiler_riscv64.cpp
index 61ac7604b..35b42290f 100644
--- a/src/core/cpu_newrec_compiler_riscv64.cpp
+++ b/src/core/cpu_recompiler_riscv64.cpp
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
 // SPDX-License-Identifier: CC-BY-NC-ND-4.0
 
-#include "cpu_newrec_compiler_riscv64.h"
+#include "cpu_recompiler_riscv64.h"
 #include "cpu_code_cache_private.h"
 #include "cpu_core_private.h"
 #include "cpu_pgxp.h"
@@ -33,7 +33,7 @@ extern "C" {
 
 static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr
 
-namespace CPU::NewRec {
+namespace CPU::Recompiler {
 
 using namespace biscuit;
 
@@ -53,10 +53,10 @@ using CPU::Recompiler::rvIsCallerSavedRegister;
 using CPU::Recompiler::rvIsValidSExtITypeImm;
 using CPU::Recompiler::rvMoveAddressToReg;
 
-RISCV64Compiler s_instance;
-Compiler* g_compiler = &s_instance;
+RISCV64Recompiler s_instance;
+Recompiler* g_compiler = &s_instance;
 
-} // namespace CPU::NewRec
+} // namespace CPU::Recompiler
 
 bool CPU::Recompiler::rvIsCallerSavedRegister(u32 id)
 {
@@ -332,19 +332,19 @@ u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
   return BLOCK_LINK_SIZE;
 }
 
-CPU::NewRec::RISCV64Compiler::RISCV64Compiler() = default;
+CPU::Recompiler::RISCV64Recompiler::RISCV64Recompiler() = default;
 
-CPU::NewRec::RISCV64Compiler::~RISCV64Compiler() = default;
+CPU::Recompiler::RISCV64Recompiler::~RISCV64Recompiler() = default;
 
-const void* CPU::NewRec::RISCV64Compiler::GetCurrentCodePointer()
+const void* CPU::Recompiler::RISCV64Recompiler::GetCurrentCodePointer()
 {
   return rvAsm->GetCursorPointer();
 }
 
-void CPU::NewRec::RISCV64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
-                                         u8* far_code_buffer, u32 far_code_space)
+void CPU::Recompiler::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
+                                               u8* far_code_buffer, u32 far_code_space)
 {
-  Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
+  Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
 
   // TODO: don't recreate this every time..
   DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
@@ -370,7 +370,7 @@ void CPU::NewRec::RISCV64Compiler::Reset(CodeCache::Block* block, u8* code_buffe
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::SwitchToFarCode(
+void CPU::Recompiler::RISCV64Recompiler::SwitchToFarCode(
   bool emit_jump,
   void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) /* = nullptr */,
   const biscuit::GPR& rs1 /* = biscuit::zero */, const biscuit::GPR& rs2 /* = biscuit::zero */)
@@ -394,7 +394,7 @@ void CPU::NewRec::RISCV64Compiler::SwitchToFarCode(
   rvAsm = m_far_emitter.get();
 }
 
-void CPU::NewRec::RISCV64Compiler::SwitchToNearCode(bool emit_jump)
+void CPU::Recompiler::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)
 {
   DebugAssert(rvAsm == m_far_emitter.get());
   if (emit_jump)
@@ -402,19 +402,19 @@ void CPU::NewRec::RISCV64Compiler::SwitchToNearCode(bool emit_jump)
   rvAsm = m_emitter.get();
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitMov(const biscuit::GPR& dst, u32 val)
+void CPU::Recompiler::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)
 {
   rvEmitMov(rvAsm, dst, val);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitCall(const void* ptr)
+void CPU::Recompiler::RISCV64Recompiler::EmitCall(const void* ptr)
 {
   rvEmitCall(rvAsm, ptr);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
-                                                    void (biscuit::Assembler::*iop)(GPR, GPR, u32),
-                                                    void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
+void CPU::Recompiler::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
+                                                          void (biscuit::Assembler::*iop)(GPR, GPR, u32),
+                                                          void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
 {
   DebugAssert(rd != RSCRATCH && rs != RSCRATCH);
 
@@ -428,83 +428,83 @@ void CPU::NewRec::RISCV64Compiler::SafeImmSExtIType(const biscuit::GPR& rd, cons
   (rvAsm->*rop)(rd, rs, RSCRATCH);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
                    &Assembler::ADD);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                    &Assembler::ADDW);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
   SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                    &Assembler::ADDW);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
}
 
-void CPU::NewRec::RISCV64Compiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
                    &Assembler::SLT);
 }
 
-void CPU::NewRec::RISCV64Compiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
+void CPU::Recompiler::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
 {
   SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
                    &Assembler::SLTU);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitSExtB(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitUExtB(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitSExtH(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitUExtH(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitDSExtW(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
+void CPU::Recompiler::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
 {
   rvEmitDUExtW(rvAsm, rd, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
+void CPU::Recompiler::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
 {
   // store it first to reduce code size, because we can offset
   // TODO: 64-bit displacement is needed :/
@@ -543,7 +543,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr,
   rvAsm->Bind(&block_unchanged);
 }
 
-void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
+void CPU::Recompiler::RISCV64Recompiler::GenerateICacheCheckAndUpdate()
 {
   if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
   {
@@ -599,8 +599,8 @@
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
-                                                s32 arg3reg /*= -1*/)
+void CPU::Recompiler::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
+                                                      s32 arg3reg /*= -1*/)
 {
   if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index()))
     rvAsm->MV(RARG1, GPR(arg1reg));
@@ -611,7 +611,7 @@ void CPU::NewRec::RISCV64Compiler::GenerateCall(const void* func, s32 arg1reg /*
   EmitCall(func);
 }
 
-void CPU::NewRec::RISCV64Compiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
+void CPU::Recompiler::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
 {
   if (newpc.has_value())
  {
@@ -628,7 +628,7 @@ void CPU::NewRec::RISCV64Compiler::EndBlock(const std::optional<u32>& newpc, boo
   EndAndLinkBlock(newpc, do_event_test, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::EndBlockWithException(Exception excode)
+void CPU::Recompiler::RISCV64Recompiler::EndBlockWithException(Exception excode)
 {
   // flush regs, but not pc, it's going to get overwritten
   // flush cycles because of the GTE instruction stuff...
@@ -646,8 +646,8 @@ void CPU::NewRec::RISCV64Compiler::EndBlockWithException(Exception excode)
   EndAndLinkBlock(std::nullopt, true, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test,
-                                                   bool force_run_events)
+void CPU::Recompiler::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test,
+                                                         bool force_run_events)
 {
   // event test
   // pc should've been flushed
@@ -711,7 +711,7 @@ void CPU::NewRec::RISCV64Compiler::EndAndLinkBlock(const std::optional<u32>& new
   }
 }
 
-const void* CPU::NewRec::RISCV64Compiler::EndCompile(u32* code_size, u32* far_code_size)
+const void* CPU::Recompiler::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)
 {
   u8* const code = m_emitter->GetBufferPointer(0);
   *code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());
@@ -722,7 +722,7 @@ const void* CPU::NewRec::RISCV64Compiler::EndCompile(u32* code_size, u32* far_co
   return code;
 }
 
-const char* CPU::NewRec::RISCV64Compiler::GetHostRegName(u32 reg) const
+const char* CPU::Recompiler::RISCV64Recompiler::GetHostRegName(u32 reg) const
 {
   static constexpr std::array<const char*, 32> reg64_names = {
     {"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
@@ -730,22 +730,22 @@ const char* CPU::NewRec::RISCV64Compiler::GetHostRegName(u32 reg) const
   return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
 }
 
-void CPU::NewRec::RISCV64Compiler::LoadHostRegWithConstant(u32 reg, u32 val)
+void CPU::Recompiler::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
 {
   EmitMov(GPR(reg), val);
 }
 
-void CPU::NewRec::RISCV64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
+void CPU::Recompiler::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
 {
   rvAsm->LW(GPR(reg), PTR(ptr));
 }
 
-void CPU::NewRec::RISCV64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
+void CPU::Recompiler::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
 {
   rvAsm->SW(GPR(reg), PTR(ptr));
 }
 
-void CPU::NewRec::RISCV64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
+void CPU::Recompiler::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
 {
   if (val == 0)
   {
@@ -757,23 +757,23 @@ void CPU::NewRec::RISCV64Compiler::StoreConstantToCPUPointer(u32 val, const void
   rvAsm->SW(RSCRATCH, PTR(ptr));
 }
 
-void CPU::NewRec::RISCV64Compiler::CopyHostReg(u32 dst, u32 src)
+void CPU::Recompiler::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)
 {
   if (src != dst)
     rvAsm->MV(GPR(dst), GPR(src));
 }
 
-void CPU::NewRec::RISCV64Compiler::AssertRegOrConstS(CompileFlags cf) const
+void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_s || cf.const_s);
 }
 
-void CPU::NewRec::RISCV64Compiler::AssertRegOrConstT(CompileFlags cf) const
+void CPU::Recompiler::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_t || cf.const_t);
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
 {
   if (cf.valid_host_s)
  {
@@ -795,7 +795,7 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegS(CompileFlags cf, const
   }
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
 {
   if (cf.valid_host_t)
  {
@@ -817,37 +817,37 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegT(CompileFlags cf, const
   }
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegD(CompileFlags cf) const
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_d);
   return GPR(cf.host_d);
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegS(CompileFlags cf) const
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_s);
   return GPR(cf.host_s);
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegT(CompileFlags cf) const
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_t);
   return GPR(cf.host_t);
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegLO(CompileFlags cf) const
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_lo);
   return GPR(cf.host_lo);
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegHI(CompileFlags cf) const
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const
 {
   DebugAssert(cf.valid_host_hi);
   return GPR(cf.host_hi);
 }
 
-void CPU::NewRec::RISCV64Compiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
 {
   if (cf.valid_host_s)
  {
@@ -865,7 +865,7 @@ void CPU::NewRec::RISCV64Compiler::MoveSToReg(const biscuit::GPR& dst, CompileFl
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
 {
   if (cf.valid_host_t)
  {
@@ -883,10 +883,10 @@ void CPU::NewRec::RISCV64Compiler::MoveTToReg(const biscuit::GPR& dst, CompileFl
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
+void CPU::Recompiler::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
 {
   DebugAssert(reg < Reg::count);
-  if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg))
+  if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
     rvAsm->MV(dst, GPR(hreg.value()));
   else if (HasConstantReg(reg))
     EmitMov(dst, GetConstantRegU32(reg));
@@ -894,9 +894,9 @@ void CPU::NewRec::RISCV64Compiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg
     rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
 }
 
-void CPU::NewRec::RISCV64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val,
-                                                                Reg arg2reg /* = Reg::count */,
-                                                                Reg arg3reg /* = Reg::count */)
+void CPU::Recompiler::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val,
+                                                                      Reg arg2reg /* = Reg::count */,
+                                                                      Reg arg3reg /* = Reg::count */)
 {
   DebugAssert(g_settings.gpu_pgxp_enable);
 
@@ -911,9 +911,9 @@ void CPU::NewRec::RISCV64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func
   EmitCall(func);
 }
 
-void CPU::NewRec::RISCV64Compiler::Flush(u32 flags)
+void CPU::Recompiler::RISCV64Recompiler::Flush(u32 flags)
 {
-  Compiler::Flush(flags);
+  Recompiler::Flush(flags);
 
   if (flags & FLUSH_PC && m_dirty_pc)
  {
@@ -1000,7 +1000,7 @@ void CPU::NewRec::RISCV64Compiler::Flush(u32 flags)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_Fallback()
+void CPU::Recompiler::RISCV64Recompiler::Compile_Fallback()
 {
   WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
 
@@ -1028,7 +1028,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_Fallback()
 #endif
 }
 
-void CPU::NewRec::RISCV64Compiler::CheckBranchTarget(const biscuit::GPR& pcreg)
+void CPU::Recompiler::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
 {
   if (!g_settings.cpu_recompiler_memory_exceptions)
     return;
@@ -1044,7 +1044,7 @@ void CPU::NewRec::RISCV64Compiler::CheckBranchTarget(const biscuit::GPR& pcreg)
   SwitchToNearCode(false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_jr(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_jr(CompileFlags cf)
 {
   const GPR pcreg = CFGetRegS(cf);
   CheckBranchTarget(pcreg);
@@ -1055,7 +1055,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_jr(CompileFlags cf)
   EndBlock(std::nullopt, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_jalr(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
 {
   const GPR pcreg = CFGetRegS(cf);
   if (MipsD() != Reg::zero)
@@ -1068,7 +1068,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_jalr(CompileFlags cf)
   EndBlock(std::nullopt, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
+void CPU::Recompiler::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
 {
   AssertRegOrConstS(cf);
 
@@ -1098,7 +1098,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
   const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
   if (!cf.valid_host_t)
     MoveTToReg(RARG1, cf);
-  if (cond == Compiler::BranchCondition::Equal)
+  if (cond == Recompiler::BranchCondition::Equal)
     rvAsm->BEQ(rs, rt, &taken);
   else
     rvAsm->BNE(rs, rt, &taken);
@@ -1146,7 +1146,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition
   EndBlock(taken_pc, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf, bool overflow)
+void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
 {
   const GPR rs = CFGetRegS(cf);
   const GPR rt = CFGetRegT(cf);
@@ -1169,27 +1169,27 @@ void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf, bool overflow)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_addi(CompileFlags cf)
 {
   Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_addiu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
 {
   Compile_addi(cf, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf)
 {
   Compile_slti(cf, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sltiu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
 {
   Compile_slti(cf, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf, bool sign)
+void CPU::Recompiler::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign)
 {
   if (sign)
     SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
@@ -1197,7 +1197,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf, bool sign)
     SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_andi(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_andi(CompileFlags cf)
 {
   const GPR rt = CFGetRegT(cf);
   if (const u32 imm = inst->i.imm_zext32(); imm != 0)
@@ -1206,7 +1206,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_andi(CompileFlags cf)
     EmitMov(rt, 0);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_ori(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_ori(CompileFlags cf)
 {
   const GPR rt = CFGetRegT(cf);
   const GPR rs = CFGetRegS(cf);
@@ -1216,7 +1216,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_ori(CompileFlags cf)
     rvAsm->MV(rt, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_xori(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_xori(CompileFlags cf)
 {
   const GPR rt = CFGetRegT(cf);
   const GPR rs = CFGetRegS(cf);
@@ -1226,7 +1226,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_xori(CompileFlags cf)
     rvAsm->MV(rt, rs);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_shift(
+void CPU::Recompiler::RISCV64Recompiler::Compile_shift(
   CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
   void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
 {
@@ -1238,22 +1238,22 @@ void CPU::NewRec::RISCV64Compiler::Compile_shift(
     rvAsm->MV(rd, rt);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sll(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sll(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_srl(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_srl(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sra(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sra(CompileFlags cf)
 {
   Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_variable_shift(
+void CPU::Recompiler::RISCV64Recompiler::Compile_variable_shift(
   CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
   void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
 {
@@ -1279,22 +1279,22 @@ void CPU::NewRec::RISCV64Compiler::Compile_variable_shift(
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sllv(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sllv(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_srlv(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_srlv(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_srav(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_srav(CompileFlags cf)
 {
   Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf, bool sign)
+void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign)
 {
   const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
   if (!cf.valid_host_s)
@@ -1325,17 +1325,17 @@ void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf, bool sign)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_mult(CompileFlags cf)
 {
   Compile_mult(cf, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_multu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_multu(CompileFlags cf)
 {
   Compile_mult(cf, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_div(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_div(CompileFlags cf)
 {
   // 36 Volume I: RISC-V User-Level ISA V2.2
   const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
@@ -1375,7 +1375,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_div(CompileFlags cf)
   rvAsm->Bind(&done);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_divu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_divu(CompileFlags cf)
 {
   const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
   if (!cf.valid_host_s)
@@ -1393,8 +1393,8 @@ void CPU::NewRec::RISCV64Compiler::Compile_divu(CompileFlags cf)
   rvAsm->REMUW(rhi, rs, rt);
 }
 
-void CPU::NewRec::RISCV64Compiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
-                                                const biscuit::GPR& reg_to_discard)
+void CPU::Recompiler::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
+                                                      const biscuit::GPR& reg_to_discard)
 {
   SwitchToFarCode(true, &Assembler::BEQ, long_res, res);
 
@@ -1410,9 +1410,9 @@ void CPU::NewRec::RISCV64Compiler::TestOverflow(const biscuit::GPR& long_res, co
   SwitchToNearCode(false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_dst_op(
+void CPU::Recompiler::RISCV64Recompiler::Compile_dst_op(
   CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
-  void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
+  void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
   void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
 {
   AssertRegOrConstS(cf);
@@ -1476,29 +1476,29 @@ void CPU::NewRec::RISCV64Compiler::Compile_dst_op(
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_add(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_add(CompileFlags cf)
 {
-  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true,
+  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,
                  g_settings.cpu_recompiler_memory_exceptions);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_addu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_addu(CompileFlags cf)
 {
-  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true, false);
+  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sub(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sub(CompileFlags cf)
 {
-  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false,
+  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,
                  g_settings.cpu_recompiler_memory_exceptions);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_subu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_subu(CompileFlags cf)
 {
-  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false, false);
+  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_and(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_and(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1516,10 +1516,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_and(CompileFlags cf)
     return;
   }
 
-  Compile_dst_op(cf, &Assembler::AND, &RISCV64Compiler::SafeANDI, &Assembler::AND, true, false);
+  Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_or(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_or(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1532,10 +1532,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_or(CompileFlags cf)
     return;
   }
 
-  Compile_dst_op(cf, &Assembler::OR, &RISCV64Compiler::SafeORI, &Assembler::OR, true, false);
+  Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_xor(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_xor(CompileFlags cf)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1554,26 +1554,26 @@ void CPU::NewRec::RISCV64Compiler::Compile_xor(CompileFlags cf)
     return;
   }
 
-  Compile_dst_op(cf, &Assembler::XOR, &RISCV64Compiler::SafeXORI, &Assembler::XOR, true, false);
+  Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_nor(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_nor(CompileFlags cf)
 {
   Compile_or(cf);
   rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf)
 {
   Compile_slt(cf, true);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sltu(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sltu(CompileFlags cf)
 {
   Compile_slt(cf, false);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf, bool sign)
+void CPU::Recompiler::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -1598,7 +1598,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf, bool sign)
   }
 }
 
-biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::ComputeLoadStoreAddressArg(
   CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, const std::optional<const biscuit::GPR>& reg)
 {
   const u32 imm = inst->i.imm_sext32();
@@ -1639,8 +1639,9 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
 }
 
 template<typename RegAllocFn>
-biscuit::GPR CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
-                                                        bool use_fastmem, const RegAllocFn& dst_reg_alloc)
+biscuit::GPR CPU::Recompiler::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size,
+                                                              bool sign, bool use_fastmem,
+                                                              const RegAllocFn& dst_reg_alloc)
 {
   if (use_fastmem)
   {
@@ -1769,8 +1770,8 @@ biscuit::GPR CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr
   return dst_reg;
 }
 
-void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
-                                                 MemoryAccessSize size, bool use_fastmem)
+void CPU::Recompiler::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
+                                                       MemoryAccessSize size, bool use_fastmem)
 {
   if (use_fastmem)
   {
@@ -1869,8 +1870,9 @@ void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, c
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                               const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign,
+                                                     bool use_fastmem,
+                                                     const std::optional<VirtualMemoryAddress>& address)
 {
   const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
                                         std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
@@ -1897,8 +1899,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                               const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign,
+                                                     bool use_fastmem,
+                                                     const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
 
@@ -1991,8 +1994,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+                                                      bool use_fastmem,
+                                                      const std::optional<VirtualMemoryAddress>& address)
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -2076,8 +2080,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSiz
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                               const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign,
                                                     bool use_fastmem,
+                                                     const std::optional<VirtualMemoryAddress>& address)
 {
   AssertRegOrConstS(cf);
   AssertRegOrConstT(cf);
@@ -2103,8 +2108,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                               const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign,
+                                                     bool use_fastmem,
+                                                     const std::optional<VirtualMemoryAddress>& address)
 {
   DebugAssert(size == MemoryAccessSize::Word && !sign);
 
@@ -2177,8 +2183,9 @@ void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
-                                                const std::optional<VirtualMemoryAddress>& address)
+void CPU::Recompiler::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign,
+                                                      bool use_fastmem,
+                                                      const std::optional<VirtualMemoryAddress>& address)
 {
   const u32 index = static_cast<u32>(inst->r.rt.GetValue());
   const auto [ptr, action] = GetGTERegisterPointer(index, false);
@@ -2234,7 +2241,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSiz
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
 {
   // TODO: we need better constant setting here.. which will need backprop
   AssertRegOrConstT(cf);
@@ -2314,7 +2321,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_rfe(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_rfe(CompileFlags cf)
 {
   // shift mode bits right two, preserving upper bits
   rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
@@ -2327,7 +2334,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_rfe(CompileFlags cf)
   TestInterrupts(RARG1);
 }
 
-void CPU::NewRec::RISCV64Compiler::TestInterrupts(const biscuit::GPR& sr)
+void CPU::Recompiler::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)
 {
   DebugAssert(sr != RSCRATCH);
 
@@ -2380,7 +2387,7 @@ void CPU::NewRec::RISCV64Compiler::TestInterrupts(const biscuit::GPR& sr)
   rvAsm->Bind(&no_interrupt);
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_mfc2(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)
 {
   const u32 index = inst->cop.Cop2Index();
   const Reg rt = inst->r.rt;
@@ -2420,7 +2427,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mfc2(CompileFlags cf)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_mtc2(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)
 {
   const u32 index = inst->cop.Cop2Index();
   const auto [ptr, action] = GetGTERegisterPointer(index, true);
@@ -2482,7 +2489,7 @@ void CPU::NewRec::RISCV64Compiler::Compile_mtc2(CompileFlags cf)
   }
 }
 
-void CPU::NewRec::RISCV64Compiler::Compile_cop2(CompileFlags cf)
+void CPU::Recompiler::RISCV64Recompiler::Compile_cop2(CompileFlags cf)
 {
   TickCount func_ticks;
   GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);
@@ -2494,10 +2501,10 @@ void CPU::NewRec::RISCV64Compiler::Compile_cop2(CompileFlags cf)
   AddGTETicks(func_ticks);
 }
 
-u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
-                                       TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
-                                       u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
-                                       bool is_load)
+u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
+                                           TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
+                                           u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
+                                           bool is_load)
 {
   Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);
   Assembler* rvAsm = &arm_asm;
diff --git a/src/core/cpu_newrec_compiler_riscv64.h b/src/core/cpu_recompiler_riscv64.h
similarity index 96%
rename from src/core/cpu_newrec_compiler_riscv64.h
rename to src/core/cpu_recompiler_riscv64.h
index 0d41bb035..58f0a860e 100644
--- a/src/core/cpu_newrec_compiler_riscv64.h
+++ b/src/core/cpu_recompiler_riscv64.h
@@ -3,19 +3,19 @@
 
 #pragma once
 
-#include "cpu_newrec_compiler.h"
+#include "cpu_recompiler.h"
 
 #include <memory>
 
 #ifdef CPU_ARCH_RISCV64
 
-namespace CPU::NewRec {
+namespace CPU::Recompiler {
 
-class RISCV64Compiler final : public Compiler
+class RISCV64Recompiler final : public Recompiler
 {
 public:
-  RISCV64Compiler();
-  ~RISCV64Compiler() override;
+  RISCV64Recompiler();
+  ~RISCV64Recompiler() override;
 
 protected:
   const char* GetHostRegName(u32 reg) const override;
@@ -74,7 +74,7 @@ protected:
   void Compile_divu(CompileFlags cf) override;
   void TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res, const biscuit::GPR& reg_to_discard);
   void Compile_dst_op(CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
-                      void
(RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm), + void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm), void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow); void Compile_add(CompileFlags cf) override; @@ -171,6 +171,6 @@ private: biscuit::Assembler* rvAsm; }; -} // namespace CPU::NewRec +} // namespace CPU::Recompiler #endif // CPU_ARCH_RISCV64 diff --git a/src/core/cpu_newrec_compiler_x64.cpp b/src/core/cpu_recompiler_x64.cpp similarity index 73% rename from src/core/cpu_newrec_compiler_x64.cpp rename to src/core/cpu_recompiler_x64.cpp index 5c427f489..ad534a3a1 100644 --- a/src/core/cpu_newrec_compiler_x64.cpp +++ b/src/core/cpu_recompiler_x64.cpp @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 -#include "cpu_newrec_compiler_x64.h" +#include "cpu_recompiler_x64.h" #include "cpu_code_cache_private.h" #include "cpu_core_private.h" #include "cpu_pgxp.h" @@ -20,17 +20,27 @@ #ifdef CPU_ARCH_X64 +#ifdef ENABLE_HOST_DISASSEMBLY +#include "Zycore/Format.h" +#include "Zycore/Status.h" +#include "Zydis/Zydis.h" +#endif + LOG_CHANNEL(Recompiler); #define RMEMBASE cg->rbx #define RSTATE cg->rbp // #define PTR(x) (cg->rip + (x)) -#define PTR(x) (RSTATE + (u32)(((u8*)(x)) - ((u8*)&g_state))) +#define PTR(x) (RSTATE + (((u8*)(x)) - ((u8*)&g_state))) // PGXP TODO: LWL etc, MFC0 // PGXP TODO: Spyro 1 level gates have issues. +namespace CPU::Recompiler { + +using namespace Xbyak; + static constexpr u32 BACKPATCH_JMP_SIZE = 5; // on win32, we need to reserve an additional 32 bytes shadow space when calling out to C @@ -40,25 +50,294 @@ static constexpr u32 STACK_SHADOW_SIZE = 32; static constexpr u32 STACK_SHADOW_SIZE = 0; #endif -using namespace Xbyak; +static X64Recompiler s_instance; +Recompiler* g_compiler = &s_instance; -using CPU::Recompiler::IsCallerSavedRegister; +} // namespace CPU::Recompiler -// TODO: try using a pointer to state instead of rip-relative.. it might end up faster due to smaller code - -namespace CPU::NewRec { -X64Compiler s_instance; -Compiler* g_compiler = &s_instance; -} // namespace CPU::NewRec - -CPU::NewRec::X64Compiler::X64Compiler() = default; - -CPU::NewRec::X64Compiler::~X64Compiler() = default; - -void CPU::NewRec::X64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, - u8* far_code_buffer, u32 far_code_space) +bool CPU::Recompiler::IsCallerSavedRegister(u32 id) { - Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); +#ifdef _WIN32 + // The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile. + return (id <= 2 || (id >= 8 && id <= 11)); +#else + // rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. 
+  return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
+#endif
+}
+
+u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
+{
+  using namespace Xbyak;
+
+#ifdef _WIN32
+  // Shadow space for Win32
+  constexpr u32 stack_size = 32 + 8;
+#else
+  // Stack still needs to be aligned
+  constexpr u32 stack_size = 8;
+#endif
+
+  DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
+
+  CodeGenerator acg(code_size, static_cast<u8*>(code));
+  CodeGenerator* cg = &acg;
+
+  Label dispatch;
+  Label exit_recompiler;
+
+  g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(const_cast<u8*>(cg->getCurr()));
+  {
+    // Don't need to save registers, because we fastjmp out when execution is interrupted.
+    cg->sub(cg->rsp, stack_size);
+
+    // CPU state pointer
+    cg->lea(cg->rbp, cg->qword[cg->rip + &g_state]);
+
+    // the recompiler preloads the fastmem base
+    if (CodeCache::IsUsingFastmem())
+      cg->mov(cg->rbx, cg->qword[PTR(&g_state.fastmem_base)]);
+
+    // Fall through to event dispatcher
+  }
+
+  // check events then for frame done
+  g_check_events_and_dispatch = cg->getCurr();
+  {
+    Label skip_event_check;
+    cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
+    cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
+    cg->jl(skip_event_check);
+
+    g_run_events_and_dispatch = cg->getCurr();
+    cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
+
+    cg->L(skip_event_check);
+  }
+
+  // TODO: align?
+  g_dispatcher = cg->getCurr();
+  {
+    cg->L(dispatch);
+
+    // rcx <- s_fast_map[pc >> 16]
+    cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
+    cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]);
+    cg->mov(RWARG3, RWARG1);
+    cg->shr(RWARG3, 16);
+    cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]);
+
+    // call(rcx[pc * 2]) (fast_map[pc >> 2])
+    cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
+  }
+
+  g_compile_or_revalidate_block = cg->getCurr();
+  {
+    cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
+    cg->call(&CompileOrRevalidateBlock);
+    cg->jmp(dispatch);
+  }
+
+  g_discard_and_recompile_block = cg->getCurr();
+  {
+    cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
+    cg->call(&DiscardAndRecompileBlock);
+    cg->jmp(dispatch);
+  }
+
+  g_interpret_block = cg->getCurr();
+  {
+    cg->call(CodeCache::GetInterpretUncachedBlockFunction());
+    cg->jmp(dispatch);
+  }
+
+  return static_cast<u32>(cg->getSize());
+}
+
+u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
+{
+  u8* ptr = static_cast<u8*>(code);
+  *(ptr++) = 0xE9; // jmp
+
+  const ptrdiff_t disp = (reinterpret_cast<ptrdiff_t>(dst) - reinterpret_cast<ptrdiff_t>(code)) - 5;
+  DebugAssert(disp >= static_cast<ptrdiff_t>(std::numeric_limits<s32>::min()) &&
+              disp <= static_cast<ptrdiff_t>(std::numeric_limits<s32>::max()));
+
+  const s32 disp32 = static_cast<s32>(disp);
+  std::memcpy(ptr, &disp32, sizeof(disp32));
+  return 5;
+}
+
+#ifdef ENABLE_HOST_DISASSEMBLY
+
+static ZydisFormatterFunc s_old_print_address;
+
+static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer,
+                                                     ZydisFormatterContext* context)
+{
+  using namespace CPU;
+
+  ZyanU64 address;
+  ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand, context->runtime_address, &address));
+
+  char buf[128];
+  u32 len = 0;
+
+#define A(x) static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(x))
+
+  if (address >= A(Bus::g_ram) && address < A(Bus::g_ram + Bus::g_ram_size))
+  {
+    len = snprintf(buf, sizeof(buf), "g_ram+0x%08X", static_cast<u32>(address - A(Bus::g_ram)));
+  }
+  else if (address >= A(&g_state.regs) &&
+           address < A(reinterpret_cast<const u8*>(&g_state.regs) + sizeof(CPU::Registers)))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.regs.%s",
+                   GetRegName(static_cast<Reg>(((address - A(&g_state.regs.r[0])) / 4u))));
+  }
+  else if (address >= A(&g_state.cop0_regs) &&
+           address < A(reinterpret_cast<const u8*>(&g_state.cop0_regs) + sizeof(CPU::Cop0Registers)))
+  {
+    for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
+    {
+      if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
+      {
+        len = snprintf(buf, sizeof(buf), "g_state.cop0_regs.%s", rle.name);
+        break;
+      }
+    }
+  }
+  else if (address >= A(&g_state.gte_regs) &&
+           address < A(reinterpret_cast<const u8*>(&g_state.gte_regs) + sizeof(GTE::Regs)))
+  {
+    for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
+    {
+      if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
+      {
+        len = snprintf(buf, sizeof(buf), "g_state.gte_regs.%s", rle.name);
+        break;
+      }
+    }
+  }
+  else if (address == A(&g_state.load_delay_reg))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.load_delay_reg");
+  }
+  else if (address == A(&g_state.next_load_delay_reg))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_reg");
+  }
+  else if (address == A(&g_state.load_delay_value))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.load_delay_value");
+  }
+  else if (address == A(&g_state.next_load_delay_value))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_value");
+  }
+  else if (address == A(&g_state.pending_ticks))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.pending_ticks");
+  }
+  else if (address == A(&g_state.downcount))
+  {
+    len = snprintf(buf, sizeof(buf), "g_state.downcount");
+  }
+
+#undef A
+
+  if (len > 0)
+  {
+    ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
+    ZyanString* string;
+    ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
+    return ZyanStringAppendFormat(string, "&%s", buf);
+  }
+
+  return s_old_print_address(formatter, buffer, context);
+}
+
+void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
+{
+  ZydisDecoder disas_decoder;
+  ZydisFormatter disas_formatter;
+  ZydisDecodedInstruction disas_instruction;
+  ZydisDecodedOperand disas_operands[ZYDIS_MAX_OPERAND_COUNT];
+  ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
+  ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
+  s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
+  ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
+
+  const u8* ptr = static_cast<const u8*>(start);
+  TinyString hex;
+  ZyanUSize remaining = size;
+  while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&disas_decoder, ptr, remaining, &disas_instruction, disas_operands)))
+  {
+    char buffer[256];
+    if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, disas_operands,
+                                                     ZYDIS_MAX_OPERAND_COUNT, buffer, sizeof(buffer),
+                                                     static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(ptr)), nullptr)))
+    {
+      hex.clear();
+      for (u32 i = 0; i < 10; i++)
+      {
+        if (i < disas_instruction.length)
+          hex.append_format(" {:02X}", ptr[i]);
+        else
+          hex.append("   ");
+      }
+      DEBUG_LOG("  {:016X} {} {}", static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), hex, buffer);
+    }
+
+    ptr += disas_instruction.length;
+    remaining -= disas_instruction.length;
+  }
+}
+
+u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
+{
+  ZydisDecoder disas_decoder;
+  ZydisDecodedInstruction disas_instruction;
+  ZydisDecoderContext disas_context;
+  ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
+
+  const u8* ptr = static_cast<const u8*>(start);
+  ZyanUSize remaining = size;
+  u32 inst_count = 0;
+  while (
+    ZYAN_SUCCESS(ZydisDecoderDecodeInstruction(&disas_decoder, &disas_context, ptr, remaining, &disas_instruction)))
+  {
+    ptr += disas_instruction.length;
+    remaining -= disas_instruction.length;
+    inst_count++;
+  }
+
+  return inst_count;
+}
+
+#else
+
+void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
+{
+  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
+}
+
+u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
+{
+  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
+  return 0;
+}
+
+#endif // ENABLE_HOST_DISASSEMBLY
+
+CPU::Recompiler::X64Recompiler::X64Recompiler() = default;
+
+CPU::Recompiler::X64Recompiler::~X64Recompiler() = default;
+
+void CPU::Recompiler::X64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
+                                           u8* far_code_buffer, u32 far_code_space)
+{
+  Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
   // TODO: don't recreate this every time..
   DebugAssert(!m_emitter && !m_far_emitter && !cg);
@@ -87,7 +366,7 @@ void CPU::NewRec::X64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u
   }
 }
-void CPU::NewRec::X64Compiler::SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*))
+void CPU::Recompiler::X64Recompiler::SwitchToFarCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*))
 {
   DebugAssert(cg == m_emitter.get());
   if (emit_jump)
@@ -98,7 +377,8 @@ void CPU::NewRec::X64Compiler::SwitchToFarCode(bool emit_jump, void (Xbyak::Code
   cg = m_far_emitter.get();
 }
-void CPU::NewRec::X64Compiler::SwitchToNearCode(bool emit_jump, void (Xbyak::CodeGenerator::*jump_op)(const void*))
+void CPU::Recompiler::X64Recompiler::SwitchToNearCode(bool emit_jump,
+                                                      void (Xbyak::CodeGenerator::*jump_op)(const void*))
 {
   DebugAssert(cg == m_far_emitter.get());
   if (emit_jump)
@@ -109,9 +389,9 @@ void CPU::NewRec::X64Compiler::SwitchToNearCode(bool emit_jump, void (Xbyak::Cod
   cg = m_emitter.get();
 }
-void CPU::NewRec::X64Compiler::BeginBlock()
+void CPU::Recompiler::X64Recompiler::BeginBlock()
 {
-  Compiler::BeginBlock();
+  Recompiler::BeginBlock();
 #if 0
   if (m_block->pc == 0xBFC06F0C)
@@ -128,7 +408,7 @@ void CPU::NewRec::X64Compiler::BeginBlock()
 #endif
 }
-void CPU::NewRec::X64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
+void CPU::Recompiler::X64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
 {
   // store it first to reduce code size, because we can offset
   cg->mov(RXARG1, static_cast<size_t>(reinterpret_cast<uintptr_t>(ram_ptr)));
@@ -179,7 +459,7 @@ void CPU::NewRec::X64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, cons
   DebugAssert(size == 0);
 }
-void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate()
+void CPU::Recompiler::X64Recompiler::GenerateICacheCheckAndUpdate()
 {
   if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
   {
@@ -220,8 +500,8 @@ void CPU::NewRec::X64Compiler::GenerateICacheCheckAndUpdate()
   }
 }
-void CPU::NewRec::X64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
-                                            s32 arg3reg /*= -1*/)
+void CPU::Recompiler::X64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
+                                                  s32 arg3reg /*= -1*/)
 {
   if (arg1reg >= 0 && arg1reg != static_cast<s32>(RXARG1.getIdx()))
     cg->mov(RXARG1, Reg64(arg1reg));
@@ -232,7 +512,7 @@ void CPU::NewRec::X64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1
   cg->call(func);
} -void CPU::NewRec::X64Compiler::EndBlock(const std::optional& newpc, bool do_event_test) +void CPU::Recompiler::X64Recompiler::EndBlock(const std::optional& newpc, bool do_event_test) { if (newpc.has_value()) { @@ -246,7 +526,7 @@ void CPU::NewRec::X64Compiler::EndBlock(const std::optional& newpc, bool do EndAndLinkBlock(newpc, do_event_test, false); } -void CPU::NewRec::X64Compiler::EndBlockWithException(Exception excode) +void CPU::Recompiler::X64Recompiler::EndBlockWithException(Exception excode) { // flush regs, but not pc, it's going to get overwritten // flush cycles because of the GTE instruction stuff... @@ -264,8 +544,8 @@ void CPU::NewRec::X64Compiler::EndBlockWithException(Exception excode) EndAndLinkBlock(std::nullopt, true, false); } -void CPU::NewRec::X64Compiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, - bool force_run_events) +void CPU::Recompiler::X64Recompiler::EndAndLinkBlock(const std::optional& newpc, bool do_event_test, + bool force_run_events) { // event test // pc should've been flushed @@ -334,7 +614,7 @@ void CPU::NewRec::X64Compiler::EndAndLinkBlock(const std::optional& newpc, } } -const void* CPU::NewRec::X64Compiler::EndCompile(u32* code_size, u32* far_code_size) +const void* CPU::Recompiler::X64Recompiler::EndCompile(u32* code_size, u32* far_code_size) { const void* code = m_emitter->getCode(); *code_size = static_cast(m_emitter->getSize()); @@ -345,81 +625,81 @@ const void* CPU::NewRec::X64Compiler::EndCompile(u32* code_size, u32* far_code_s return code; } -const void* CPU::NewRec::X64Compiler::GetCurrentCodePointer() +const void* CPU::Recompiler::X64Recompiler::GetCurrentCodePointer() { return cg->getCurr(); } -const char* CPU::NewRec::X64Compiler::GetHostRegName(u32 reg) const +const char* CPU::Recompiler::X64Recompiler::GetHostRegName(u32 reg) const { static constexpr std::array reg64_names = { {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}}; return (reg < reg64_names.size()) ? 
reg64_names[reg] : "UNKNOWN"; } -void CPU::NewRec::X64Compiler::LoadHostRegWithConstant(u32 reg, u32 val) +void CPU::Recompiler::X64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val) { cg->mov(Reg32(reg), val); } -void CPU::NewRec::X64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::X64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) { cg->mov(Reg32(reg), cg->dword[PTR(ptr)]); } -void CPU::NewRec::X64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) +void CPU::Recompiler::X64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) { cg->mov(cg->dword[PTR(ptr)], Reg32(reg)); } -void CPU::NewRec::X64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) +void CPU::Recompiler::X64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr) { cg->mov(cg->dword[PTR(ptr)], val); } -void CPU::NewRec::X64Compiler::CopyHostReg(u32 dst, u32 src) +void CPU::Recompiler::X64Recompiler::CopyHostReg(u32 dst, u32 src) { if (src != dst) cg->mov(Reg32(dst), Reg32(src)); } -Xbyak::Address CPU::NewRec::X64Compiler::MipsPtr(Reg r) const +Xbyak::Address CPU::Recompiler::X64Recompiler::MipsPtr(Reg r) const { DebugAssert(r < Reg::count); return cg->dword[PTR(&g_state.regs.r[static_cast(r)])]; } -Xbyak::Reg32 CPU::NewRec::X64Compiler::CFGetRegD(CompileFlags cf) const +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegD(CompileFlags cf) const { DebugAssert(cf.valid_host_d); return Reg32(cf.host_d); } -Xbyak::Reg32 CPU::NewRec::X64Compiler::CFGetRegS(CompileFlags cf) const +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegS(CompileFlags cf) const { DebugAssert(cf.valid_host_s); return Reg32(cf.host_s); } -Xbyak::Reg32 CPU::NewRec::X64Compiler::CFGetRegT(CompileFlags cf) const +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegT(CompileFlags cf) const { DebugAssert(cf.valid_host_t); return Reg32(cf.host_t); } -Xbyak::Reg32 CPU::NewRec::X64Compiler::CFGetRegLO(CompileFlags cf) const +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegLO(CompileFlags cf) const { DebugAssert(cf.valid_host_lo); return Reg32(cf.host_lo); } -Xbyak::Reg32 CPU::NewRec::X64Compiler::CFGetRegHI(CompileFlags cf) const +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::CFGetRegHI(CompileFlags cf) const { DebugAssert(cf.valid_host_hi); return Reg32(cf.host_hi); } -Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveSToD(CompileFlags cf) +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToD(CompileFlags cf) { DebugAssert(cf.valid_host_d); DebugAssert(!cf.valid_host_t || cf.host_t != cf.host_d); @@ -430,7 +710,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveSToD(CompileFlags cf) return rd; } -Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveSToT(CompileFlags cf) +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveSToT(CompileFlags cf) { DebugAssert(cf.valid_host_t); @@ -456,7 +736,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveSToT(CompileFlags cf) return rt; } -Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveTToD(CompileFlags cf) +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::MoveTToD(CompileFlags cf) { DebugAssert(cf.valid_host_d); DebugAssert(!cf.valid_host_s || cf.host_s != cf.host_d); @@ -466,7 +746,7 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::MoveTToD(CompileFlags cf) return rd; } -void CPU::NewRec::X64Compiler::MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf) +void CPU::Recompiler::X64Recompiler::MoveSToReg(const Xbyak::Reg32& dst, CompileFlags cf) { if (cf.valid_host_s) { @@ -487,7 +767,7 @@ void CPU::NewRec::X64Compiler::MoveSToReg(const Xbyak::Reg32& dst, CompileFlags } 
} -void CPU::NewRec::X64Compiler::MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf) +void CPU::Recompiler::X64Recompiler::MoveTToReg(const Xbyak::Reg32& dst, CompileFlags cf) { if (cf.valid_host_t) { @@ -508,10 +788,10 @@ void CPU::NewRec::X64Compiler::MoveTToReg(const Xbyak::Reg32& dst, CompileFlags } } -void CPU::NewRec::X64Compiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg) +void CPU::Recompiler::X64Recompiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg) { DebugAssert(reg < Reg::count); - if (const std::optional hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) + if (const std::optional hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg)) cg->mov(dst, Reg32(hreg.value())); else if (HasConstantReg(reg)) cg->mov(dst, GetConstantRegU32(reg)); @@ -519,9 +799,9 @@ void CPU::NewRec::X64Compiler::MoveMIPSRegToReg(const Xbyak::Reg32& dst, Reg reg cg->mov(dst, MipsPtr(reg)); } -void CPU::NewRec::X64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, - Reg arg2reg /* = Reg::count */, - Reg arg3reg /* = Reg::count */) +void CPU::Recompiler::X64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, + Reg arg2reg /* = Reg::count */, + Reg arg3reg /* = Reg::count */) { DebugAssert(g_settings.gpu_pgxp_enable); @@ -536,9 +816,9 @@ void CPU::NewRec::X64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u3 cg->call(func); } -void CPU::NewRec::X64Compiler::Flush(u32 flags) +void CPU::Recompiler::X64Recompiler::Flush(u32 flags) { - Compiler::Flush(flags); + Recompiler::Flush(flags); if (flags & FLUSH_PC && m_dirty_pc) { @@ -619,7 +899,7 @@ void CPU::NewRec::X64Compiler::Flush(u32 flags) } } -void CPU::NewRec::X64Compiler::Compile_Fallback() +void CPU::Recompiler::X64Recompiler::Compile_Fallback() { WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); @@ -643,7 +923,7 @@ void CPU::NewRec::X64Compiler::Compile_Fallback() m_load_delay_dirty = EMULATE_LOAD_DELAYS; } -void CPU::NewRec::X64Compiler::CheckBranchTarget(const Xbyak::Reg32& pcreg) +void CPU::Recompiler::X64Recompiler::CheckBranchTarget(const Xbyak::Reg32& pcreg) { if (!g_settings.cpu_recompiler_memory_exceptions) return; @@ -658,7 +938,7 @@ void CPU::NewRec::X64Compiler::CheckBranchTarget(const Xbyak::Reg32& pcreg) SwitchToNearCode(false); } -void CPU::NewRec::X64Compiler::Compile_jr(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_jr(CompileFlags cf) { if (!cf.valid_host_s) cg->mov(RWARG1, MipsPtr(cf.MipsS())); @@ -672,7 +952,7 @@ void CPU::NewRec::X64Compiler::Compile_jr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::X64Compiler::Compile_jalr(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_jalr(CompileFlags cf) { if (!cf.valid_host_s) cg->mov(RWARG1, MipsPtr(cf.MipsS())); @@ -689,7 +969,7 @@ void CPU::NewRec::X64Compiler::Compile_jalr(CompileFlags cf) EndBlock(std::nullopt, true); } -void CPU::NewRec::X64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) +void CPU::Recompiler::X64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond) { const u32 taken_pc = GetConditionalBranchTarget(cf); @@ -765,7 +1045,7 @@ void CPU::NewRec::X64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond EndBlock(taken_pc, true); } -void CPU::NewRec::X64Compiler::Compile_addi(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_addi(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_sext32(); imm != 0) @@ -779,24 +1059,24 @@ void 
CPU::NewRec::X64Compiler::Compile_addi(CompileFlags cf) } } -void CPU::NewRec::X64Compiler::Compile_addiu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_addiu(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_sext32(); imm != 0) cg->add(rt, imm); } -void CPU::NewRec::X64Compiler::Compile_slti(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_slti(CompileFlags cf) { Compile_slti(cf, true); } -void CPU::NewRec::X64Compiler::Compile_sltiu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sltiu(CompileFlags cf) { Compile_slti(cf, false); } -void CPU::NewRec::X64Compiler::Compile_slti(CompileFlags cf, bool sign) +void CPU::Recompiler::X64Recompiler::Compile_slti(CompileFlags cf, bool sign) { const Reg32 rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG1; @@ -818,7 +1098,7 @@ void CPU::NewRec::X64Compiler::Compile_slti(CompileFlags cf, bool sign) cg->mov(MipsPtr(cf.MipsT()), rt); } -void CPU::NewRec::X64Compiler::Compile_andi(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_andi(CompileFlags cf) { if (const u32 imm = inst->i.imm_zext32(); imm != 0) { @@ -832,42 +1112,42 @@ void CPU::NewRec::X64Compiler::Compile_andi(CompileFlags cf) } } -void CPU::NewRec::X64Compiler::Compile_ori(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_ori(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) cg->or_(rt, imm); } -void CPU::NewRec::X64Compiler::Compile_xori(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_xori(CompileFlags cf) { const Reg32 rt = MoveSToT(cf); if (const u32 imm = inst->i.imm_zext32(); imm != 0) cg->xor_(rt, imm); } -void CPU::NewRec::X64Compiler::Compile_sll(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sll(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->shl(rd, inst->r.shamt); } -void CPU::NewRec::X64Compiler::Compile_srl(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_srl(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->shr(rd, inst->r.shamt); } -void CPU::NewRec::X64Compiler::Compile_sra(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sra(CompileFlags cf) { const Reg32 rd = MoveTToD(cf); if (inst->r.shamt > 0) cg->sar(rd, inst->r.shamt); } -void CPU::NewRec::X64Compiler::Compile_variable_shift( +void CPU::Recompiler::X64Recompiler::Compile_variable_shift( CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Reg8&), void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, int)) { @@ -885,22 +1165,22 @@ void CPU::NewRec::X64Compiler::Compile_variable_shift( } } -void CPU::NewRec::X64Compiler::Compile_sllv(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sllv(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::shl, &CodeGenerator::shl); } -void CPU::NewRec::X64Compiler::Compile_srlv(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_srlv(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::shr, &CodeGenerator::shr); } -void CPU::NewRec::X64Compiler::Compile_srav(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_srav(CompileFlags cf) { Compile_variable_shift(cf, &CodeGenerator::sar, &CodeGenerator::sar); } -void CPU::NewRec::X64Compiler::Compile_mult(CompileFlags cf, bool sign) +void CPU::Recompiler::X64Recompiler::Compile_mult(CompileFlags cf, bool sign) { // RAX/RDX shouldn't be allocatable.. 
DebugAssert(!(m_host_regs[Xbyak::Operand::RAX].flags & HR_USABLE) && @@ -932,17 +1212,17 @@ void CPU::NewRec::X64Compiler::Compile_mult(CompileFlags cf, bool sign) cg->mov(MipsPtr(Reg::hi), cg->edx); } -void CPU::NewRec::X64Compiler::Compile_mult(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_mult(CompileFlags cf) { Compile_mult(cf, true); } -void CPU::NewRec::X64Compiler::Compile_multu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_multu(CompileFlags cf) { Compile_mult(cf, false); } -void CPU::NewRec::X64Compiler::Compile_div(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_div(CompileFlags cf) { // not supported without registers for now.. DebugAssert(cf.valid_host_lo && cf.valid_host_hi); @@ -988,7 +1268,7 @@ void CPU::NewRec::X64Compiler::Compile_div(CompileFlags cf) cg->L(done); } -void CPU::NewRec::X64Compiler::Compile_divu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_divu(CompileFlags cf) { // not supported without registers for now.. DebugAssert(cf.valid_host_lo && cf.valid_host_hi); @@ -1019,7 +1299,7 @@ void CPU::NewRec::X64Compiler::Compile_divu(CompileFlags cf) cg->L(done); } -void CPU::NewRec::X64Compiler::TestOverflow(const Xbyak::Reg32& result) +void CPU::Recompiler::X64Recompiler::TestOverflow(const Xbyak::Reg32& result) { SwitchToFarCode(true, &Xbyak::CodeGenerator::jo); @@ -1035,7 +1315,7 @@ void CPU::NewRec::X64Compiler::TestOverflow(const Xbyak::Reg32& result) SwitchToNearCode(false); } -void CPU::NewRec::X64Compiler::Compile_dst_op( +void CPU::Recompiler::X64Recompiler::Compile_dst_op( CompileFlags cf, void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*op_const)(const Xbyak::Operand&, u32), bool commutative, bool overflow) { @@ -1121,27 +1401,27 @@ void CPU::NewRec::X64Compiler::Compile_dst_op( } } -void CPU::NewRec::X64Compiler::Compile_add(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_add(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::add, &CodeGenerator::add, true, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::NewRec::X64Compiler::Compile_addu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_addu(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::add, &CodeGenerator::add, true, false); } -void CPU::NewRec::X64Compiler::Compile_sub(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sub(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::sub, &CodeGenerator::sub, false, g_settings.cpu_recompiler_memory_exceptions); } -void CPU::NewRec::X64Compiler::Compile_subu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_subu(CompileFlags cf) { Compile_dst_op(cf, &CodeGenerator::sub, &CodeGenerator::sub, false, false); } -void CPU::NewRec::X64Compiler::Compile_and(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_and(CompileFlags cf) { // special cases - and with self -> self, and with 0 -> 0 const Reg32 regd = CFGetRegD(cf); @@ -1159,7 +1439,7 @@ void CPU::NewRec::X64Compiler::Compile_and(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::and_, &CodeGenerator::and_, true, false); } -void CPU::NewRec::X64Compiler::Compile_or(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_or(CompileFlags cf) { // or/nor with 0 -> no effect const Reg32 regd = CFGetRegD(cf); @@ -1172,7 +1452,7 @@ void CPU::NewRec::X64Compiler::Compile_or(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::or_, &CodeGenerator::or_, true, false); } -void 
CPU::NewRec::X64Compiler::Compile_xor(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_xor(CompileFlags cf) { const Reg32 regd = CFGetRegD(cf); if (cf.MipsS() == cf.MipsT()) @@ -1191,23 +1471,23 @@ void CPU::NewRec::X64Compiler::Compile_xor(CompileFlags cf) Compile_dst_op(cf, &CodeGenerator::xor_, &CodeGenerator::xor_, true, false); } -void CPU::NewRec::X64Compiler::Compile_nor(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_nor(CompileFlags cf) { Compile_or(cf); cg->not_(CFGetRegD(cf)); } -void CPU::NewRec::X64Compiler::Compile_slt(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_slt(CompileFlags cf) { Compile_slt(cf, true); } -void CPU::NewRec::X64Compiler::Compile_sltu(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_sltu(CompileFlags cf) { Compile_slt(cf, false); } -void CPU::NewRec::X64Compiler::Compile_slt(CompileFlags cf, bool sign) +void CPU::Recompiler::X64Recompiler::Compile_slt(CompileFlags cf, bool sign) { const Reg32 rd = CFGetRegD(cf); const Reg32 rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; @@ -1233,10 +1513,9 @@ void CPU::NewRec::X64Compiler::Compile_slt(CompileFlags cf, bool sign) sign ? cg->setl(rd.cvt8()) : cg->setb(rd.cvt8()); } -Xbyak::Reg32 -CPU::NewRec::X64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, - const std::optional& address, - const std::optional& reg /* = std::nullopt */) +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::ComputeLoadStoreAddressArg( + CompileFlags cf, const std::optional& address, + const std::optional& reg /* = std::nullopt */) { const u32 imm = inst->i.imm_sext32(); if (cf.valid_host_s && imm == 0 && !reg.has_value()) @@ -1267,8 +1546,8 @@ CPU::NewRec::X64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, } template -Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, bool sign, - bool use_fastmem, const RegAllocFn& dst_reg_alloc) +Xbyak::Reg32 CPU::Recompiler::X64Recompiler::GenerateLoad(const Xbyak::Reg32& addr_reg, MemoryAccessSize size, + bool sign, bool use_fastmem, const RegAllocFn& dst_reg_alloc) { if (use_fastmem) { @@ -1329,20 +1608,20 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg { case MemoryAccessSize::Byte: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte)); + cg->call(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord)); + cg->call(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::ReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord)); + cg->call(checked ? 
reinterpret_cast(&CPU::Recompiler::Thunks::ReadMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord)); } break; } @@ -1398,8 +1677,8 @@ Xbyak::Reg32 CPU::NewRec::X64Compiler::GenerateLoad(const Xbyak::Reg32& addr_reg return dst_reg; } -void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, - MemoryAccessSize size, bool use_fastmem) +void CPU::Recompiler::X64Recompiler::GenerateStore(const Xbyak::Reg32& addr_reg, const Xbyak::Reg32& value_reg, + MemoryAccessSize size, bool use_fastmem) { if (use_fastmem) { @@ -1450,20 +1729,20 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const { case MemoryAccessSize::Byte: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte)); + cg->call(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + cg->call(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(checked ? reinterpret_cast(&Recompiler::Thunks::WriteMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord)); + cg->call(checked ? reinterpret_cast(&CPU::Recompiler::Thunks::WriteMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); } break; } @@ -1495,8 +1774,8 @@ void CPU::NewRec::X64Compiler::GenerateStore(const Xbyak::Reg32& addr_reg, const } } -void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? 
std::optional(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -1524,8 +1803,8 @@ void CPU::NewRec::X64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize siz } } -void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1628,8 +1907,8 @@ void CPU::NewRec::X64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize siz } } -void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -1714,8 +1993,8 @@ void CPU::NewRec::X64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize si } } -void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const std::optional addr_reg = g_settings.gpu_pgxp_enable ? std::optional(Reg32(AllocateTempHostReg(HR_CALLEE_SAVED))) : @@ -1739,8 +2018,8 @@ void CPU::NewRec::X64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize siz } } -void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { DebugAssert(size == MemoryAccessSize::Word && !sign); @@ -1819,8 +2098,8 @@ void CPU::NewRec::X64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize siz } } -void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, - const std::optional& address) +void CPU::Recompiler::X64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, + const std::optional& address) { const u32 index = static_cast(inst->r.rt.GetValue()); const auto [ptr, action] = GetGTERegisterPointer(index, false); @@ -1875,7 +2154,7 @@ void CPU::NewRec::X64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize si FreeHostReg(data_backup.getIdx()); } -void CPU::NewRec::X64Compiler::Compile_mtc0(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_mtc0(CompileFlags cf) { const Cop0Reg reg = static_cast(MipsD()); const u32* ptr = GetCop0RegPtr(reg); @@ -1959,7 +2238,7 @@ void CPU::NewRec::X64Compiler::Compile_mtc0(CompileFlags cf) } } -void CPU::NewRec::X64Compiler::Compile_rfe(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_rfe(CompileFlags cf) { // shift mode bits right two, preserving upper bits static constexpr u32 mode_bits_mask = UINT32_C(0b1111); @@ -1974,7 +2253,7 @@ void CPU::NewRec::X64Compiler::Compile_rfe(CompileFlags cf) TestInterrupts(RWARG1); } -void CPU::NewRec::X64Compiler::TestInterrupts(const Xbyak::Reg32& sr) +void CPU::Recompiler::X64Recompiler::TestInterrupts(const Xbyak::Reg32& sr) { // if Iec == 0 then goto no_interrupt Label no_interrupt; @@ 
-2022,7 +2301,7 @@ void CPU::NewRec::X64Compiler::TestInterrupts(const Xbyak::Reg32& sr) cg->L(no_interrupt); } -void CPU::NewRec::X64Compiler::Compile_mfc2(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_mfc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const Reg rt = inst->r.rt; @@ -2063,7 +2342,7 @@ void CPU::NewRec::X64Compiler::Compile_mfc2(CompileFlags cf) } } -void CPU::NewRec::X64Compiler::Compile_mtc2(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_mtc2(CompileFlags cf) { const u32 index = inst->cop.Cop2Index(); const auto [ptr, action] = GetGTERegisterPointer(index, true); @@ -2137,7 +2416,7 @@ void CPU::NewRec::X64Compiler::Compile_mtc2(CompileFlags cf) } } -void CPU::NewRec::X64Compiler::Compile_cop2(CompileFlags cf) +void CPU::Recompiler::X64Recompiler::Compile_cop2(CompileFlags cf) { TickCount func_ticks; GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks); @@ -2149,10 +2428,10 @@ void CPU::NewRec::X64Compiler::Compile_cop2(CompileFlags cf) AddGTETicks(func_ticks); } -u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, - TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, - u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, - bool is_load) +u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size, + TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask, + u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed, + bool is_load) { CodeGenerator acg(thunk_space, thunk_code); CodeGenerator* cg = &acg; @@ -2201,20 +2480,20 @@ u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* { case MemoryAccessSize::Byte: { - cg->call(is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryByte) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryByte)); + cg->call(is_load ? reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryByte) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryByte)); } break; case MemoryAccessSize::HalfWord: { - cg->call(is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); + cg->call(is_load ? reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryHalfWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); } break; case MemoryAccessSize::Word: { - cg->call(is_load ? reinterpret_cast(&Recompiler::Thunks::UncheckedReadMemoryWord) : - reinterpret_cast(&Recompiler::Thunks::UncheckedWriteMemoryWord)); + cg->call(is_load ? 
reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedReadMemoryWord) : + reinterpret_cast(&CPU::Recompiler::Thunks::UncheckedWriteMemoryWord)); } break; } diff --git a/src/core/cpu_newrec_compiler_x64.h b/src/core/cpu_recompiler_x64.h similarity index 97% rename from src/core/cpu_newrec_compiler_x64.h rename to src/core/cpu_recompiler_x64.h index fe2be408f..aa97b9b2c 100644 --- a/src/core/cpu_newrec_compiler_x64.h +++ b/src/core/cpu_recompiler_x64.h @@ -3,19 +3,19 @@ #pragma once -#include "cpu_newrec_compiler.h" +#include "cpu_recompiler.h" #include #ifdef CPU_ARCH_X64 -namespace CPU::NewRec { +namespace CPU::Recompiler { -class X64Compiler final : public Compiler +class X64Recompiler final : public Recompiler { public: - X64Compiler(); - ~X64Compiler() override; + X64Recompiler(); + ~X64Recompiler() override; protected: const char* GetHostRegName(u32 reg) const override; @@ -141,6 +141,6 @@ private: Xbyak::CodeGenerator* cg; }; -} // namespace CPU::NewRec +} // namespace CPU::Recompiler #endif // CPU_ARCH_X64 diff --git a/src/core/imgui_overlays.cpp b/src/core/imgui_overlays.cpp index 7b5e6e951..3c8239506 100644 --- a/src/core/imgui_overlays.cpp +++ b/src/core/imgui_overlays.cpp @@ -430,11 +430,6 @@ void ImGuiManager::DrawPerformanceOverlay(float& position_y, float scale, float text.append_format("{}{}", first ? "" : "/", "CI"); first = false; } - else if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec) - { - text.append_format("{}{}", first ? "" : "/", "NR"); - first = false; - } else { if (g_settings.cpu_recompiler_icache) diff --git a/src/core/settings.cpp b/src/core/settings.cpp index c0c3a1641..f3bce23b7 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -1207,13 +1207,11 @@ static constexpr const std::array s_cpu_execution_mode_names = { "Interpreter", "CachedInterpreter", "Recompiler", - "NewRec", }; static constexpr const std::array s_cpu_execution_mode_display_names = { TRANSLATE_DISAMBIG_NOOP("Settings", "Interpreter (Slowest)", "CPUExecutionMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Cached Interpreter (Faster)", "CPUExecutionMode"), TRANSLATE_DISAMBIG_NOOP("Settings", "Recompiler (Fastest)", "CPUExecutionMode"), - TRANSLATE_DISAMBIG_NOOP("Settings", "New Recompiler (Experimental)", "CPUExecutionMode"), }; std::optional Settings::ParseCPUExecutionMode(const char* str) diff --git a/src/core/settings.h b/src/core/settings.h index ef4ff3002..9dbbefbfc 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -486,11 +486,11 @@ struct Settings static constexpr float DEFAULT_GPU_PGXP_DEPTH_THRESHOLD = 300.0f; static constexpr float GPU_PGXP_DEPTH_THRESHOLD_SCALE = 4096.0f; - // Prefer oldrec over newrec for now. Except on RISC-V, where there is no oldrec. -#if defined(CPU_ARCH_RISCV64) - static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::NewRec; -#else + // Prefer recompiler when supported. +#ifdef ENABLE_RECOMPILER static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::Recompiler; +#else + static constexpr CPUExecutionMode DEFAULT_CPU_EXECUTION_MODE = CPUExecutionMode::CachedInterpreter; #endif // LUT still ends up faster on Apple Silicon for now, because of 16K pages. diff --git a/src/core/types.h b/src/core/types.h index 49e517ce2..fae40cffa 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -48,7 +48,6 @@ enum class CPUExecutionMode : u8 Interpreter, CachedInterpreter, Recompiler, - NewRec, Count };
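The block-linking primitive in the new x64 EmitJump above is worth spelling out: it writes a single 5-byte `jmp rel32`, where the displacement is measured from the end of the instruction, which is why 5 is subtracted. A minimal standalone sketch of the same encoding, using plain stdint types instead of the project's u8/s32 aliases and a hypothetical helper name:

    #include <cstdint>
    #include <cstring>

    // Write a 5-byte x86-64 "jmp rel32" at `code`, targeting `dst`.
    void emit_jmp_rel32(void* code, const void* dst)
    {
      uint8_t* ptr = static_cast<uint8_t*>(code);
      *ptr++ = 0xE9; // opcode for jmp rel32

      // rel32 = target - address of the *next* instruction, hence "- 5".
      // The result must fit in a signed 32-bit immediate (+/-2 GiB), which is
      // why recompiler code buffers are allocated close together.
      const intptr_t disp =
        reinterpret_cast<intptr_t>(dst) - reinterpret_cast<intptr_t>(code) - 5;
      const int32_t disp32 = static_cast<int32_t>(disp);

      std::memcpy(ptr, &disp32, sizeof(disp32)); // byte-wise store, alignment-safe
    }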
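The dispatcher emitted in EmitASMFunctions performs a two-level lookup: the top 16 bits of the PC select a page table from g_code_lut, and the comment "call(rcx[pc * 2]) (fast_map[pc >> 2])" hides a scaling trick: each entry is an 8-byte pointer and the PC is 4-byte aligned, so pc * 2 == (pc / 4) * 8. A C++ sketch of the equivalent lookup, under the assumption (inferred from the emitted code indexing with the full PC) that each first-level pointer is stored pre-biased by the page's base offset; the table name and size here are illustrative, not the project's exact declarations:

    #include <cstdint>

    using CodePointer = const void*;

    // Hypothetical first level: one biased page-table pointer per 64K page.
    extern CodePointer* g_fake_code_lut[0x10000];

    CodePointer lookup_block(uint32_t pc)
    {
      // First level: select the page table for pc's upper 16 bits.
      CodePointer* const page = g_fake_code_lut[pc >> 16];

      // Second level: pc * 2 bytes into the table == entry (pc & 0xFFFF) >> 2,
      // provided `page` was pre-biased by -(page_base * 2) when it was stored.
      return *reinterpret_cast<CodePointer*>(reinterpret_cast<uintptr_t>(page) +
                                             static_cast<uintptr_t>(pc) * 2);
    }

This keeps the hot dispatch path at four instructions plus an indirect jump, with no bounds checks.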
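The magic numbers in the new IsCallerSavedRegister are Xbyak register indices, which follow the x86-64 encoding order (0=rax, 1=rcx, 2=rdx, 3=rbx, 4=rsp, 5=rbp, 6=rsi, 7=rdi, 8..15=r8..r15). A sketch of the SysV branch with a compile-time sanity check; the free function is hypothetical, not project API:

    #include <cstdint>

    // SysV AMD64: rax, rcx, rdx, rsi, rdi, r8-r11 are caller-saved (volatile).
    constexpr bool is_caller_saved_sysv(uint32_t id)
    {
      return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
    }

    static_assert(is_caller_saved_sysv(7) && !is_caller_saved_sysv(3),
                  "rdi is volatile under SysV; rbx is callee-saved");

The Win32 branch differs only in dropping rsi (6) and rdi (7), matching the smaller volatile set of the Windows x64 ABI.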