From 1d6c4a3af1474637aaef08cc7c2fbdfbebe13070 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 19 Nov 2019 20:30:04 +1000 Subject: [PATCH] CPU: Basic recompiler implementation for x64 (lui, ori, addiu) Disabled by default. --- src/core/CMakeLists.txt | 19 + src/core/bus.cpp | 12 +- src/core/bus.h | 44 +- src/core/bus.inl | 4 + src/core/core.vcxproj | 28 +- src/core/core.vcxproj.filters | 12 + src/core/cpu_code_cache.cpp | 313 ++++ src/core/cpu_code_cache.h | 59 + src/core/cpu_core.cpp | 47 +- src/core/cpu_core.h | 16 +- src/core/cpu_disasm.cpp | 21 +- src/core/cpu_recompiler_code_generator.cpp | 589 +++++++ src/core/cpu_recompiler_code_generator.h | 185 ++ .../cpu_recompiler_code_generator_generic.cpp | 21 + .../cpu_recompiler_code_generator_x64.cpp | 1509 +++++++++++++++++ src/core/cpu_recompiler_register_cache.cpp | 604 +++++++ src/core/cpu_recompiler_register_cache.h | 243 +++ src/core/cpu_recompiler_thunks.cpp | 43 + src/core/cpu_recompiler_thunks.h | 38 + src/core/cpu_recompiler_types.h | 53 + src/core/cpu_types.cpp | 196 +++ src/core/cpu_types.h | 73 + src/core/system.cpp | 16 +- src/core/system.h | 2 + src/core/types.h | 6 + 25 files changed, 4104 insertions(+), 49 deletions(-) create mode 100644 src/core/cpu_code_cache.cpp create mode 100644 src/core/cpu_code_cache.h create mode 100644 src/core/cpu_recompiler_code_generator.cpp create mode 100644 src/core/cpu_recompiler_code_generator.h create mode 100644 src/core/cpu_recompiler_code_generator_generic.cpp create mode 100644 src/core/cpu_recompiler_code_generator_x64.cpp create mode 100644 src/core/cpu_recompiler_register_cache.cpp create mode 100644 src/core/cpu_recompiler_register_cache.h create mode 100644 src/core/cpu_recompiler_thunks.cpp create mode 100644 src/core/cpu_recompiler_thunks.h create mode 100644 src/core/cpu_recompiler_types.h create mode 100644 src/core/cpu_types.cpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index bce02fa74..0ac1200eb 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -6,11 +6,22 @@ add_library(core bus.inl cdrom.cpp cdrom.h + cpu_code_cache.cpp + cpu_code_cache.h cpu_core.cpp cpu_core.h cpu_core.inl cpu_disasm.cpp cpu_disasm.h + cpu_recompiler_code_generator.cpp + cpu_recompiler_code_generator.h + cpu_recompiler_code_generator_generic.cpp + cpu_recompiler_register_cache.cpp + cpu_recompiler_register_cache.h + cpu_recompiler_thunks.cpp + cpu_recompiler_thunks.h + cpu_recompiler_types.h + cpu_types.cpp cpu_types.h digital_controller.cpp digital_controller.h @@ -65,3 +76,11 @@ if(WIN32) gpu_hw_d3d11.h ) endif() + +if(${CPU_ARCH} STREQUAL "x64") + target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak") + target_sources(core PRIVATE cpu_recompiler_code_generator_x64.cpp) + message("Building x64 recompiler") +else() + message("Not building recompiler") +endif() diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 506a65e7b..dcd35797d 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -5,6 +5,7 @@ #include "YBaseLib/String.h" #include "cdrom.h" #include "common/state_wrapper.h" +#include "cpu_code_cache.h" #include "cpu_core.h" #include "cpu_disasm.h" #include "dma.h" @@ -32,10 +33,12 @@ Bus::Bus() = default; Bus::~Bus() = default; -void Bus::Initialize(CPU::Core* cpu, DMA* dma, InterruptController* interrupt_controller, GPU* gpu, CDROM* cdrom, - Pad* pad, Timers* timers, SPU* spu, MDEC* mdec) +void Bus::Initialize(CPU::Core* cpu, CPU::CodeCache* cpu_code_cache, DMA* dma, + InterruptController* 
interrupt_controller, GPU* gpu, CDROM* cdrom, Pad* pad, Timers* timers,
+                     SPU* spu, MDEC* mdec)
 {
   m_cpu = cpu;
+  m_cpu_code_cache = cpu_code_cache;
   m_dma = dma;
   m_interrupt_controller = interrupt_controller;
   m_gpu = gpu;
@@ -518,6 +521,11 @@ void Bus::DoWriteSPU(MemoryAccessSize size, u32 offset, u32 value)
   m_spu->WriteRegister(offset, Truncate16(value));
 }

+void Bus::DoInvalidateCodeCache(u32 page_index)
+{
+  m_cpu_code_cache->FlushBlocksWithPageIndex(page_index);
+}
+
 u32 Bus::DoReadDMA(MemoryAccessSize size, u32 offset)
 {
   return FIXUP_WORD_READ_VALUE(offset, m_dma->ReadRegister(FIXUP_WORD_READ_OFFSET(offset)));
diff --git a/src/core/bus.h b/src/core/bus.h
index 68eba02a2..880559d40 100644
--- a/src/core/bus.h
+++ b/src/core/bus.h
@@ -4,12 +4,14 @@
 #include "common/bitfield.h"
 #include "types.h"
 #include <array>
+#include <bitset>

 class StateWrapper;

 namespace CPU {
 class Core;
-}
+class CodeCache;
+} // namespace CPU

 class DMA;
 class InterruptController;
@@ -27,8 +29,8 @@ public:
   Bus();
   ~Bus();

-  void Initialize(CPU::Core* cpu, DMA* dma, InterruptController* interrupt_controller, GPU* gpu, CDROM* cdrom, Pad* pad,
-                  Timers* timers, SPU* spu, MDEC* mdec);
+  void Initialize(CPU::Core* cpu, CPU::CodeCache* cpu_code_cache, DMA* dma, InterruptController* interrupt_controller,
+                  GPU* gpu, CDROM* cdrom, Pad* pad, Timers* timers, SPU* spu, MDEC* mdec);

   void Reset();
   bool DoState(StateWrapper& sw);
@@ -52,6 +54,34 @@ public:
   // changing interfaces
   void SetGPU(GPU* gpu) { m_gpu = gpu; }

+  /// Returns the address which should be used for code caching (i.e. removes mirrors).
+  ALWAYS_INLINE static PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address)
+  {
+    // RAM
+    if (address < 0x800000)
+      return address & UINT32_C(0x1FFFFF);
+    else
+      return address;
+  }
+
+  /// Returns true if the address specified is cacheable (RAM or BIOS).
+  ALWAYS_INLINE static bool IsCacheableAddress(PhysicalMemoryAddress address)
+  {
+    return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE));
+  }
+
+  /// Returns true if the address specified is writable (RAM).
+  ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; }
+
+  /// Flags a RAM region as code, so we know when to invalidate blocks.
+  ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
+
+  /// Unflags a RAM region as code; the code cache will no longer be notified when writes occur.
+  ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
+
+  /// Clears all code bits for RAM regions.
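+  /// Called when the whole code cache is reset, so stale flags don't leave pages notifying a
+  /// cache that no longer holds any blocks for them.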
+ ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); } + private: enum : u32 { @@ -204,7 +234,10 @@ private: u32 DoReadSPU(MemoryAccessSize size, u32 offset); void DoWriteSPU(MemoryAccessSize size, u32 offset, u32 value); + void DoInvalidateCodeCache(u32 page_index); + CPU::Core* m_cpu = nullptr; + CPU::CodeCache* m_cpu_code_cache = nullptr; DMA* m_dma = nullptr; InterruptController* m_interrupt_controller = nullptr; GPU* m_gpu = nullptr; @@ -220,8 +253,9 @@ private: std::array m_cdrom_access_time = {}; std::array m_spu_access_time = {}; - std::array m_ram{}; // 2MB RAM - std::array m_bios{}; // 512K BIOS ROM + std::bitset m_ram_code_bits{}; + std::array m_ram{}; // 2MB RAM + std::array m_bios{}; // 512K BIOS ROM std::vector m_exp1_rom; MEMCTRL m_MEMCTRL = {}; diff --git a/src/core/bus.inl b/src/core/bus.inl index 22182cd93..6b6ee0e58 100644 --- a/src/core/bus.inl +++ b/src/core/bus.inl @@ -25,6 +25,10 @@ TickCount Bus::DoRAMAccess(u32 offset, u32& value) } else { + const u32 page_index = offset / CPU_CODE_CACHE_PAGE_SIZE; + if (m_ram_code_bits[page_index]) + DoInvalidateCodeCache(page_index); + if constexpr (size == MemoryAccessSize::Byte) { m_ram[offset] = Truncate8(value); diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 6b1338b25..546cc405b 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -40,6 +40,13 @@ + + + + + + + @@ -67,6 +74,11 @@ + + + + + @@ -251,7 +263,7 @@ ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -273,7 +285,7 @@ ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -295,7 +307,7 @@ ENABLE_VOODOO=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) Default true false @@ -320,7 +332,7 @@ 
ENABLE_VOODOO=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true ProgramDatabase - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) Default true false @@ -344,7 +356,7 @@ MaxSpeed true ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -367,7 +379,7 @@ MaxSpeed true ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true true stdcpp17 @@ -391,7 +403,7 @@ MaxSpeed true ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true false stdcpp17 @@ -414,7 +426,7 @@ MaxSpeed true ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) - $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories) + $(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories) true true stdcpp17 diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 09d0fca8e..ff73ce0d0 100644 --- a/src/core/core.vcxproj.filters +++ 
b/src/core/core.vcxproj.filters @@ -26,6 +26,13 @@ + + + + + + + @@ -57,6 +64,11 @@ + + + + + diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp new file mode 100644 index 000000000..e42edb53a --- /dev/null +++ b/src/core/cpu_code_cache.cpp @@ -0,0 +1,313 @@ +#include "cpu_code_cache.h" +#include "YBaseLib/Log.h" +#include "cpu_core.h" +#include "cpu_disasm.h" +#include "cpu_recompiler_code_generator.h" +#include "cpu_recompiler_thunks.h" +Log_SetChannel(CPU::CodeCache); + +namespace CPU { + +bool USE_CODE_CACHE = false; +bool USE_RECOMPILER = false; + +CodeCache::CodeCache() = default; + +CodeCache::~CodeCache() = default; + +void CodeCache::Initialize(Core* core, Bus* bus) +{ + m_core = core; + m_bus = bus; + + m_code_buffer = std::make_unique(); + m_asm_functions = std::make_unique(); + m_asm_functions->Generate(m_code_buffer.get()); +} + +void CodeCache::Execute() +{ + while (m_core->m_downcount >= 0) + { + if (m_core->HasPendingInterrupt()) + { + // TODO: Fill in m_next_instruction... + m_core->DispatchInterrupt(); + } + + m_current_block = GetNextBlock(); + if (!m_current_block) + { + Log_WarningPrintf("Falling back to uncached interpreter at 0x%08X", m_core->GetRegs().pc); + InterpretUncachedBlock(); + continue; + } + + if (USE_RECOMPILER) + m_current_block->host_code(m_core); + else + InterpretCachedBlock(*m_current_block); + + if (m_current_block_flushed) + { + m_current_block_flushed = false; + delete m_current_block; + } + + m_current_block = nullptr; + } +} + +void CodeCache::Reset() +{ + m_bus->ClearRAMCodePageFlags(); + for (auto& it : m_ram_block_map) + it.clear(); + + m_blocks.clear(); + m_code_buffer->Reset(); +} + +const CPU::CodeBlock* CodeCache::GetNextBlock() +{ + const u32 address = m_bus->UnmirrorAddress(m_core->m_regs.pc & UINT32_C(0x1FFFFFFF)); + + CodeBlockKey key = {}; + key.SetPC(address); + key.user_mode = m_core->InUserMode(); + + BlockMap::iterator iter = m_blocks.find(key.bits); + if (iter != m_blocks.end()) + return iter->second; + + CodeBlock* block = new CodeBlock(); + block->key = key; + if (CompileBlock(block)) + { + // insert into the page map + if (m_bus->IsRAMAddress(address)) + { + const u32 start_page = block->GetStartPageIndex(); + const u32 end_page = block->GetEndPageIndex(); + for (u32 page = start_page; page < end_page; page++) + { + m_ram_block_map[page].push_back(block); + m_bus->SetRAMCodePage(page); + } + } + } + else + { + Log_ErrorPrintf("Failed to compile block at PC=0x%08X", address); + } + + iter = m_blocks.emplace(key.bits, block).first; + return block; +} + +bool CodeCache::CompileBlock(CodeBlock* block) +{ + u32 pc = block->GetPC(); + bool is_branch_delay_slot = false; + bool is_load_delay_slot = false; + + for (;;) + { + CodeBlockInstruction cbi = {}; + if (!m_bus->IsCacheableAddress(pc) || + m_bus->DispatchAccess(pc, cbi.instruction.bits) < 0 || + !IsInvalidInstruction(cbi.instruction)) + { + break; + } + + cbi.pc = pc; + cbi.is_branch = IsBranchInstruction(cbi.instruction); + cbi.is_branch_delay_slot = is_branch_delay_slot; + cbi.is_load_delay_slot = is_load_delay_slot; + cbi.can_trap = CanInstructionTrap(cbi.instruction, m_core->InUserMode()); + + // instruction is decoded now + block->instructions.push_back(cbi); + pc += sizeof(cbi.instruction.bits); + + // if we're in a branch delay slot, the block is now done + // except if this is a branch in a branch delay slot, then we grab the one after that, and so on... 
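+    // e.g. a branch at 0x100 ends the block after its delay slot at 0x104; if 0x104 is itself a
+    // branch, we keep fetching until a delay slot that is not a branch finally closes the block.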
+ if (is_branch_delay_slot && !cbi.is_branch) + break; + + // if this is a branch, we grab the next instruction (delay slot), and then exit + is_branch_delay_slot = cbi.is_branch; + + // is this a non-branchy exit? (e.g. syscall) + if (IsExitBlockInstruction(cbi.instruction)) + break; + } + + if (!block->instructions.empty()) + { + block->instructions.back().is_last_instruction = true; + +#ifdef _DEBUG + SmallString disasm; + Log_DebugPrintf("Block at 0x%08X", block->GetPC()); + for (const CodeBlockInstruction& cbi : block->instructions) + { + CPU::DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits, nullptr); + Log_DebugPrintf("[%s %s 0x%08X] %08X %s", cbi.is_branch_delay_slot ? "BD" : " ", + cbi.is_load_delay_slot ? "LD" : " ", cbi.pc, cbi.instruction.bits, disasm.GetCharArray()); + } +#endif + } + else + { + Log_WarningPrintf("Empty block compiled at 0x%08X", block->key.GetPC()); + return false; + } + + if (USE_RECOMPILER) + { + // Ensure we're not going to run out of space while compiling this block. + if (m_code_buffer->GetFreeCodeSpace() < (block->instructions.size() * Recompiler::MAX_HOST_BYTES_PER_INSTRUCTION)) + { + Log_WarningPrintf("Out of code space, flushing all blocks."); + Reset(); + } + + Recompiler::CodeGenerator codegen(m_core, m_code_buffer.get(), *m_asm_functions.get()); + if (!codegen.CompileBlock(block, &block->host_code, &block->host_code_size)) + { + Log_ErrorPrintf("Failed to compile host code for block at 0x%08X", block->key.GetPC()); + return false; + } + } + + return true; +} + +void CodeCache::FlushBlocksWithPageIndex(u32 page_index) +{ + DebugAssert(page_index < CPU_CODE_CACHE_PAGE_COUNT); + auto& blocks = m_ram_block_map[page_index]; + while (!blocks.empty()) + FlushBlock(blocks.back()); + + m_bus->ClearRAMCodePage(page_index); +} + +void CodeCache::FlushBlock(CodeBlock* block) +{ + BlockMap::iterator iter = m_blocks.find(block->key.GetPC()); + Assert(iter != m_blocks.end() && iter->second == block); + Log_DevPrintf("Flushing block at address 0x%08X", block->GetPC()); + + // remove from the page map + const u32 start_page = block->GetStartPageIndex(); + const u32 end_page = block->GetEndPageIndex(); + for (u32 page = start_page; page < end_page; page++) + { + auto& page_blocks = m_ram_block_map[page]; + auto page_block_iter = std::find(page_blocks.begin(), page_blocks.end(), block); + Assert(page_block_iter != page_blocks.end()); + page_blocks.erase(page_block_iter); + } + + // remove from block map + m_blocks.erase(iter); + + // flushing block currently executing? 
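+  // We can't free it here, since Execute() is still in the middle of running its host code.
+  // Mark it instead, and Execute() deletes it once the block finishes.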
+ if (m_current_block == block) + { + Log_WarningPrintf("Flushing currently-executing block 0x%08X", block->GetPC()); + m_current_block_flushed = true; + } + else + { + delete block; + } +} + +void CodeCache::InterpretCachedBlock(const CodeBlock& block) +{ + // set up the state so we've already fetched the instruction + DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == block.GetPC()); + + for (const CodeBlockInstruction& cbi : block.instructions) + { + m_core->m_pending_ticks += 1; + m_core->m_downcount -= 1; + + // now executing the instruction we previously fetched + m_core->m_current_instruction.bits = cbi.instruction.bits; + m_core->m_current_instruction_pc = m_core->m_regs.pc; + m_core->m_current_instruction_in_branch_delay_slot = cbi.is_branch_delay_slot; + m_core->m_current_instruction_was_branch_taken = m_core->m_branch_was_taken; + m_core->m_branch_was_taken = false; + m_core->m_exception_raised = false; + + // update pc + DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == cbi.pc); + m_core->m_regs.pc = m_core->m_regs.npc; + m_core->m_regs.npc += 4; + + // execute the instruction we previously fetched + m_core->ExecuteInstruction(); + + // next load delay + m_core->m_load_delay_reg = m_core->m_next_load_delay_reg; + m_core->m_next_load_delay_reg = Reg::count; + m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value; + m_core->m_next_load_delay_old_value = 0; + + if (m_core->m_exception_raised) + break; + } + + // cleanup so the interpreter can kick in if needed + m_core->m_next_instruction_is_branch_delay_slot = false; +} + +void CodeCache::InterpretUncachedBlock() +{ + // At this point, pc contains the last address executed (in the previous block). The instruction has not been fetched + // yet. pc shouldn't be updated until the fetch occurs, that way the exception occurs in the delay slot. + bool in_branch_delay_slot = false; + for (;;) + { + m_core->m_pending_ticks += 1; + m_core->m_downcount -= 1; + + // now executing the instruction we previously fetched + m_core->m_current_instruction.bits = m_core->m_next_instruction.bits; + m_core->m_current_instruction_pc = m_core->m_regs.pc; + m_core->m_current_instruction_in_branch_delay_slot = m_core->m_next_instruction_is_branch_delay_slot; + m_core->m_current_instruction_was_branch_taken = m_core->m_branch_was_taken; + m_core->m_next_instruction_is_branch_delay_slot = false; + m_core->m_branch_was_taken = false; + m_core->m_exception_raised = false; + + // Fetch the next instruction, except if we're in a branch delay slot. The "fetch" is done in the next block. 
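+    // A false return means the fetch itself raised an exception, so the exception vector is the
+    // next thing to execute and this block is done.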
+    if (!m_core->FetchInstruction())
+      break;
+
+    // execute the instruction we previously fetched
+    m_core->ExecuteInstruction();
+
+    // next load delay
+    m_core->m_load_delay_reg = m_core->m_next_load_delay_reg;
+    m_core->m_next_load_delay_reg = Reg::count;
+    m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value;
+    m_core->m_next_load_delay_old_value = 0;
+
+    const bool branch = IsBranchInstruction(m_core->m_current_instruction);
+    if (m_core->m_exception_raised || (!branch && in_branch_delay_slot) ||
+        IsExitBlockInstruction(m_core->m_current_instruction))
+    {
+      break;
+    }
+
+    in_branch_delay_slot = branch;
+  }
+}
+
+} // namespace CPU
\ No newline at end of file
diff --git a/src/core/cpu_code_cache.h b/src/core/cpu_code_cache.h
new file mode 100644
index 000000000..f68ea2bb1
--- /dev/null
+++ b/src/core/cpu_code_cache.h
@@ -0,0 +1,59 @@
+#pragma once
+#include "common/bitfield.h"
+#include "cpu_types.h"
+#include <array>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+class JitCodeBuffer;
+
+class Bus;
+
+namespace CPU {
+class Core;
+
+namespace Recompiler {
+class ASMFunctions;
+}
+
+class CodeCache
+{
+public:
+  CodeCache();
+  ~CodeCache();
+
+  void Initialize(Core* core, Bus* bus);
+  void Reset();
+  void Execute();
+
+  /// Flushes all blocks which are in the range of the specified code page.
+  void FlushBlocksWithPageIndex(u32 page_index);
+
+private:
+  using BlockMap = std::unordered_map<u32, CodeBlock*>;
+
+  const CodeBlock* GetNextBlock();
+  bool CompileBlock(CodeBlock* block);
+  void FlushBlock(CodeBlock* block);
+  void InterpretCachedBlock(const CodeBlock& block);
+  void InterpretUncachedBlock();
+
+  Core* m_core;
+  Bus* m_bus;
+
+  const CodeBlock* m_current_block = nullptr;
+  bool m_current_block_flushed = false;
+
+  std::unique_ptr<JitCodeBuffer> m_code_buffer;
+  std::unique_ptr<Recompiler::ASMFunctions> m_asm_functions;
+
+  BlockMap m_blocks;
+
+  std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
+};
+
+extern bool USE_CODE_CACHE;
+extern bool USE_RECOMPILER;
+
+} // namespace CPU
\ No newline at end of file
diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp
index ad4d8e2a3..79625bbe1 100644
--- a/src/core/cpu_core.cpp
+++ b/src/core/cpu_core.cpp
@@ -33,7 +33,6 @@ void WriteToExecutionLog(const char* format, ...)
   va_end(ap);
 }

-
 Core::Core() = default;

 Core::~Core() = default;
@@ -320,6 +319,7 @@ void Core::RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE)

   // flush the pipeline - we don't want to execute the previously fetched instruction
   m_regs.npc = GetExceptionVector(excode);
+  m_exception_raised = true;
   FlushPipeline();
 }

@@ -333,21 +333,25 @@ void Core::ClearExternalInterrupt(u8 bit)
   m_cop0_regs.cause.Ip &= static_cast<u8>(~(1u << bit));
 }

-bool Core::DispatchInterrupts()
+bool Core::HasPendingInterrupt()
+{
+  // const bool do_interrupt = m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0);
+  const bool do_interrupt =
+    m_cop0_regs.sr.IEc && (((m_cop0_regs.cause.bits & m_cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
+
+  return do_interrupt;
+}
+
+void Core::DispatchInterrupt()
 {
   // If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
   // instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
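+  // (This smells like it's papering over an interrupt-timing inaccuracy elsewhere; treat it as a
+  // workaround rather than documented hardware behaviour.)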
if (m_next_instruction.IsCop2Instruction()) - return false; + return; - // const bool do_interrupt = m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0); - const bool do_interrupt = - m_cop0_regs.sr.IEc && (((m_cop0_regs.cause.bits & m_cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0); - if (!do_interrupt) - return false; - - RaiseException(Exception::INT); - return true; + // Interrupt raising occurs before the start of the instruction. + RaiseException(Exception::INT, m_regs.pc, m_next_instruction_is_branch_delay_slot, m_branch_was_taken, + m_next_instruction.cop.cop_n); } void Core::FlushLoadDelay() @@ -366,9 +370,15 @@ void Core::FlushPipeline() // not in a branch delay slot m_branch_was_taken = false; m_next_instruction_is_branch_delay_slot = false; + m_current_instruction_pc = m_regs.pc; // prefetch the next instruction FetchInstruction(); + + // and set it as the next one to execute + m_current_instruction.bits = m_next_instruction.bits; + m_current_instruction_in_branch_delay_slot = false; + m_current_instruction_was_branch_taken = false; } u32 Core::ReadReg(Reg rs) @@ -567,6 +577,9 @@ void Core::Execute() { while (m_downcount >= 0) { + if (HasPendingInterrupt()) + DispatchInterrupt(); + m_pending_ticks += 1; m_downcount -= 1; @@ -577,9 +590,10 @@ void Core::Execute() m_current_instruction_was_branch_taken = m_branch_was_taken; m_next_instruction_is_branch_delay_slot = false; m_branch_was_taken = false; + m_exception_raised = false; // fetch the next instruction - if (DispatchInterrupts() || !FetchInstruction()) + if (!FetchInstruction()) continue; #if 0 // GTE flag test debugging @@ -621,19 +635,20 @@ void Core::ExecuteInstruction() const Instruction inst = m_current_instruction; #if 0 - if (inst_pc == 0xBFC06FF0) + if (m_current_instruction_pc == 0xBFC06FF0) { - TRACE_EXECUTION = true; + //TRACE_EXECUTION = true; + LOG_EXECUTION = true; __debugbreak(); } #endif -#ifdef _DEBUG +//#ifdef _DEBUG if (TRACE_EXECUTION) PrintInstruction(inst.bits, m_current_instruction_pc, this); if (LOG_EXECUTION) LogInstruction(inst.bits, m_current_instruction_pc, this); -#endif +//#endif switch (inst.op) { diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index cc8080419..c0469025b 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -12,6 +12,14 @@ class Bus; namespace CPU { +class CodeCache; + +namespace Recompiler +{ +class CodeGenerator; +class Thunks; +} + class Core { public: @@ -20,6 +28,10 @@ public: static constexpr PhysicalMemoryAddress DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00); static constexpr PhysicalMemoryAddress DCACHE_OFFSET_MASK = UINT32_C(0x000003FF); static constexpr PhysicalMemoryAddress DCACHE_SIZE = UINT32_C(0x00000400); + + friend CodeCache; + friend Recompiler::CodeGenerator; + friend Recompiler::Thunks; Core(); ~Core(); @@ -102,7 +114,8 @@ private: u32 GetExceptionVector(Exception excode) const; void RaiseException(Exception excode); void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE); - bool DispatchInterrupts(); + bool HasPendingInterrupt(); + void DispatchInterrupt(); // flushes any load delays if present void FlushLoadDelay(); @@ -141,6 +154,7 @@ private: bool m_current_instruction_was_branch_taken = false; bool m_next_instruction_is_branch_delay_slot = false; bool m_branch_was_taken = false; + bool m_exception_raised = false; // load delays Reg m_load_delay_reg = Reg::count; diff --git a/src/core/cpu_disasm.cpp b/src/core/cpu_disasm.cpp index 6acc15ad0..9ae1d3021 100644 --- a/src/core/cpu_disasm.cpp +++ 
b/src/core/cpu_disasm.cpp @@ -23,10 +23,6 @@ struct TableEntry const char* format; }; -static const std::array s_reg_names = { - {"$zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"}}; - static const std::array s_base_table = {{ "", // 0 "UNKNOWN", // 1 @@ -188,11 +184,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core if (std::strncmp(str, "rs", 2) == 0) { - dest->AppendString(s_reg_names[static_cast(inst.r.rs.GetValue())]); + dest->AppendString(GetRegName(inst.r.rs)); if (state) { - comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", - s_reg_names[static_cast(inst.r.rs.GetValue())], + comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rs), state->GetRegs().r[static_cast(inst.r.rs.GetValue())]); } @@ -200,11 +195,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core } else if (std::strncmp(str, "rt", 2) == 0) { - dest->AppendString(s_reg_names[static_cast(inst.r.rt.GetValue())]); + dest->AppendString(GetRegName(inst.r.rt)); if (state) { - comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", - s_reg_names[static_cast(inst.r.rt.GetValue())], + comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rt), state->GetRegs().r[static_cast(inst.r.rt.GetValue())]); } @@ -212,11 +206,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core } else if (std::strncmp(str, "rd", 2) == 0) { - dest->AppendString(s_reg_names[static_cast(inst.r.rd.GetValue())]); + dest->AppendString(GetRegName(inst.r.rd)); if (state) { - comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", - s_reg_names[static_cast(inst.r.rd.GetValue())], + comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rd), state->GetRegs().r[static_cast(inst.r.rd.GetValue())]); } @@ -247,7 +240,7 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core else if (std::strncmp(str, "offsetrs", 8) == 0) { const s32 offset = static_cast(inst.i.imm_sext32()); - dest->AppendFormattedString("%d(%s)", offset, s_reg_names[static_cast(inst.i.rs.GetValue())]); + dest->AppendFormattedString("%d(%s)", offset, GetRegName(inst.i.rs)); if (state) { comment.AppendFormattedString("%saddr=0x%08X", comment.IsEmpty() ? 
"" : ", ", diff --git a/src/core/cpu_recompiler_code_generator.cpp b/src/core/cpu_recompiler_code_generator.cpp new file mode 100644 index 000000000..13da8d3b5 --- /dev/null +++ b/src/core/cpu_recompiler_code_generator.cpp @@ -0,0 +1,589 @@ +#include "cpu_recompiler_code_generator.h" +#include "YBaseLib/Log.h" +#include "cpu_disasm.h" +Log_SetChannel(CPU::Recompiler); + +namespace CPU::Recompiler { + +CodeGenerator::CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions) + : m_cpu(cpu), m_code_buffer(code_buffer), m_asm_functions(asm_functions), m_register_cache(*this), + m_emit(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()) +{ + InitHostRegs(); +} + +CodeGenerator::~CodeGenerator() = default; + +u32 CodeGenerator::CalculateRegisterOffset(Reg reg) +{ + return uint32(offsetof(Core, m_regs.r[0]) + (static_cast(reg) * sizeof(u32))); +} + +bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, + u32* out_host_code_size) +{ + // TODO: Align code buffer. + + m_block = block; + m_block_start = block->instructions.data(); + m_block_end = block->instructions.data() + block->instructions.size(); + + m_current_instruction_in_branch_delay_slot_dirty = true; + m_branch_was_taken_dirty = true; + m_current_instruction_was_branch_taken_dirty = false; + m_load_delay_dirty = true; + + EmitBeginBlock(); + BlockPrologue(); + + const CodeBlockInstruction* cbi = m_block_start; + while (cbi != m_block_end) + { +#ifndef Y_BUILD_CONFIG_RELEASE + SmallString disasm; + DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr); + Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray()); +#endif + + if (!CompileInstruction(*cbi)) + { + m_block_end = nullptr; + m_block_start = nullptr; + m_block = nullptr; + return false; + } + + cbi++; + } + + BlockEpilogue(); + EmitEndBlock(); + + FinalizeBlock(out_host_code, out_host_code_size); + + DebugAssert(m_register_cache.GetUsedHostRegisters() == 0); + + m_block_end = nullptr; + m_block_start = nullptr; + m_block = nullptr; + return true; +} + +bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi) +{ + bool result; + switch (cbi.instruction.op) + { +#if 1 + case InstructionOp::lui: + result = Compile_lui(cbi); + break; + + case InstructionOp::ori: + result = Compile_ori(cbi); + break; + + case InstructionOp::addiu: + result = Compile_addiu(cbi); + break; + + case InstructionOp::funct: + { + switch (cbi.instruction.r.funct) + { + case InstructionFunct::sll: + result = Compile_sll(cbi); + break; + + default: + result = Compile_Fallback(cbi); + break; + } + } + break; +#endif + + default: + result = Compile_Fallback(cbi); + break; + } + + // release temporary effective addresses + for (Value& value : m_operand_memory_addresses) + value.ReleaseAndClear(); + + return result; +} + +Value CodeGenerator::ConvertValueSize(const Value& value, RegSize size, bool sign_extend) +{ + DebugAssert(value.size != size); + + if (value.IsConstant()) + { + // compile-time conversion, woo! + switch (size) + { + case RegSize_8: + return Value::FromConstantU8(value.constant_value & 0xFF); + + case RegSize_16: + { + switch (value.size) + { + case RegSize_8: + return Value::FromConstantU16(sign_extend ? 
SignExtend16(Truncate8(value.constant_value)) : + ZeroExtend16(Truncate8(value.constant_value))); + + default: + return Value::FromConstantU16(value.constant_value & 0xFFFF); + } + } + break; + + case RegSize_32: + { + switch (value.size) + { + case RegSize_8: + return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate8(value.constant_value)) : + ZeroExtend32(Truncate8(value.constant_value))); + case RegSize_16: + return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate16(value.constant_value)) : + ZeroExtend32(Truncate16(value.constant_value))); + + case RegSize_32: + return value; + + default: + break; + } + } + break; + + default: + break; + } + + UnreachableCode(); + return Value{}; + } + + Value new_value = m_register_cache.AllocateScratch(size); + if (size < value.size) + { + EmitCopyValue(new_value.host_reg, value); + } + else + { + if (sign_extend) + EmitSignExtend(new_value.host_reg, size, value.host_reg, value.size); + else + EmitZeroExtend(new_value.host_reg, size, value.host_reg, value.size); + } + + return new_value; +} + +void CodeGenerator::ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend) +{ + DebugAssert(value->size != size); + + // We don't want to mess up the register cache value, so generate a new value if it's not scratch. + if (value->IsConstant() || !value->IsScratch()) + { + *value = ConvertValueSize(*value, size, sign_extend); + return; + } + + DebugAssert(value->IsInHostRegister() && value->IsScratch()); + + // If the size is smaller and the value is in a register, we can just "view" the lower part. + if (size < value->size) + { + value->size = size; + } + else + { + if (sign_extend) + EmitSignExtend(value->host_reg, size, value->host_reg, value->size); + else + EmitZeroExtend(value->host_reg, size, value->host_reg, value->size); + } + + value->size = size; +} + +Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs) +{ + DebugAssert(lhs.size == rhs.size); + if (lhs.IsConstant() && rhs.IsConstant()) + { + // compile-time + u64 new_cv = lhs.constant_value + rhs.constant_value; + switch (lhs.size) + { + case RegSize_8: + return Value::FromConstantU8(Truncate8(new_cv)); + + case RegSize_16: + return Value::FromConstantU16(Truncate16(new_cv)); + + case RegSize_32: + return Value::FromConstantU32(Truncate32(new_cv)); + + case RegSize_64: + return Value::FromConstantU64(new_cv); + + default: + return Value(); + } + } + + Value res = m_register_cache.AllocateScratch(lhs.size); + if (lhs.HasConstantValue(0)) + { + EmitCopyValue(res.host_reg, rhs); + return res; + } + else if (rhs.HasConstantValue(0)) + { + EmitCopyValue(res.host_reg, lhs); + return res; + } + else + { + EmitCopyValue(res.host_reg, lhs); + EmitAdd(res.host_reg, rhs); + return res; + } +} + +Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs) +{ + DebugAssert(lhs.size == rhs.size); + if (lhs.IsConstant() && rhs.IsConstant()) + { + // compile-time + u64 new_cv = lhs.constant_value << rhs.constant_value; + switch (lhs.size) + { + case RegSize_8: + return Value::FromConstantU8(Truncate8(new_cv)); + + case RegSize_16: + return Value::FromConstantU16(Truncate16(new_cv)); + + case RegSize_32: + return Value::FromConstantU32(Truncate32(new_cv)); + + case RegSize_64: + return Value::FromConstantU64(new_cv); + + default: + return Value(); + } + } + + Value res = m_register_cache.AllocateScratch(lhs.size); + EmitCopyValue(res.host_reg, lhs); + if (!rhs.HasConstantValue(0)) + EmitShl(res.host_reg, res.size, rhs); + return res; +} + +Value 
CodeGenerator::OrValues(const Value& lhs, const Value& rhs)
+{
+  DebugAssert(lhs.size == rhs.size);
+  if (lhs.IsConstant() && rhs.IsConstant())
+  {
+    // compile-time
+    u64 new_cv = lhs.constant_value | rhs.constant_value;
+    switch (lhs.size)
+    {
+      case RegSize_8:
+        return Value::FromConstantU8(Truncate8(new_cv));
+
+      case RegSize_16:
+        return Value::FromConstantU16(Truncate16(new_cv));
+
+      case RegSize_32:
+        return Value::FromConstantU32(Truncate32(new_cv));
+
+      case RegSize_64:
+        return Value::FromConstantU64(new_cv);
+
+      default:
+        return Value();
+    }
+  }
+
+  Value res = m_register_cache.AllocateScratch(lhs.size);
+  if (lhs.HasConstantValue(0))
+  {
+    EmitCopyValue(res.host_reg, rhs);
+    return res;
+  }
+  else if (rhs.HasConstantValue(0))
+  {
+    EmitCopyValue(res.host_reg, lhs);
+    return res;
+  }
+
+  EmitCopyValue(res.host_reg, lhs);
+  EmitOr(res.host_reg, rhs);
+  return res;
+}
+
+void CodeGenerator::BlockPrologue()
+{
+  EmitStoreCPUStructField(offsetof(Core, m_exception_raised), Value::FromConstantU8(0));
+
+  // fetching of the first instruction...
+
+  // sync m_current_instruction_pc so we can simply add to it
+  SyncCurrentInstructionPC();
+
+  // and the same for m_regs.pc
+  SyncPC();
+
+  EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(4));
+}
+
+void CodeGenerator::BlockEpilogue()
+{
+#if defined(_DEBUG) && defined(Y_CPU_X64)
+  m_emit.nop();
+#endif
+
+  m_register_cache.FlushAllGuestRegisters(true, false);
+
+  // if the last instruction wasn't a fallback, we need to add its fetch
+  if (m_delayed_pc_add > 0)
+  {
+    EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add));
+    m_delayed_pc_add = 0;
+  }
+
+  AddPendingCycles();
+
+  // TODO: correct value for is_branch_delay_slot - branches in branch delay slot.
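+  // Blocks normally end just after a delay slot, so 0 is the right value; a branch inside
+  // another branch's delay slot is the case the TODO above is about.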
+ EmitStoreCPUStructField(offsetof(Core, m_next_instruction_is_branch_delay_slot), Value::FromConstantU8(0)); +} + +void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, + bool force_sync /* = false */) +{ +#if defined(_DEBUG) && defined(Y_CPU_X64) + m_emit.nop(); +#endif + + // reset dirty flags + if (m_branch_was_taken_dirty) + { + Value temp = m_register_cache.AllocateScratch(RegSize_8); + EmitLoadCPUStructField(temp.host_reg, RegSize_8, offsetof(Core, m_branch_was_taken)); + EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), temp); + EmitStoreCPUStructField(offsetof(Core, m_branch_was_taken), Value::FromConstantU8(0)); + m_current_instruction_was_branch_taken_dirty = true; + m_branch_was_taken_dirty = false; + } + else if (m_current_instruction_was_branch_taken_dirty) + { + EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(0)); + m_current_instruction_was_branch_taken_dirty = false; + } + + if (m_current_instruction_in_branch_delay_slot_dirty && !cbi.is_branch_delay_slot) + { + EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(0)); + m_current_instruction_in_branch_delay_slot_dirty = false; + } + + if (cbi.is_branch_delay_slot) + { + // m_regs.pc should be synced for the next block, as the branch wrote to npc + SyncCurrentInstructionPC(); + SyncPC(); + + // m_current_instruction_in_branch_delay_slot = true + EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(1)); + m_current_instruction_in_branch_delay_slot_dirty = true; + } + + if (!CanInstructionTrap(cbi.instruction, m_block->key.user_mode) && !force_sync) + { + // Defer updates for non-faulting instructions. 
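+    // Accumulate pc/cycle increments and apply them in a single add each at the next sync point,
+    // e.g. three straight-line single-cycle instructions become one
+    //   add dword ptr [rbp + offsetof(Core, m_pending_ticks)], 3
+    // instead of three separate read-modify-writes.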
+ m_delayed_pc_add += INSTRUCTION_SIZE; + m_delayed_cycles_add += cycles; + return; + } + + if (m_delayed_pc_add > 0) + { + // m_current_instruction_pc += m_delayed_pc_add + EmitAddCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(m_delayed_pc_add)); + + // m_regs.pc += m_delayed_pc_add + EmitAddCPUStructField(offsetof(Core, m_regs.pc), Value::FromConstantU32(m_delayed_pc_add)); + + // m_regs.npc += m_delayed_pc_add + // TODO: This can go once we recompile branch instructions and unconditionally set npc + EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add)); + + m_delayed_pc_add = 0; + } + + if (!cbi.is_branch) + m_delayed_pc_add = INSTRUCTION_SIZE; + + m_delayed_cycles_add += cycles; + AddPendingCycles(); +} + +void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi) +{ + // copy if the previous instruction was a load, reset the current value on the next instruction + if (m_load_delay_dirty) + { + // cpu->m_load_delay_reg = cpu->m_next_load_delay_reg; + // cpu->m_next_load_delay_reg = Reg::count; + { + Value temp = m_register_cache.AllocateScratch(RegSize_8); + EmitLoadCPUStructField(temp.host_reg, RegSize_8, offsetof(Core, m_next_load_delay_reg)); + EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_reg), + Value::FromConstantU8(static_cast(Reg::count))); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), temp); + } + + // cpu->m_load_delay_old_value = cpu->m_next_load_delay_old_value; + // cpu->m_next_load_delay_old_value = 0; + { + Value temp = m_register_cache.AllocateScratch(RegSize_32); + EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(Core, m_next_load_delay_old_value)); + EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_old_value), Value::FromConstantU32(0)); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), temp); + } + + m_load_delay_dirty = false; + m_next_load_delay_dirty = true; + } + else if (m_next_load_delay_dirty) + { + // cpu->m_load_delay_reg = Reg::count; + // cpu->m_load_delay_old_value = 0; + EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast(Reg::count))); + EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), Value::FromConstantU32(0)); + + m_next_load_delay_dirty = false; + } +} + +void CodeGenerator::SyncCurrentInstructionPC() +{ + // m_current_instruction_pc = m_regs.pc + Value pc_value = m_register_cache.AllocateScratch(RegSize_32); + EmitLoadCPUStructField(pc_value.host_reg, RegSize_32, offsetof(Core, m_regs.pc)); + EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), pc_value); +} + +void CodeGenerator::SyncPC() +{ + // m_regs.pc = m_regs.npc + Value npc_value = m_register_cache.AllocateScratch(RegSize_32); + EmitLoadCPUStructField(npc_value.host_reg, RegSize_32, offsetof(Core, m_regs.npc)); + EmitStoreCPUStructField(offsetof(Core, m_regs.pc), npc_value); +} + +void CodeGenerator::AddPendingCycles() +{ + if (m_delayed_cycles_add == 0) + return; + + EmitAddCPUStructField(offsetof(Core, m_pending_ticks), Value::FromConstantU32(m_delayed_cycles_add)); + EmitAddCPUStructField(offsetof(Core, m_downcount), Value::FromConstantU32(~u32(m_delayed_cycles_add - 1))); + m_delayed_cycles_add = 0; +} + +bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1, true); + + // flush and invalidate all guest registers, since the fallback could change any of them + m_register_cache.FlushAllGuestRegisters(true, true); + + 
EmitStoreCPUStructField(offsetof(Core, m_current_instruction.bits), Value::FromConstantU32(cbi.instruction.bits)); + + // emit the function call + if (CanInstructionTrap(cbi.instruction, m_block->key.user_mode)) + { + // TODO: Use carry flag or something here too + Value return_value = m_register_cache.AllocateScratch(RegSize_8); + EmitFunctionCall(&return_value, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr()); + EmitBlockExitOnBool(return_value); + } + else + { + EmitFunctionCall(nullptr, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr()); + } + + m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch; + m_branch_was_taken_dirty = cbi.is_branch; + m_load_delay_dirty = true; + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // rt <- (imm << 16) + m_register_cache.WriteGuestRegister(cbi.instruction.i.rt, + Value::FromConstantU32(cbi.instruction.i.imm_zext32() << 16)); + + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_ori(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // rt <- rs | zext(imm) + m_register_cache.WriteGuestRegister(cbi.instruction.i.rt, + OrValues(m_register_cache.ReadGuestRegister(cbi.instruction.i.rs), + Value::FromConstantU32(cbi.instruction.i.imm_zext32()))); + + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_sll(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // rd <- rt << shamt + m_register_cache.WriteGuestRegister(cbi.instruction.r.rd, + ShlValues(m_register_cache.ReadGuestRegister(cbi.instruction.r.rt), + Value::FromConstantU32(cbi.instruction.r.shamt))); + + InstructionEpilogue(cbi); + return true; +} + +bool CodeGenerator::Compile_addiu(const CodeBlockInstruction& cbi) +{ + InstructionPrologue(cbi, 1); + + // rt <- rs + sext(imm) + m_register_cache.WriteGuestRegister(cbi.instruction.i.rt, + AddValues(m_register_cache.ReadGuestRegister(cbi.instruction.i.rs), + Value::FromConstantU32(cbi.instruction.i.imm_sext32()))); + + InstructionEpilogue(cbi); + return true; +} + +} // namespace CPU::Recompiler diff --git a/src/core/cpu_recompiler_code_generator.h b/src/core/cpu_recompiler_code_generator.h new file mode 100644 index 000000000..effff6b0a --- /dev/null +++ b/src/core/cpu_recompiler_code_generator.h @@ -0,0 +1,185 @@ +#pragma once +#include +#include +#include + +#include "common/jit_code_buffer.h" + +#include "cpu_recompiler_register_cache.h" +#include "cpu_recompiler_thunks.h" +#include "cpu_recompiler_types.h" +#include "cpu_types.h" + +// ABI selection +#if defined(Y_CPU_X64) +#if defined(Y_PLATFORM_WINDOWS) +#define ABI_WIN64 1 +#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_OSX) +#define ABI_SYSV 1 +#else +#error Unknown ABI. 
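+// (A new ABI also needs its argument/return registers and shadow-space size defined at the top of
+// cpu_recompiler_code_generator_x64.cpp.)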
+#endif +#endif + +namespace CPU::Recompiler { + +class CodeGenerator +{ +public: + CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions); + ~CodeGenerator(); + + static u32 CalculateRegisterOffset(Reg reg); + static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize); + static void AlignCodeBuffer(JitCodeBuffer* code_buffer); + + RegisterCache& GetRegisterCache() { return m_register_cache; } + CodeEmitter& GetCodeEmitter() { return m_emit; } + + bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); + + ////////////////////////////////////////////////////////////////////////// + // Code Generation + ////////////////////////////////////////////////////////////////////////// + void EmitBeginBlock(); + void EmitEndBlock(); + void EmitBlockExitOnBool(const Value& value); + void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size); + + void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size); + void EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size); + void EmitCopyValue(HostReg to_reg, const Value& value); + void EmitAdd(HostReg to_reg, const Value& value); + void EmitSub(HostReg to_reg, const Value& value); + void EmitCmp(HostReg to_reg, const Value& value); + void EmitInc(HostReg to_reg, RegSize size); + void EmitDec(HostReg to_reg, RegSize size); + void EmitShl(HostReg to_reg, RegSize size, const Value& amount_value); + void EmitShr(HostReg to_reg, RegSize size, const Value& amount_value); + void EmitSar(HostReg to_reg, RegSize size, const Value& amount_value); + void EmitAnd(HostReg to_reg, const Value& value); + void EmitOr(HostReg to_reg, const Value& value); + void EmitXor(HostReg to_reg, const Value& value); + void EmitTest(HostReg to_reg, const Value& value); + void EmitNot(HostReg to_reg, RegSize size); + + void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg); + void EmitStoreGuestRegister(Reg guest_reg, const Value& value); + void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset); + void EmitStoreCPUStructField(u32 offset, const Value& value); + void EmitAddCPUStructField(u32 offset, const Value& value); + + u32 PrepareStackForCall(); + void RestoreStackAfterCall(u32 adjust_size); + + void EmitFunctionCallPtr(Value* return_value, const void* ptr); + void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1); + void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2); + void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, + const Value& arg3); + void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, + const Value& arg3, const Value& arg4); + + template + void EmitFunctionCall(Value* return_value, const FunctionType ptr) + { + EmitFunctionCallPtr(return_value, reinterpret_cast(ptr)); + } + + template + void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1) + { + EmitFunctionCallPtr(return_value, reinterpret_cast(ptr), arg1); + } + + template + void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2) + { + EmitFunctionCallPtr(return_value, reinterpret_cast(ptr), arg1, arg2); + } + + template + void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2, + const Value& arg3) + { + 
EmitFunctionCallPtr(return_value, reinterpret_cast(ptr), arg1, arg2, arg3); + } + + template + void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2, + const Value& arg3, const Value& arg4) + { + EmitFunctionCallPtr(return_value, reinterpret_cast(ptr), arg1, arg2, arg3, arg4); + } + + // Host register saving. + void EmitPushHostReg(HostReg reg); + void EmitPopHostReg(HostReg reg); + + // Flags copying from host. +#if defined(Y_CPU_X64) + void ReadFlagsFromHost(Value* value); + Value ReadFlagsFromHost(); +#endif + + // Value ops + Value AddValues(const Value& lhs, const Value& rhs); + Value MulValues(const Value& lhs, const Value& rhs); + Value ShlValues(const Value& lhs, const Value& rhs); + Value OrValues(const Value& lhs, const Value& rhs); + +private: + // Host register setup + void InitHostRegs(); + + Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend); + void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend); + + ////////////////////////////////////////////////////////////////////////// + // Code Generation Helpers + ////////////////////////////////////////////////////////////////////////// + // branch target, memory address, etc + void BlockPrologue(); + void BlockEpilogue(); + void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, + bool force_sync = false); + void InstructionEpilogue(const CodeBlockInstruction& cbi); + void SyncCurrentInstructionPC(); + void SyncPC(); + void AddPendingCycles(); + + ////////////////////////////////////////////////////////////////////////// + // Instruction Code Generators + ////////////////////////////////////////////////////////////////////////// + bool CompileInstruction(const CodeBlockInstruction& cbi); + bool Compile_Fallback(const CodeBlockInstruction& cbi); + bool Compile_lui(const CodeBlockInstruction& cbi); + bool Compile_ori(const CodeBlockInstruction& cbi); + bool Compile_sll(const CodeBlockInstruction& cbi); + bool Compile_addiu(const CodeBlockInstruction& cbi); + + Core* m_cpu; + JitCodeBuffer* m_code_buffer; + const ASMFunctions& m_asm_functions; + const CodeBlock* m_block = nullptr; + const CodeBlockInstruction* m_block_start = nullptr; + const CodeBlockInstruction* m_block_end = nullptr; + RegisterCache m_register_cache; + CodeEmitter m_emit; + + u32 m_delayed_pc_add = 0; + TickCount m_delayed_cycles_add = 0; + + std::array m_operand_memory_addresses{}; + + Xbyak::Label m_block_exit_label; + + // whether various flags need to be reset. 
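+  // "Dirty" means the in-memory CPU state may still hold a value from the previous instruction;
+  // InstructionPrologue() stores the correct value before the next instruction needs it.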
+ bool m_current_instruction_in_branch_delay_slot_dirty = false; + bool m_branch_was_taken_dirty = false; + bool m_current_instruction_was_branch_taken_dirty = false; + bool m_next_load_delay_dirty = false; + bool m_load_delay_dirty = false; +}; + +} // namespace CPU_X86::Recompiler diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp new file mode 100644 index 000000000..a7628b12f --- /dev/null +++ b/src/core/cpu_recompiler_code_generator_generic.cpp @@ -0,0 +1,21 @@ +#include "cpu_recompiler_code_generator.h" + +namespace CPU::Recompiler { + +#if !defined(Y_CPU_X64) +void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer) {} +#endif + +void CodeGenerator::EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg) +{ + EmitLoadCPUStructField(host_reg, RegSize_32, CalculateRegisterOffset(guest_reg)); +} + +void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value) +{ + DebugAssert(value.size == RegSize_32); + EmitStoreCPUStructField(CalculateRegisterOffset(guest_reg), value); +} + + +} // namespace CPU::Recompiler \ No newline at end of file diff --git a/src/core/cpu_recompiler_code_generator_x64.cpp b/src/core/cpu_recompiler_code_generator_x64.cpp new file mode 100644 index 000000000..4f47d8f68 --- /dev/null +++ b/src/core/cpu_recompiler_code_generator_x64.cpp @@ -0,0 +1,1509 @@ +#include "cpu_recompiler_code_generator.h" +#include "cpu_recompiler_thunks.h" + +namespace CPU::Recompiler { + +#if defined(ABI_WIN64) +constexpr HostReg RCPUPTR = Xbyak::Operand::RBP; +constexpr HostReg RRETURN = Xbyak::Operand::RAX; +constexpr HostReg RARG1 = Xbyak::Operand::RCX; +constexpr HostReg RARG2 = Xbyak::Operand::RDX; +constexpr HostReg RARG3 = Xbyak::Operand::R8; +constexpr HostReg RARG4 = Xbyak::Operand::R9; +constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 32; +constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16; +#elif defined(ABI_SYSV) +constexpr HostReg RCPUPTR = Xbyak::Operand::RBP; +constexpr HostReg RRETURN = Xbyak::Operand::RAX; +constexpr HostReg RARG1 = Xbyak::Operand::RDI; +constexpr HostReg RARG2 = Xbyak::Operand::RSI; +constexpr HostReg RARG3 = Xbyak::Operand::RDX; +constexpr HostReg RARG4 = Xbyak::Operand::RCX; +constexpr u32 FUNCTION_CALL_SHADOW_SPACE = 0; +constexpr u64 FUNCTION_CALL_STACK_ALIGNMENT = 16; +#endif + +static const Xbyak::Reg8 GetHostReg8(HostReg reg) +{ + return Xbyak::Reg8(reg, reg >= Xbyak::Operand::SPL); +} + +static const Xbyak::Reg8 GetHostReg8(const Value& value) +{ + DebugAssert(value.size == RegSize_8 && value.IsInHostRegister()); + return Xbyak::Reg8(value.host_reg, value.host_reg >= Xbyak::Operand::SPL); +} + +static const Xbyak::Reg16 GetHostReg16(HostReg reg) +{ + return Xbyak::Reg16(reg); +} + +static const Xbyak::Reg16 GetHostReg16(const Value& value) +{ + DebugAssert(value.size == RegSize_16 && value.IsInHostRegister()); + return Xbyak::Reg16(value.host_reg); +} + +static const Xbyak::Reg32 GetHostReg32(HostReg reg) +{ + return Xbyak::Reg32(reg); +} + +static const Xbyak::Reg32 GetHostReg32(const Value& value) +{ + DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); + return Xbyak::Reg32(value.host_reg); +} + +static const Xbyak::Reg64 GetHostReg64(HostReg reg) +{ + return Xbyak::Reg64(reg); +} + +static const Xbyak::Reg64 GetHostReg64(const Value& value) +{ + DebugAssert(value.size == RegSize_64 && value.IsInHostRegister()); + return Xbyak::Reg64(value.host_reg); +} + +static const Xbyak::Reg64 GetCPUPtrReg() +{ + return GetHostReg64(RCPUPTR); +} + +const char* 
+{
+  static constexpr std::array<const char*, HostReg_Count> reg8_names = {
+    {"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"}};
+  static constexpr std::array<const char*, HostReg_Count> reg16_names = {
+    {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"}};
+  static constexpr std::array<const char*, HostReg_Count> reg32_names = {{"eax", "ecx", "edx", "ebx", "esp", "ebp",
+                                                                          "esi", "edi", "r8d", "r9d", "r10d", "r11d",
+                                                                          "r12d", "r13d", "r14d", "r15d"}};
+  static constexpr std::array<const char*, HostReg_Count> reg64_names = {
+    {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}};
+  if (reg >= static_cast<HostReg>(HostReg_Count))
+    return "";
+
+  switch (size)
+  {
+    case RegSize_8:
+      return reg8_names[reg];
+    case RegSize_16:
+      return reg16_names[reg];
+    case RegSize_32:
+      return reg32_names[reg];
+    case RegSize_64:
+      return reg64_names[reg];
+    default:
+      return "";
+  }
+}
+
+void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer)
+{
+  code_buffer->Align(16, 0x90);
+}
+
+void CodeGenerator::InitHostRegs()
+{
+#if defined(ABI_WIN64)
+  // TODO: function calls mess up the parameter registers if we use them.. fix it
+  // allocate nonvolatile before volatile
+  m_register_cache.SetHostRegAllocationOrder(
+    {Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI, /*Xbyak::Operand::RSP, */
+     Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, /*Xbyak::Operand::RCX,
+     Xbyak::Operand::RDX, Xbyak::Operand::R8, Xbyak::Operand::R9, */
+     Xbyak::Operand::R10, Xbyak::Operand::R11,
+     /*Xbyak::Operand::RAX*/});
+  m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RCX, Xbyak::Operand::RDX,
+                                           Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10,
+                                           Xbyak::Operand::R11});
+  m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
+                                           Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
+                                           Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
+  m_register_cache.SetCPUPtrHostReg(RCPUPTR);
+#elif defined(ABI_SYSV)
+  m_register_cache.SetHostRegAllocationOrder(
+    {Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
+     Xbyak::Operand::R14, Xbyak::Operand::R15,
+     /*Xbyak::Operand::RAX, */ /*Xbyak::Operand::RDI, */ /*Xbyak::Operand::RSI, */
+     /*Xbyak::Operand::RDX, */ /*Xbyak::Operand::RCX, */ Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10,
+     Xbyak::Operand::R11});
+  m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
+                                           Xbyak::Operand::RDX, Xbyak::Operand::RCX, Xbyak::Operand::R8,
+                                           Xbyak::Operand::R9, Xbyak::Operand::R10, Xbyak::Operand::R11});
+  m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
+                                           Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
+                                           Xbyak::Operand::R15});
+  m_register_cache.SetCPUPtrHostReg(RCPUPTR);
+#endif
+}
+
+void CodeGenerator::EmitBeginBlock()
+{
+  // Store the CPU struct pointer.
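+  // Block entry is a plain C function call, so on entry the first ABI argument
+  // register (RARG1: RCX on Win64, RDI on SysV) holds the Core*. It is pinned
+  // in RBP (RCPUPTR) for the lifetime of the block; every guest-state access
+  // emitted below is RBP-relative.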
+  const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
+  DebugAssert(cpu_reg_allocated);
+  m_emit.mov(GetCPUPtrReg(), GetHostReg64(RARG1));
+}
+
+void CodeGenerator::EmitEndBlock()
+{
+  m_emit.L(m_block_exit_label);
+  m_register_cache.FreeHostReg(RCPUPTR);
+  m_register_cache.PopCalleeSavedRegisters();
+
+  m_emit.ret();
+}
+
+void CodeGenerator::EmitBlockExitOnBool(const Value& value)
+{
+  Assert(!value.IsConstant() && value.IsInHostRegister());
+
+  Xbyak::Label continue_label;
+  m_emit.test(GetHostReg8(value), GetHostReg8(value));
+  m_emit.jz(continue_label);
+
+  // flush current state
+  m_register_cache.FlushAllGuestRegisters(false, false);
+  m_emit.jmp(m_block_exit_label, Xbyak::CodeGenerator::T_NEAR);
+
+  m_emit.L(continue_label);
+}
+
+void CodeGenerator::FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size)
+{
+  m_emit.ready();
+
+  const u32 size = static_cast<u32>(m_emit.getSize());
+  *out_host_code = m_emit.getCode<CodeBlock::HostCodePointer>();
+  *out_host_code_size = size;
+  m_code_buffer->CommitCode(size);
+  m_emit.reset();
+}
+
+void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
+{
+  switch (to_size)
+  {
+    case RegSize_16:
+    {
+      switch (from_size)
+      {
+        case RegSize_8:
+          m_emit.movsx(GetHostReg16(to_reg), GetHostReg8(from_reg));
+          return;
+      }
+    }
+    break;
+
+    case RegSize_32:
+    {
+      switch (from_size)
+      {
+        case RegSize_8:
+          m_emit.movsx(GetHostReg32(to_reg), GetHostReg8(from_reg));
+          return;
+        case RegSize_16:
+          m_emit.movsx(GetHostReg32(to_reg), GetHostReg16(from_reg));
+          return;
+      }
+    }
+    break;
+  }
+
+  Panic("Unknown sign-extend combination");
+}
+
+void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
+{
+  switch (to_size)
+  {
+    case RegSize_16:
+    {
+      switch (from_size)
+      {
+        case RegSize_8:
+          m_emit.movzx(GetHostReg16(to_reg), GetHostReg8(from_reg));
+          return;
+      }
+    }
+    break;
+
+    case RegSize_32:
+    {
+      switch (from_size)
+      {
+        case RegSize_8:
+          m_emit.movzx(GetHostReg32(to_reg), GetHostReg8(from_reg));
+          return;
+        case RegSize_16:
+          m_emit.movzx(GetHostReg32(to_reg), GetHostReg16(from_reg));
+          return;
+      }
+    }
+    break;
+  }
+
+  Panic("Unknown zero-extend combination");
+}
+
+void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value)
+{
+  DebugAssert(value.IsConstant() || value.IsInHostRegister());
+
+  switch (value.size)
+  {
+    case RegSize_8:
+    {
+      if (value.HasConstantValue(0))
+        m_emit.xor_(GetHostReg8(to_reg), GetHostReg8(to_reg));
+      else if (value.IsConstant())
+        m_emit.mov(GetHostReg8(to_reg), value.constant_value);
+      else
+        m_emit.mov(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
+    }
+    break;
+
+    case RegSize_16:
+    {
+      if (value.HasConstantValue(0))
+        m_emit.xor_(GetHostReg16(to_reg), GetHostReg16(to_reg));
+      else if (value.IsConstant())
+        m_emit.mov(GetHostReg16(to_reg), value.constant_value);
+      else
+        m_emit.mov(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
+    }
+    break;
+
+    case RegSize_32:
+    {
+      if (value.HasConstantValue(0))
+        m_emit.xor_(GetHostReg32(to_reg), GetHostReg32(to_reg));
+      else if (value.IsConstant())
+        m_emit.mov(GetHostReg32(to_reg), value.constant_value);
+      else
+        m_emit.mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
+    }
+    break;
+
+    case RegSize_64:
+    {
+      if (value.HasConstantValue(0))
+        m_emit.xor_(GetHostReg64(to_reg), GetHostReg64(to_reg));
+      else if (value.IsConstant())
+        m_emit.mov(GetHostReg64(to_reg), value.constant_value);
+      else
m_emit.mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); + } + break; + } +} + +void CodeGenerator::EmitAdd(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.add(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); + else + m_emit.add(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.add(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); + else + m_emit.add(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.add(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.add(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp.host_reg), value.constant_value); + m_emit.add(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); + } + else + { + m_emit.add(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.add(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); + } + } + break; + } +} + +void CodeGenerator::EmitSub(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.sub(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); + else + m_emit.sub(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.sub(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); + else + m_emit.sub(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.sub(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.sub(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp.host_reg), value.constant_value); + m_emit.sub(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); + } + else + { + m_emit.sub(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.sub(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); + } + } + break; + } +} + +void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.cmp(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value))); + else + m_emit.cmp(GetHostReg8(to_reg), GetHostReg8(value.host_reg)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.cmp(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value))); + else + m_emit.cmp(GetHostReg16(to_reg), GetHostReg16(value.host_reg)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.cmp(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if 
(!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp.host_reg), value.constant_value); + m_emit.cmp(GetHostReg64(to_reg), GetHostReg64(temp.host_reg)); + } + else + { + m_emit.cmp(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); + } + } + break; + } +} + +void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) +{ + switch (size) + { + case RegSize_8: + m_emit.inc(GetHostReg8(to_reg)); + break; + case RegSize_16: + m_emit.inc(GetHostReg16(to_reg)); + break; + case RegSize_32: + m_emit.inc(GetHostReg32(to_reg)); + break; + default: + UnreachableCode(); + break; + } +} + +void CodeGenerator::EmitDec(HostReg to_reg, RegSize size) +{ + switch (size) + { + case RegSize_8: + m_emit.dec(GetHostReg8(to_reg)); + break; + case RegSize_16: + m_emit.dec(GetHostReg16(to_reg)); + break; + case RegSize_32: + m_emit.dec(GetHostReg32(to_reg)); + break; + default: + UnreachableCode(); + break; + } +} + +void CodeGenerator::EmitShl(HostReg to_reg, RegSize size, const Value& amount_value) +{ + DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); + + // We have to use CL for the shift amount :( + const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && + (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); + if (save_cl) + m_emit.push(m_emit.rcx); + + if (!amount_value.IsConstant()) + m_emit.mov(m_emit.cl, GetHostReg8(amount_value.host_reg)); + + switch (size) + { + case RegSize_8: + { + if (amount_value.IsConstant()) + m_emit.shl(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.shl(GetHostReg8(to_reg), m_emit.cl); + } + break; + + case RegSize_16: + { + if (amount_value.IsConstant()) + m_emit.shl(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.shl(GetHostReg16(to_reg), m_emit.cl); + } + break; + + case RegSize_32: + { + if (amount_value.IsConstant()) + m_emit.shl(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.shl(GetHostReg32(to_reg), m_emit.cl); + } + break; + + case RegSize_64: + { + if (amount_value.IsConstant()) + m_emit.shl(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.shl(GetHostReg64(to_reg), m_emit.cl); + } + break; + } + + if (save_cl) + m_emit.pop(m_emit.rcx); +} + +void CodeGenerator::EmitShr(HostReg to_reg, RegSize size, const Value& amount_value) +{ + DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); + + // We have to use CL for the shift amount :( + const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && + (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); + if (save_cl) + m_emit.push(m_emit.rcx); + + if (!amount_value.IsConstant()) + m_emit.mov(m_emit.cl, GetHostReg8(amount_value.host_reg)); + + switch (size) + { + case RegSize_8: + { + if (amount_value.IsConstant()) + m_emit.shr(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.shr(GetHostReg8(to_reg), m_emit.cl); + } + break; + + case RegSize_16: + { + if (amount_value.IsConstant()) + m_emit.shr(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.shr(GetHostReg16(to_reg), m_emit.cl); + } + break; + + case RegSize_32: + { + if (amount_value.IsConstant()) + 
m_emit.shr(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.shr(GetHostReg32(to_reg), m_emit.cl); + } + break; + + case RegSize_64: + { + if (amount_value.IsConstant()) + m_emit.shr(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.shr(GetHostReg64(to_reg), m_emit.cl); + } + break; + } + + if (save_cl) + m_emit.pop(m_emit.rcx); +} + +void CodeGenerator::EmitSar(HostReg to_reg, RegSize size, const Value& amount_value) +{ + DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister()); + + // We have to use CL for the shift amount :( + const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) && + (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX)); + if (save_cl) + m_emit.push(m_emit.rcx); + + if (!amount_value.IsConstant()) + m_emit.mov(m_emit.cl, GetHostReg8(amount_value.host_reg)); + + switch (size) + { + case RegSize_8: + { + if (amount_value.IsConstant()) + m_emit.sar(GetHostReg8(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.sar(GetHostReg8(to_reg), m_emit.cl); + } + break; + + case RegSize_16: + { + if (amount_value.IsConstant()) + m_emit.sar(GetHostReg16(to_reg), Truncate8(amount_value.constant_value)); + else + m_emit.sar(GetHostReg16(to_reg), m_emit.cl); + } + break; + + case RegSize_32: + { + if (amount_value.IsConstant()) + m_emit.sar(GetHostReg32(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.sar(GetHostReg32(to_reg), m_emit.cl); + } + break; + + case RegSize_64: + { + if (amount_value.IsConstant()) + m_emit.sar(GetHostReg64(to_reg), Truncate32(amount_value.constant_value)); + else + m_emit.sar(GetHostReg64(to_reg), m_emit.cl); + } + break; + } + + if (save_cl) + m_emit.pop(m_emit.rcx); +} + +void CodeGenerator::EmitAnd(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.and_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); + else + m_emit.and_(GetHostReg8(to_reg), GetHostReg8(value)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.and_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); + else + m_emit.and_(GetHostReg16(to_reg), GetHostReg16(value)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.and_(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.and_(GetHostReg32(to_reg), GetHostReg32(value)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp), value.constant_value); + m_emit.and_(GetHostReg64(to_reg), GetHostReg64(temp)); + } + else + { + m_emit.and_(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.and_(GetHostReg64(to_reg), GetHostReg64(value)); + } + } + break; + } +} + +void CodeGenerator::EmitOr(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.or_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); + else + m_emit.or_(GetHostReg8(to_reg), GetHostReg8(value)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.or_(GetHostReg16(to_reg), 
Truncate32(value.constant_value & UINT32_C(0xFFFF))); + else + m_emit.or_(GetHostReg16(to_reg), GetHostReg16(value)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.or_(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.or_(GetHostReg32(to_reg), GetHostReg32(value)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp), value.constant_value); + m_emit.or_(GetHostReg64(to_reg), GetHostReg64(temp)); + } + else + { + m_emit.or_(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.or_(GetHostReg64(to_reg), GetHostReg64(value)); + } + } + break; + } +} + +void CodeGenerator::EmitXor(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.xor_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); + else + m_emit.xor_(GetHostReg8(to_reg), GetHostReg8(value)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.xor_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); + else + m_emit.xor_(GetHostReg16(to_reg), GetHostReg16(value)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.xor_(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.xor_(GetHostReg32(to_reg), GetHostReg32(value)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp), value.constant_value); + m_emit.xor_(GetHostReg64(to_reg), GetHostReg64(temp)); + } + else + { + m_emit.xor_(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.xor_(GetHostReg64(to_reg), GetHostReg64(value)); + } + } + break; + } +} + +void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) +{ + DebugAssert(value.IsConstant() || value.IsInHostRegister()); + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant()) + m_emit.test(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF))); + else + m_emit.test(GetHostReg8(to_reg), GetHostReg8(value)); + } + break; + + case RegSize_16: + { + if (value.IsConstant()) + m_emit.test(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF))); + else + m_emit.test(GetHostReg16(to_reg), GetHostReg16(value)); + } + break; + + case RegSize_32: + { + if (value.IsConstant()) + m_emit.test(GetHostReg32(to_reg), Truncate32(value.constant_value)); + else + m_emit.test(GetHostReg32(to_reg), GetHostReg32(value)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + m_emit.mov(GetHostReg64(temp), value.constant_value); + m_emit.test(GetHostReg64(to_reg), GetHostReg64(temp)); + } + else + { + m_emit.test(GetHostReg64(to_reg), Truncate32(value.constant_value)); + } + } + else + { + m_emit.test(GetHostReg64(to_reg), GetHostReg64(value)); + } + } + break; + } +} + +void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) +{ + switch (size) + { + case RegSize_8: + m_emit.not_(GetHostReg8(to_reg)); + break; + + case RegSize_16: + m_emit.not_(GetHostReg16(to_reg)); + break; + + case 
    RegSize_32:
+      m_emit.not_(GetHostReg32(to_reg));
+      break;
+
+    case RegSize_64:
+      m_emit.not_(GetHostReg64(to_reg));
+      break;
+
+    default:
+      break;
+  }
+}
+
+u32 CodeGenerator::PrepareStackForCall()
+{
+  // we assume that the stack is unaligned at this point
+  const u32 num_callee_saved = m_register_cache.GetActiveCalleeSavedRegisterCount();
+  const u32 num_caller_saved = m_register_cache.PushCallerSavedRegisters();
+  const u32 current_offset = 8 + (num_callee_saved + num_caller_saved) * 8;
+  const u32 aligned_offset = Common::AlignUp(current_offset + FUNCTION_CALL_SHADOW_SPACE, 16);
+  const u32 adjust_size = aligned_offset - current_offset;
+  if (adjust_size > 0)
+    m_emit.sub(m_emit.rsp, adjust_size);
+
+  return adjust_size;
+}
+
+void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
+{
+  if (adjust_size > 0)
+    m_emit.add(m_emit.rsp, adjust_size);
+
+  m_register_cache.PopCallerSavedRegisters();
+}
+
+void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
+{
+  if (return_value)
+    return_value->Discard();
+
+  // shadow space allocate
+  const u32 adjust_size = PrepareStackForCall();
+
+  // actually call the function
+  m_emit.mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
+  m_emit.call(GetHostReg64(RRETURN));
+
+  // shadow space release
+  RestoreStackAfterCall(adjust_size);
+
+  // copy out return value if requested
+  if (return_value)
+  {
+    return_value->Undiscard();
+    EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
+  }
+}
+
+void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1)
+{
+  if (return_value)
+    return_value->Discard();
+
+  // shadow space allocate
+  const u32 adjust_size = PrepareStackForCall();
+
+  // push arguments
+  EmitCopyValue(RARG1, arg1);
+
+  // actually call the function
+  if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit.getCurr())))
+  {
+    m_emit.call(ptr);
+  }
+  else
+  {
+    m_emit.mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
+    m_emit.call(GetHostReg64(RRETURN));
+  }
+
+  // shadow space release
+  RestoreStackAfterCall(adjust_size);
+
+  // copy out return value if requested
+  if (return_value)
+  {
+    return_value->Undiscard();
+    EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
+  }
+}
+
+void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2)
+{
+  if (return_value)
+    return_value->Discard();
+
+  // shadow space allocate
+  const u32 adjust_size = PrepareStackForCall();
+
+  // push arguments
+  EmitCopyValue(RARG1, arg1);
+  EmitCopyValue(RARG2, arg2);
+
+  // actually call the function
+  if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit.getCurr())))
+  {
+    m_emit.call(ptr);
+  }
+  else
+  {
+    m_emit.mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
+    m_emit.call(GetHostReg64(RRETURN));
+  }
+
+  // shadow space release
+  RestoreStackAfterCall(adjust_size);
+
+  // copy out return value if requested
+  if (return_value)
+  {
+    return_value->Undiscard();
+    EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
+  }
+}
+
+void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
+                                        const Value& arg3)
+{
+  if (return_value)
+    return_value->Discard();
+
+  // shadow space allocate
+  const u32 adjust_size = PrepareStackForCall();
+
+  // push arguments
+  EmitCopyValue(RARG1, arg1);
+  EmitCopyValue(RARG2, arg2);
+  EmitCopyValue(RARG3, arg3);
+
+  // actually call the function
+  if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit.getCurr())))
+  {
+    m_emit.call(ptr);
+  }
+  else
+  {
+    m_emit.mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
+    m_emit.call(GetHostReg64(RRETURN));
+  }
+
+  // shadow space release
+  RestoreStackAfterCall(adjust_size);
+
+  // copy out return value if requested
+  if (return_value)
+  {
+    return_value->Undiscard();
+    EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
+  }
+}
+
+void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
+                                        const Value& arg3, const Value& arg4)
+{
+  if (return_value)
+    return_value->Discard();
+
+  // shadow space allocate
+  const u32 adjust_size = PrepareStackForCall();
+
+  // push arguments
+  EmitCopyValue(RARG1, arg1);
+  EmitCopyValue(RARG2, arg2);
+  EmitCopyValue(RARG3, arg3);
+  EmitCopyValue(RARG4, arg4);
+
+  // actually call the function
+  if (Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit.getCurr())))
+  {
+    m_emit.call(ptr);
+  }
+  else
+  {
+    m_emit.mov(GetHostReg64(RRETURN), reinterpret_cast<size_t>(ptr));
+    m_emit.call(GetHostReg64(RRETURN));
+  }
+
+  // shadow space release
+  RestoreStackAfterCall(adjust_size);
+
+  // copy out return value if requested
+  if (return_value)
+  {
+    return_value->Undiscard();
+    EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
+  }
+}
+
+void CodeGenerator::EmitPushHostReg(HostReg reg)
+{
+  m_emit.push(GetHostReg64(reg));
+}
+
+void CodeGenerator::EmitPopHostReg(HostReg reg)
+{
+  m_emit.pop(GetHostReg64(reg));
+}
+
+void CodeGenerator::ReadFlagsFromHost(Value* value)
+{
+  // this is a 64-bit push/pop, we ignore the upper 32 bits
+  DebugAssert(value->IsInHostRegister());
+  m_emit.pushf();
+  m_emit.pop(GetHostReg64(value->host_reg));
+}
+
+Value CodeGenerator::ReadFlagsFromHost()
+{
+  Value temp = m_register_cache.AllocateScratch(RegSize_32);
+  ReadFlagsFromHost(&temp);
+  return temp;
+}
+
+void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset)
+{
+  switch (guest_size)
+  {
+    case RegSize_8:
+      m_emit.mov(GetHostReg8(host_reg), m_emit.byte[GetCPUPtrReg() + offset]);
+      break;
+
+    case RegSize_16:
+      m_emit.mov(GetHostReg16(host_reg), m_emit.word[GetCPUPtrReg() + offset]);
+      break;
+
+    case RegSize_32:
+      m_emit.mov(GetHostReg32(host_reg), m_emit.dword[GetCPUPtrReg() + offset]);
+      break;
+
+    case RegSize_64:
+      m_emit.mov(GetHostReg64(host_reg), m_emit.qword[GetCPUPtrReg() + offset]);
+      break;
+
+    default:
+    {
+      UnreachableCode();
+    }
+    break;
+  }
+}
+
+void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value)
+{
+  DebugAssert(value.IsInHostRegister() || value.IsConstant());
+  switch (value.size)
+  {
+    case RegSize_8:
+    {
+      if (value.IsConstant())
+        m_emit.mov(m_emit.byte[GetCPUPtrReg() + offset], value.constant_value);
+      else
+        m_emit.mov(m_emit.byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg));
+    }
+    break;
+
+    case RegSize_16:
+    {
+      if (value.IsConstant())
+        m_emit.mov(m_emit.word[GetCPUPtrReg() + offset], value.constant_value);
+      else
+        m_emit.mov(m_emit.word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg));
+    }
+    break;
+
+    case RegSize_32:
+    {
+      if (value.IsConstant())
+        m_emit.mov(m_emit.dword[GetCPUPtrReg() +
offset], value.constant_value); + else + m_emit.mov(m_emit.dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg)); + } + break; + + case RegSize_64: + { + if (value.IsConstant()) + { + // we need a temporary to load the value if it doesn't fit in 32-bits + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + EmitCopyValue(temp.host_reg, value); + m_emit.mov(m_emit.qword[GetCPUPtrReg() + offset], GetHostReg64(temp.host_reg)); + } + else + { + m_emit.mov(m_emit.qword[GetCPUPtrReg() + offset], value.constant_value); + } + } + else + { + m_emit.mov(m_emit.qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg)); + } + } + break; + + default: + { + UnreachableCode(); + } + break; + } +} + +void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) +{ + DebugAssert(value.IsInHostRegister() || value.IsConstant()); + switch (value.size) + { + case RegSize_8: + { + if (value.IsConstant() && value.constant_value == 1) + m_emit.inc(m_emit.byte[GetCPUPtrReg() + offset]); + else if (value.IsConstant()) + m_emit.add(m_emit.byte[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); + else + m_emit.add(m_emit.byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg)); + } + break; + + case RegSize_16: + { + if (value.IsConstant() && value.constant_value == 1) + m_emit.inc(m_emit.word[GetCPUPtrReg() + offset]); + else if (value.IsConstant()) + m_emit.add(m_emit.word[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); + else + m_emit.add(m_emit.word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg)); + } + break; + + case RegSize_32: + { + if (value.IsConstant() && value.constant_value == 1) + m_emit.inc(m_emit.dword[GetCPUPtrReg() + offset]); + else if (value.IsConstant()) + m_emit.add(m_emit.dword[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); + else + m_emit.add(m_emit.dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg)); + } + break; + + case RegSize_64: + { + if (value.IsConstant() && value.constant_value == 1) + { + m_emit.inc(m_emit.qword[GetCPUPtrReg() + offset]); + } + else if (value.IsConstant()) + { + // we need a temporary to load the value if it doesn't fit in 32-bits + if (!Xbyak::inner::IsInInt32(value.constant_value)) + { + Value temp = m_register_cache.AllocateScratch(RegSize_64); + EmitCopyValue(temp.host_reg, value); + m_emit.add(m_emit.qword[GetCPUPtrReg() + offset], GetHostReg64(temp.host_reg)); + } + else + { + m_emit.add(m_emit.qword[GetCPUPtrReg() + offset], Truncate32(value.constant_value)); + } + } + else + { + m_emit.add(m_emit.qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg)); + } + } + break; + + default: + { + UnreachableCode(); + } + break; + } +} + +#if 0 +class ThunkGenerator +{ +public: + template + static DataType (*CompileMemoryReadFunction(JitCodeBuffer* code_buffer))(u8, u32) + { + using FunctionType = DataType (*)(u8, u32); + const auto rret = GetHostReg64(RRETURN); + const auto rcpuptr = GetHostReg64(RCPUPTR); + const auto rarg1 = GetHostReg32(RARG1); + const auto rarg2 = GetHostReg32(RARG2); + const auto rarg3 = GetHostReg32(RARG3); + const auto scratch = GetHostReg64(RARG3); + + Xbyak::CodeGenerator emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()); + + // ensure function starts at aligned 16 bytes + emitter.align(); + FunctionType ret = emitter.getCurr(); + + // TODO: We can skip these if the base address is zero and the size is 4GB. 
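+    // (Note: this disabled ThunkGenerator appears to be carried over from an
+    // x86 emulator: it validates the segment cache's access mask and limits,
+    // tail-calls the memory handler, and raises #GP/#SS on failure. None of
+    // it applies to the PSX CPU; the active thunks live in
+    // cpu_recompiler_thunks.cpp.)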
+ Xbyak::Label raise_gpf_label; + + static_assert(sizeof(CPU::SegmentCache) == 16); + emitter.movzx(rarg1, rarg1.cvt8()); + emitter.shl(rarg1, 4); + emitter.lea(rret, emitter.byte[rcpuptr + rarg1.cvt64() + offsetof(CPU, m_segment_cache[0])]); + + // if segcache->access_mask & Read == 0 + emitter.test(emitter.byte[rret + offsetof(CPU::SegmentCache, access_mask)], static_cast(AccessTypeMask::Read)); + emitter.jz(raise_gpf_label); + + // if offset < limit_low + emitter.cmp(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, limit_low)]); + emitter.jb(raise_gpf_label); + + // if offset + (size - 1) > limit_high + // offset += segcache->base_address + if constexpr (sizeof(DataType) > 1) + { + emitter.lea(scratch, emitter.qword[rarg2.cvt64() + (sizeof(DataType) - 1)]); + emitter.add(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, base_address)]); + emitter.mov(rret.cvt32(), emitter.dword[rret + offsetof(CPU::SegmentCache, limit_high)]); + emitter.cmp(scratch, rret); + emitter.ja(raise_gpf_label); + } + else + { + emitter.cmp(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, limit_high)]); + emitter.ja(raise_gpf_label); + emitter.add(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, base_address)]); + } + + // swap segment with CPU + emitter.mov(rarg1, rcpuptr); + + // go ahead with the memory read + if constexpr (std::is_same_v) + { + emitter.mov(rret, reinterpret_cast(static_cast(&CPU::ReadMemoryByte))); + } + else if constexpr (std::is_same_v) + { + emitter.mov(rret, + reinterpret_cast(static_cast(&CPU::ReadMemoryWord))); + } + else + { + emitter.mov(rret, + reinterpret_cast(static_cast(&CPU::ReadMemoryDWord))); + } + + emitter.jmp(rret); + + // RAISE GPF BRANCH + emitter.L(raise_gpf_label); + + // register swap since the CPU has to come first + emitter.cmp(rarg1, (Segment_SS << 4)); + emitter.mov(rarg1, Interrupt_StackFault); + emitter.mov(rarg2, Interrupt_GeneralProtectionFault); + emitter.cmove(rarg2, rarg1); + emitter.xor_(rarg3, rarg3); + emitter.mov(rarg1, rcpuptr); + + // cpu->RaiseException(ss ? Interrupt_StackFault : Interrupt_GeneralProtectionFault, 0) + emitter.mov(rret, reinterpret_cast(static_cast(&CPU::RaiseException))); + emitter.jmp(rret); + + emitter.ready(); + code_buffer->CommitCode(emitter.getSize()); + return ret; + } + + template + static void (*CompileMemoryWriteFunction(JitCodeBuffer* code_buffer))(u8, u32, DataType) + { + using FunctionType = void (*)(u8, u32, DataType); + const auto rret = GetHostReg64(RRETURN); + const auto rcpuptr = GetHostReg64(RCPUPTR); + const auto rarg1 = GetHostReg32(RARG1); + const auto rarg2 = GetHostReg32(RARG2); + const auto rarg3 = GetHostReg32(RARG3); + const auto scratch = GetHostReg64(RARG4); + + Xbyak::CodeGenerator emitter(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer()); + + // ensure function starts at aligned 16 bytes + emitter.align(); + FunctionType ret = emitter.getCurr(); + + // TODO: We can skip these if the base address is zero and the size is 4GB. 
+ Xbyak::Label raise_gpf_label; + + static_assert(sizeof(CPU::SegmentCache) == 16); + emitter.movzx(rarg1, rarg1.cvt8()); + emitter.shl(rarg1, 4); + emitter.lea(rret, emitter.byte[rcpuptr + rarg1.cvt64() + offsetof(CPU, m_segment_cache[0])]); + + // if segcache->access_mask & Read == 0 + emitter.test(emitter.byte[rret + offsetof(CPU::SegmentCache, access_mask)], + static_cast(AccessTypeMask::Write)); + emitter.jz(raise_gpf_label); + + // if offset < limit_low + emitter.cmp(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, limit_low)]); + emitter.jb(raise_gpf_label); + + // if offset + (size - 1) > limit_high + // offset += segcache->base_address + if constexpr (sizeof(DataType) > 1) + { + emitter.lea(scratch, emitter.qword[rarg2.cvt64() + (sizeof(DataType) - 1)]); + emitter.add(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, base_address)]); + emitter.mov(rret.cvt32(), emitter.dword[rret + offsetof(CPU::SegmentCache, limit_high)]); + emitter.cmp(scratch, rret.cvt64()); + emitter.ja(raise_gpf_label); + } + else + { + emitter.cmp(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, limit_high)]); + emitter.ja(raise_gpf_label); + emitter.add(rarg2, emitter.dword[rret + offsetof(CPU::SegmentCache, base_address)]); + } + + // swap segment with CPU + emitter.mov(rarg1, rcpuptr); + + // go ahead with the memory read + if constexpr (std::is_same_v) + { + emitter.mov( + rret, reinterpret_cast(static_cast(&CPU::WriteMemoryByte))); + } + else if constexpr (std::is_same_v) + { + emitter.mov( + rret, reinterpret_cast(static_cast(&CPU::WriteMemoryWord))); + } + else + { + emitter.mov( + rret, reinterpret_cast(static_cast(&CPU::WriteMemoryDWord))); + } + + emitter.jmp(rret); + + // RAISE GPF BRANCH + emitter.L(raise_gpf_label); + + // register swap since the CPU has to come first + emitter.cmp(rarg1, (Segment_SS << 4)); + emitter.mov(rarg1, Interrupt_StackFault); + emitter.mov(rarg2, Interrupt_GeneralProtectionFault); + emitter.cmove(rarg2, rarg1); + emitter.xor_(rarg3, rarg3); + emitter.mov(rarg1, rcpuptr); + + // cpu->RaiseException(ss ? 
+    //     Interrupt_StackFault : Interrupt_GeneralProtectionFault, 0)
+    emitter.mov(rret, reinterpret_cast(static_cast(&CPU::RaiseException)));
+    emitter.jmp(rret);
+
+    emitter.ready();
+    code_buffer->CommitCode(emitter.getSize());
+    return ret;
+  }
+};
+
+#endif
+
+void ASMFunctions::Generate(JitCodeBuffer* code_buffer)
+{
+#if 0
+  read_memory_byte = ThunkGenerator::CompileMemoryReadFunction<u8>(code_buffer);
+  read_memory_word = ThunkGenerator::CompileMemoryReadFunction<u16>(code_buffer);
+  read_memory_dword = ThunkGenerator::CompileMemoryReadFunction<u32>(code_buffer);
+  write_memory_byte = ThunkGenerator::CompileMemoryWriteFunction<u8>(code_buffer);
+  write_memory_word = ThunkGenerator::CompileMemoryWriteFunction<u16>(code_buffer);
+  write_memory_dword = ThunkGenerator::CompileMemoryWriteFunction<u32>(code_buffer);
+#endif
+}
+
+} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_register_cache.cpp b/src/core/cpu_recompiler_register_cache.cpp
new file mode 100644
index 000000000..3d8ae9f0f
--- /dev/null
+++ b/src/core/cpu_recompiler_register_cache.cpp
@@ -0,0 +1,604 @@
+#include "cpu_recompiler_register_cache.h"
+#include "YBaseLib/Log.h"
+#include "cpu_recompiler_code_generator.h"
+#include <cinttypes>
+Log_SetChannel(CPU::Recompiler);
+
+namespace CPU::Recompiler {
+
+Value::Value() = default;
+
+Value::Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_)
+  : regcache(regcache_), constant_value(constant_), size(size_), flags(flags_)
+{
+}
+
+Value::Value(const Value& other)
+  : regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
+    flags(other.flags)
+{
+  AssertMsg(!other.IsScratch(), "Can't copy a temporary register");
+}
+
+Value::Value(Value&& other)
+  : regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
+    flags(other.flags)
+{
+  other.Clear();
+}
+
+Value::Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_)
+  : regcache(regcache_), host_reg(reg_), size(size_), flags(flags_)
+{
+}
+
+Value::~Value()
+{
+  Release();
+}
+
+Value& Value::operator=(const Value& other)
+{
+  AssertMsg(!other.IsScratch(), "Can't copy a temporary register");
+
+  Release();
+  regcache = other.regcache;
+  constant_value = other.constant_value;
+  host_reg = other.host_reg;
+  size = other.size;
+  flags = other.flags;
+
+  return *this;
+}
+
+Value& Value::operator=(Value&& other)
+{
+  Release();
+  regcache = other.regcache;
+  constant_value = other.constant_value;
+  host_reg = other.host_reg;
+  size = other.size;
+  flags = other.flags;
+  other.Clear();
+  return *this;
+}
+
+void Value::Clear()
+{
+  regcache = nullptr;
+  constant_value = 0;
+  host_reg = {};
+  size = RegSize_8;
+  flags = ValueFlags::None;
+}
+
+void Value::Release()
+{
+  if (IsScratch())
+  {
+    DebugAssert(IsInHostRegister() && regcache);
+    regcache->FreeHostReg(host_reg);
+  }
+}
+
+void Value::ReleaseAndClear()
+{
+  Release();
+  Clear();
+}
+
+void Value::Discard()
+{
+  DebugAssert(IsInHostRegister());
+  regcache->DiscardHostReg(host_reg);
+}
+
+void Value::Undiscard()
+{
+  DebugAssert(IsInHostRegister());
+  regcache->UndiscardHostReg(host_reg);
+}
+
+RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator)
+{
+  m_guest_register_order.fill(Reg::count);
+}
+
+RegisterCache::~RegisterCache() = default;
+
+void RegisterCache::SetHostRegAllocationOrder(std::initializer_list<HostReg> regs)
+{
+  size_t index = 0;
+  for (HostReg reg : regs)
+  {
+    m_host_register_state[reg] = HostRegState::Usable;
+    m_host_register_allocation_order[index++] = reg;
+  }
+  m_host_register_available_count = static_cast<u32>(index);
+}
+
+void RegisterCache::SetCallerSavedHostRegs(std::initializer_list<HostReg> regs)
+{
+  for (HostReg reg : regs)
+    m_host_register_state[reg] |= HostRegState::CallerSaved;
+}
+
+void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs)
+{
+  for (HostReg reg : regs)
+    m_host_register_state[reg] |= HostRegState::CalleeSaved;
+}
+
+void RegisterCache::SetCPUPtrHostReg(HostReg reg)
+{
+  m_cpu_ptr_host_register = reg;
+}
+
+bool RegisterCache::IsUsableHostReg(HostReg reg) const
+{
+  return (m_host_register_state[reg] & HostRegState::Usable) != HostRegState::None;
+}
+
+bool RegisterCache::IsHostRegInUse(HostReg reg) const
+{
+  return (m_host_register_state[reg] & HostRegState::InUse) != HostRegState::None;
+}
+
+bool RegisterCache::HasFreeHostRegister() const
+{
+  for (const HostRegState state : m_host_register_state)
+  {
+    if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
+      return true;
+  }
+
+  return false;
+}
+
+u32 RegisterCache::GetUsedHostRegisters() const
+{
+  u32 count = 0;
+  for (const HostRegState state : m_host_register_state)
+  {
+    if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable | HostRegState::InUse))
+      count++;
+  }
+
+  return count;
+}
+
+u32 RegisterCache::GetFreeHostRegisters() const
+{
+  u32 count = 0;
+  for (const HostRegState state : m_host_register_state)
+  {
+    if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
+      count++;
+  }
+
+  return count;
+}
+
+HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */)
+{
+  // try for a free register in allocation order
+  for (u32 i = 0; i < m_host_register_available_count; i++)
+  {
+    const HostReg reg = m_host_register_allocation_order[i];
+    if ((m_host_register_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable)
+    {
+      if (AllocateHostReg(reg, state))
+        return reg;
+    }
+  }
+
+  // evict one of the cached guest registers
+  if (!EvictOneGuestRegister())
+    Panic("Failed to evict guest register for new allocation");
+
+  return AllocateHostReg(state);
+}
+
+bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/)
+{
+  if ((m_host_register_state[reg] & HostRegState::InUse) == HostRegState::InUse)
+    return false;
+
+  m_host_register_state[reg] |= state;
+
+  if ((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
+      HostRegState::CalleeSaved)
+  {
+    // new register we need to save..
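+    // Callee-saved registers are pushed lazily, the first time they are
+    // allocated, and recorded in order so PopCalleeSavedRegisters() can
+    // restore them in reverse at the end of the block.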
+    DebugAssert(m_host_register_callee_saved_order_count < HostReg_Count);
+    m_host_register_callee_saved_order[m_host_register_callee_saved_order_count++] = reg;
+    m_host_register_state[reg] |= HostRegState::CalleeSavedAllocated;
+    m_code_generator.EmitPushHostReg(reg);
+  }
+
+  return true;
+}
+
+void RegisterCache::DiscardHostReg(HostReg reg)
+{
+  DebugAssert(IsHostRegInUse(reg));
+  Log_DebugPrintf("Discarding host register %s", m_code_generator.GetHostRegName(reg));
+  m_host_register_state[reg] |= HostRegState::Discarded;
+}
+
+void RegisterCache::UndiscardHostReg(HostReg reg)
+{
+  DebugAssert(IsHostRegInUse(reg));
+  Log_DebugPrintf("Undiscarding host register %s", m_code_generator.GetHostRegName(reg));
+  m_host_register_state[reg] &= ~HostRegState::Discarded;
+}
+
+void RegisterCache::FreeHostReg(HostReg reg)
+{
+  DebugAssert(IsHostRegInUse(reg));
+  Log_DebugPrintf("Freeing host register %s", m_code_generator.GetHostRegName(reg));
+  m_host_register_state[reg] &= ~HostRegState::InUse;
+}
+
+void RegisterCache::EnsureHostRegFree(HostReg reg)
+{
+  if (!IsHostRegInUse(reg))
+    return;
+
+  for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
+  {
+    if (m_guest_reg_cache[i].IsInHostRegister() && m_guest_reg_cache[i].GetHostRegister() == reg)
+      FlushGuestRegister(m_guest_reg_cache[i], static_cast<Reg>(i), true, true);
+  }
+}
+
+Value RegisterCache::GetCPUPtr()
+{
+  return Value::FromHostReg(this, m_cpu_ptr_host_register, HostPointerSize);
+}
+
+Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Invalid */)
+{
+  if (reg == HostReg_Invalid)
+  {
+    reg = AllocateHostReg();
+  }
+  else
+  {
+    Assert(!IsHostRegInUse(reg));
+    if (!AllocateHostReg(reg))
+      Panic("Failed to allocate specific host register");
+  }
+
+  Log_DebugPrintf("Allocating host register %s as scratch", m_code_generator.GetHostRegName(reg));
+  return Value::FromScratch(this, reg, size);
+}
+
+u32 RegisterCache::PushCallerSavedRegisters() const
+{
+  u32 count = 0;
+  for (u32 i = 0; i < HostReg_Count; i++)
+  {
+    if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
+        (HostRegState::CallerSaved | HostRegState::InUse))
+    {
+      m_code_generator.EmitPushHostReg(static_cast<HostReg>(i));
+      count++;
+    }
+  }
+
+  return count;
+}
+
+u32 RegisterCache::PopCallerSavedRegisters() const
+{
+  u32 count = 0;
+  u32 i = HostReg_Count;
+  do
+  {
+    i--;
+    if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
+        (HostRegState::CallerSaved | HostRegState::InUse))
+    {
+      m_code_generator.EmitPopHostReg(static_cast<HostReg>(i));
+      count++;
+    }
+  } while (i > 0);
+  return count;
+}
+
+u32 RegisterCache::PopCalleeSavedRegisters()
+{
+  if (m_host_register_callee_saved_order_count == 0)
+    return 0;
+
+  u32 count = 0;
+  u32 i = m_host_register_callee_saved_order_count;
+  do
+  {
+    const HostReg reg = m_host_register_callee_saved_order[i - 1];
+    DebugAssert((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
+                (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));
+
+    m_code_generator.EmitPopHostReg(reg);
+    m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated;
+    count++;
+    i--;
+  } while (i > 0);
+  return count;
+}
+
+Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */,
+                                       HostReg forced_host_reg /* = HostReg_Invalid */)
+{
+  return ReadGuestRegister(m_guest_reg_cache[static_cast<u8>(guest_reg)], guest_reg, cache, force_host_register,
+                           forced_host_reg);
+}
+
+Value RegisterCache::ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register,
+                                       HostReg forced_host_reg)
+{
+  // register zero is always zero
+  if (guest_reg == Reg::zero)
+    return Value::FromConstantU32(0);
+
+  if (cache_value.IsValid())
+  {
+    if (cache_value.IsInHostRegister())
+    {
+      PushRegisterToOrder(guest_reg);
+
+      // if it's in the wrong register, return it as scratch
+      if (forced_host_reg == HostReg_Invalid || cache_value.GetHostRegister() == forced_host_reg)
+        return cache_value;
+
+      Value temp = AllocateScratch(RegSize_32, forced_host_reg);
+      m_code_generator.EmitCopyValue(forced_host_reg, cache_value);
+      return temp;
+    }
+    else if (force_host_register)
+    {
+      // if it's not in a register, it should be constant
+      DebugAssert(cache_value.IsConstant());
+
+      HostReg host_reg;
+      if (forced_host_reg == HostReg_Invalid)
+      {
+        host_reg = AllocateHostReg();
+      }
+      else
+      {
+        Assert(!IsHostRegInUse(forced_host_reg));
+        if (!AllocateHostReg(forced_host_reg))
+          Panic("Failed to allocate specific host register");
+        host_reg = forced_host_reg;
+      }
+
+      Log_DebugPrintf("Allocated host register %s for constant guest register %s (0x%" PRIX64 ")",
+                      m_code_generator.GetHostRegName(host_reg), GetRegName(guest_reg), cache_value.constant_value);
+
+      m_code_generator.EmitCopyValue(host_reg, cache_value);
+      cache_value.AddHostReg(this, host_reg);
+      AppendRegisterToOrder(guest_reg);
+
+      // if we're forcing a host register, we're probably going to be changing the value,
+      // in which case the constant won't be correct anyway. so just drop it.
+      cache_value.ClearConstant();
+      return cache_value;
+    }
+    else
+    {
+      // constant
+      return cache_value;
+    }
+  }
+
+  HostReg host_reg;
+  if (forced_host_reg == HostReg_Invalid)
+  {
+    host_reg = AllocateHostReg();
+  }
+  else
+  {
+    Assert(!IsHostRegInUse(forced_host_reg));
+    if (!AllocateHostReg(forced_host_reg))
+      Panic("Failed to allocate specific host register");
+    host_reg = forced_host_reg;
+  }
+
+  m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg);
+
+  Log_DebugPrintf("Loading guest register %s to host register %s%s", GetRegName(guest_reg),
+                  m_code_generator.GetHostRegName(host_reg, RegSize_32), cache ? " (cached)" : "");
+
+  if (cache)
+  {
+    // Now in cache.
+    cache_value.SetHostReg(this, host_reg, RegSize_32);
+    AppendRegisterToOrder(guest_reg);
+    return cache_value;
+  }
+  else
+  {
+    // Skip caching, return the register as a value.
+    return Value::FromScratch(this, host_reg, RegSize_32);
+  }
+}
+
+Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value)
+{
+  return WriteGuestRegister(m_guest_reg_cache[static_cast<u8>(guest_reg)], guest_reg, std::move(value));
+}
+
+Value RegisterCache::WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value)
+{
+  // ignore writes to register zero
+  if (guest_reg == Reg::zero)
+    return std::move(value);
+
+  DebugAssert(value.size == RegSize_32);
+  if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg)
+  {
+    // updating the register value.
+    Log_DebugPrintf("Updating guest register %s (in host register %s)", GetRegName(guest_reg),
+                    m_code_generator.GetHostRegName(value.host_reg, RegSize_32));
+    cache_value = std::move(value);
+    cache_value.SetDirty();
+    return cache_value;
+  }
+
+  InvalidateGuestRegister(cache_value, guest_reg);
+  DebugAssert(!cache_value.IsValid());
+
+  if (value.IsConstant())
+  {
+    // No need to allocate a host register, and we can defer the store.
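+    // The constant lives only in the cache entry; FlushGuestRegister() emits
+    // the store to the CPU struct when the register is flushed or evicted.
+    // This is what lets, e.g., a lui/ori pair collapse to a single constant
+    // store.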
+    cache_value = value;
+    cache_value.SetDirty();
+    return cache_value;
+  }
+
+  AppendRegisterToOrder(guest_reg);
+
+  // If it's a temporary, we can bind that to the guest register.
+  if (value.IsScratch())
+  {
+    Log_DebugPrintf("Binding scratch register %s to guest register %s",
+                    m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg));
+
+    cache_value = std::move(value);
+    cache_value.flags &= ~ValueFlags::Scratch;
+    cache_value.SetDirty();
+    return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
+  }
+
+  // Allocate host register, and copy value to it.
+  HostReg host_reg = AllocateHostReg();
+  m_code_generator.EmitCopyValue(host_reg, value);
+  cache_value.SetHostReg(this, host_reg, RegSize_32);
+  cache_value.SetDirty();
+
+  Log_DebugPrintf("Copying non-scratch register %s to %s to guest register %s",
+                  m_code_generator.GetHostRegName(value.host_reg, RegSize_32),
+                  m_code_generator.GetHostRegName(host_reg, RegSize_32), GetRegName(guest_reg));
+
+  return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
+}
+
+void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty)
+{
+  FlushGuestRegister(m_guest_reg_cache[static_cast<u8>(guest_reg)], guest_reg, invalidate, clear_dirty);
+}
+
+void RegisterCache::FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty)
+{
+  if (cache_value.IsDirty())
+  {
+    if (cache_value.IsInHostRegister())
+    {
+      Log_DebugPrintf("Flushing guest register %s from host register %s", GetRegName(guest_reg),
+                      m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32));
+    }
+    else if (cache_value.IsConstant())
+    {
+      Log_DebugPrintf("Flushing guest register %s from constant 0x%" PRIX64, GetRegName(guest_reg),
+                      cache_value.constant_value);
+    }
+    m_code_generator.EmitStoreGuestRegister(guest_reg, cache_value);
+    if (clear_dirty)
+      cache_value.ClearDirty();
+  }
+
+  if (invalidate)
+    InvalidateGuestRegister(cache_value, guest_reg);
+}
+
+void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
+{
+  InvalidateGuestRegister(m_guest_reg_cache[static_cast<u8>(guest_reg)], guest_reg);
+}
+
+void RegisterCache::InvalidateGuestRegister(Value& cache_value, Reg guest_reg)
+{
+  if (!cache_value.IsValid())
+    return;
+
+  if (cache_value.IsInHostRegister())
+  {
+    FreeHostReg(cache_value.host_reg);
+    ClearRegisterFromOrder(guest_reg);
+  }
+
+  Log_DebugPrintf("Invalidating guest register %s", GetRegName(guest_reg));
+  cache_value.Clear();
+}
+
+void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
+{
+  for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
+    FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
+}
+
+bool RegisterCache::EvictOneGuestRegister()
+{
+  if (m_guest_register_order_count == 0)
+    return false;
+
+  // evict the register used the longest time ago
+  Reg evict_reg = m_guest_register_order[m_guest_register_order_count - 1];
+  Log_ProfilePrintf("Evicting guest register %s", GetRegName(evict_reg));
+  FlushGuestRegister(evict_reg, true, true);
+
+  return HasFreeHostRegister();
+}
+
+void RegisterCache::ClearRegisterFromOrder(Reg reg)
+{
+  for (u32 i = 0; i < m_guest_register_order_count; i++)
+  {
+    if (m_guest_register_order[i] == reg)
+    {
+      // move the registers after backwards into this spot
+      const u32 count_after = m_guest_register_order_count - i - 1;
+      if (count_after > 0)
+        std::memmove(&m_guest_register_order[i], &m_guest_register_order[i + 1], sizeof(Reg) * count_after);
+      else
+        m_guest_register_order[i] = Reg::count;
+
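+      // Slot 0 of the order array is always the most recently used entry;
+      // EvictOneGuestRegister() takes its victim from the tail.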
+      m_guest_register_order_count--;
+      return;
+    }
+  }
+
+  Panic("Clearing register from order not in order");
+}
+
+void RegisterCache::PushRegisterToOrder(Reg reg)
+{
+  for (u32 i = 0; i < m_guest_register_order_count; i++)
+  {
+    if (m_guest_register_order[i] == reg)
+    {
+      // move the registers after backwards into this spot
+      const u32 count_before = i;
+      if (count_before > 0)
+        std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * count_before);
+
+      m_guest_register_order[0] = reg;
+      return;
+    }
+  }
+
+  Panic("Attempt to push register which is not ordered");
+}
+
+void RegisterCache::AppendRegisterToOrder(Reg reg)
+{
+  DebugAssert(m_guest_register_order_count < HostReg_Count);
+  if (m_guest_register_order_count > 0)
+    std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * m_guest_register_order_count);
+  m_guest_register_order[0] = reg;
+  m_guest_register_order_count++;
+}
+
+} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_register_cache.h b/src/core/cpu_recompiler_register_cache.h
new file mode 100644
index 000000000..b01637d25
--- /dev/null
+++ b/src/core/cpu_recompiler_register_cache.h
@@ -0,0 +1,243 @@
+#pragma once
+#include "YBaseLib/Assert.h"
+#include "cpu_recompiler_types.h"
+#include "cpu_types.h"
+
+#include <array>
+#include <initializer_list>
+#include <optional>
+
+namespace CPU::Recompiler {
+
+enum class HostRegState : u8
+{
+  None = 0,
+  Usable = (1 << 1),               // Can be allocated
+  CallerSaved = (1 << 2),          // Register is caller-saved, and should be saved/restored after calling a function.
+  CalleeSaved = (1 << 3),          // Register is callee-saved, and should be restored after leaving the block.
+  InUse = (1 << 4),                // In-use, must be saved/restored across function calls.
+  CalleeSavedAllocated = (1 << 5), // Register was callee-saved and allocated, so should be restored before returning.
+  Discarded = (1 << 6),            // Register contents are not used, so do not preserve across function calls.
+};
+IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(HostRegState);
+
+enum class ValueFlags : u8
+{
+  None = 0,
+  Valid = (1 << 0),
+  Constant = (1 << 1),       // The value itself is constant, and not in a register.
+  InHostRegister = (1 << 2), // The value itself is located in a host register.
+  Scratch = (1 << 3),        // The value is temporary, and will be released after the Value is destroyed.
+  Dirty = (1 << 4),          // For register cache values, the value needs to be written back to the CPU struct.
+};
+IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(ValueFlags);
+
+struct Value
+{
+  RegisterCache* regcache = nullptr;
+  u64 constant_value = 0;
+  HostReg host_reg = {};
+
+  RegSize size = RegSize_8;
+  ValueFlags flags = ValueFlags::None;
+
+  Value();
+  Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_);
+  Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_);
+  Value(const Value& other);
+  Value(Value&& other);
+  ~Value();
+
+  Value& operator=(const Value& other);
+  Value& operator=(Value&& other);
+
+  bool IsConstant() const { return (flags & ValueFlags::Constant) != ValueFlags::None; }
+  bool IsValid() const { return (flags & ValueFlags::Valid) != ValueFlags::None; }
+  bool IsInHostRegister() const { return (flags & ValueFlags::InHostRegister) != ValueFlags::None; }
+  bool IsScratch() const { return (flags & ValueFlags::Scratch) != ValueFlags::None; }
+
+  /// Returns the host register this value is bound to.
+  HostReg GetHostRegister() const
+  {
+    DebugAssert(IsInHostRegister());
+    return host_reg;
+  }
+
+  /// Returns true if this value is constant and has the specified value.
+  bool HasConstantValue(u64 cv) const
+  {
+    return (((flags & ValueFlags::Constant) != ValueFlags::None) && constant_value == cv);
+  }
+
+  /// Removes the contents of this value. Use with care, as scratch/temporaries are not released.
+  void Clear();
+
+  /// Releases the host register if needed, and clears the contents.
+  void ReleaseAndClear();
+
+  /// Flags the value as being discarded. Call Undiscard() to track it again.
+  void Discard();
+  void Undiscard();
+
+  void AddHostReg(RegisterCache* regcache_, HostReg hr)
+  {
+    DebugAssert(IsValid());
+    regcache = regcache_;
+    host_reg = hr;
+    flags |= ValueFlags::InHostRegister;
+  }
+
+  void SetHostReg(RegisterCache* regcache_, HostReg hr, RegSize size_)
+  {
+    regcache = regcache_;
+    constant_value = 0;
+    host_reg = hr;
+    size = size_;
+    flags = ValueFlags::Valid | ValueFlags::InHostRegister;
+  }
+
+  void ClearConstant()
+  {
+    // By clearing the constant bit, we should already be in a host register.
+    DebugAssert(IsInHostRegister());
+    flags &= ~ValueFlags::Constant;
+  }
+
+  bool IsDirty() const { return (flags & ValueFlags::Dirty) != ValueFlags::None; }
+  void SetDirty() { flags |= ValueFlags::Dirty; }
+  void ClearDirty() { flags &= ~ValueFlags::Dirty; }
+
+  static Value FromHostReg(RegisterCache* regcache, HostReg reg, RegSize size)
+  {
+    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister);
+  }
+  static Value FromScratch(RegisterCache* regcache, HostReg reg, RegSize size)
+  {
+    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister | ValueFlags::Scratch);
+  }
+  static Value FromConstant(u64 cv, RegSize size)
+  {
+    return Value(nullptr, cv, size, ValueFlags::Valid | ValueFlags::Constant);
+  }
+  static Value FromConstantU8(u8 value) { return FromConstant(ZeroExtend64(value), RegSize_8); }
+  static Value FromConstantU16(u16 value) { return FromConstant(ZeroExtend64(value), RegSize_16); }
+  static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); }
+  static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); }
+
+private:
+  void Release();
+};
+
+class RegisterCache
+{
+public:
+  RegisterCache(CodeGenerator& code_generator);
+  ~RegisterCache();
+
+  u32 GetActiveCalleeSavedRegisterCount() const { return m_host_register_callee_saved_order_count; }
+
+  //////////////////////////////////////////////////////////////////////////
+  // Register Allocation
+  //////////////////////////////////////////////////////////////////////////
+  void SetHostRegAllocationOrder(std::initializer_list<HostReg> regs);
+  void SetCallerSavedHostRegs(std::initializer_list<HostReg> regs);
+  void SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs);
+  void SetCPUPtrHostReg(HostReg reg);
+
+  /// Returns true if the register is permitted to be used in the register cache.
+  bool IsUsableHostReg(HostReg reg) const;
+  bool IsHostRegInUse(HostReg reg) const;
+  bool HasFreeHostRegister() const;
+  u32 GetUsedHostRegisters() const;
+  u32 GetFreeHostRegisters() const;
+
+  /// Allocates a new host register. If there are no free registers, the guest register which was accessed the longest
+  /// time ago will be evicted.
+  HostReg AllocateHostReg(HostRegState state = HostRegState::InUse);
+
+  /// Allocates a specific host register. If this register is not free, returns false.
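+  /// (Used when a value must land in a particular register, e.g. ReadGuestRegister()
+  /// with a forced_host_reg, or AllocateScratch() with an explicit register.)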
+  bool AllocateHostReg(HostReg reg, HostRegState state = HostRegState::InUse);
+
+  /// Flags the host register as discardable. This means its contents are no longer required, and it will not be
+  /// pushed when saving caller-saved registers.
+  void DiscardHostReg(HostReg reg);
+
+  /// Clears the discardable flag on a host register, so that its contents will be preserved across function calls.
+  void UndiscardHostReg(HostReg reg);
+
+  /// Frees a host register, making it usable in future allocations.
+  void FreeHostReg(HostReg reg);
+
+  /// Ensures a host register is free, removing any cached value.
+  void EnsureHostRegFree(HostReg reg);
+
+  /// Push/pop volatile host registers. Returns the number of registers pushed/popped.
+  u32 PushCallerSavedRegisters() const;
+  u32 PopCallerSavedRegisters() const;
+
+  /// Restore callee-saved registers. Call at the end of the function.
+  u32 PopCalleeSavedRegisters();
+
+  //////////////////////////////////////////////////////////////////////////
+  // Scratch Register Allocation
+  //////////////////////////////////////////////////////////////////////////
+  Value GetCPUPtr();
+  Value AllocateScratch(RegSize size, HostReg reg = HostReg_Invalid);
+
+  //////////////////////////////////////////////////////////////////////////
+  // Guest Register Caching
+  //////////////////////////////////////////////////////////////////////////
+
+  /// Returns true if the specified guest register is cached.
+  bool IsGuestRegisterInHostReg(Reg guest_reg) const
+  {
+    return m_guest_reg_cache[static_cast<u8>(guest_reg)].IsInHostRegister();
+  }
+
+  /// Returns the host register if the guest register is cached.
+  std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
+  {
+    if (!m_guest_reg_cache[static_cast<u8>(guest_reg)].IsInHostRegister())
+      return std::nullopt;
+    return m_guest_reg_cache[static_cast<u8>(guest_reg)].GetHostRegister();
+  }
+
+  Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false,
+                          HostReg forced_host_reg = HostReg_Invalid);
+
+  /// Creates a copy of value, and stores it to guest_reg.
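+  /// Typical read-modify-write in an instruction emitter (sketch; OrValues stands in
+  /// for whatever combining helper the CodeGenerator provides):
+  ///   Value rs = regcache.ReadGuestRegister(inst.i.rs);
+  ///   regcache.WriteGuestRegister(inst.i.rt, OrValues(rs, Value::FromConstantU32(imm)));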
+  Value WriteGuestRegister(Reg guest_reg, Value&& value);
+
+  void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
+  void InvalidateGuestRegister(Reg guest_reg);
+
+  void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
+  bool EvictOneGuestRegister();
+
+private:
+  Value ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register,
+                          HostReg forced_host_reg);
+  Value WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value);
+  void FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty);
+  void InvalidateGuestRegister(Value& cache_value, Reg guest_reg);
+  void ClearRegisterFromOrder(Reg reg);
+  void PushRegisterToOrder(Reg reg);
+  void AppendRegisterToOrder(Reg reg);
+
+  CodeGenerator& m_code_generator;
+
+  HostReg m_cpu_ptr_host_register = {};
+  std::array<HostRegState, HostReg_Count> m_host_register_state{};
+  std::array<HostReg, HostReg_Count> m_host_register_allocation_order{};
+  u32 m_host_register_available_count = 0;
+
+  std::array<Value, static_cast<u8>(Reg::count)> m_guest_reg_cache{};
+
+  std::array<Reg, HostReg_Count> m_guest_register_order{};
+  u32 m_guest_register_order_count = 0;
+
+  std::array<HostReg, HostReg_Count> m_host_register_callee_saved_order{};
+  u32 m_host_register_callee_saved_order_count = 0;
+};
+
+} // namespace CPU::Recompiler
\ No newline at end of file
diff --git a/src/core/cpu_recompiler_thunks.cpp b/src/core/cpu_recompiler_thunks.cpp
new file mode 100644
index 000000000..cfdfa1e61
--- /dev/null
+++ b/src/core/cpu_recompiler_thunks.cpp
@@ -0,0 +1,43 @@
+#include "cpu_recompiler_thunks.h"
+
+namespace CPU::Recompiler {
+
+// TODO: Port thunks to "ASM routines", i.e. code in the JIT buffer.
+
+bool Thunks::ReadMemoryByte(Core* cpu, u32 address, u8* value)
+{
+  return cpu->ReadMemoryByte(address, value);
+}
+
+bool Thunks::ReadMemoryHalfWord(Core* cpu, u32 address, u16* value)
+{
+  return cpu->ReadMemoryHalfWord(address, value);
+}
+
+bool Thunks::ReadMemoryWord(Core* cpu, u32 address, u32* value)
+{
+  return cpu->ReadMemoryWord(address, value);
+}
+
+bool Thunks::WriteMemoryByte(Core* cpu, u32 address, u8 value)
+{
+  return cpu->WriteMemoryByte(address, value);
+}
+
+bool Thunks::WriteMemoryHalfWord(Core* cpu, u32 address, u16 value)
+{
+  return cpu->WriteMemoryHalfWord(address, value);
+}
+
+bool Thunks::WriteMemoryWord(Core* cpu, u32 address, u32 value)
+{
+  return cpu->WriteMemoryWord(address, value);
+}
+
+bool Thunks::InterpretInstruction(Core* cpu)
+{
+  cpu->ExecuteInstruction();
+  return cpu->m_exception_raised;
+}
+
+} // namespace CPU::Recompiler
\ No newline at end of file
diff --git a/src/core/cpu_recompiler_thunks.h b/src/core/cpu_recompiler_thunks.h
new file mode 100644
index 000000000..4104d2cca
--- /dev/null
+++ b/src/core/cpu_recompiler_thunks.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "common/jit_code_buffer.h"
+#include "cpu_core.h"
+
+namespace CPU::Recompiler {
+
+class Thunks
+{
+public:
+  //////////////////////////////////////////////////////////////////////////
+  // Trampolines for calling back from the JIT.
+  // Needed because we can't cast member functions to void*...
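+  // The generated code passes the Core* explicitly as the first argument, roughly
+  // (sketch, register names per the Win64/SysV calling conventions):
+  //   mov  rcx/rdi, <cpu>
+  //   mov  edx/esi, <address>
+  //   call Thunks::ReadMemoryWord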
+  // TODO: Abuse the carry flag or something else for signalling exceptions.
+  //////////////////////////////////////////////////////////////////////////
+  static bool ReadMemoryByte(Core* cpu, u32 address, u8* value);
+  static bool ReadMemoryHalfWord(Core* cpu, u32 address, u16* value);
+  static bool ReadMemoryWord(Core* cpu, u32 address, u32* value);
+  static bool WriteMemoryByte(Core* cpu, u32 address, u8 value);
+  static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value);
+  static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);
+  static bool InterpretInstruction(Core* cpu);
+};
+
+class ASMFunctions
+{
+public:
+  bool (*read_memory_byte)(u32 address, u8* value);
+  bool (*read_memory_word)(u32 address, u16* value);
+  bool (*read_memory_dword)(u32 address, u32* value);
+  void (*write_memory_byte)(u32 address, u8 value);
+  void (*write_memory_word)(u32 address, u16 value);
+  void (*write_memory_dword)(u32 address, u32 value);
+
+  void Generate(JitCodeBuffer* code_buffer);
+};
+
+} // namespace CPU::Recompiler
diff --git a/src/core/cpu_recompiler_types.h b/src/core/cpu_recompiler_types.h
new file mode 100644
index 000000000..d9a5f6ea3
--- /dev/null
+++ b/src/core/cpu_recompiler_types.h
@@ -0,0 +1,53 @@
+#pragma once
+#include "cpu_types.h"
+
+#if defined(Y_CPU_X64)
+#define XBYAK_NO_OP_NAMES 1
+#include "xbyak.h"
+#endif
+
+namespace CPU {
+
+class Core;
+class CodeCache;
+
+namespace Recompiler {
+
+class CodeGenerator;
+class RegisterCache;
+
+enum RegSize : u8
+{
+  RegSize_8,
+  RegSize_16,
+  RegSize_32,
+  RegSize_64,
+};
+
+#if defined(Y_CPU_X64)
+using HostReg = Xbyak::Operand::Code;
+using CodeEmitter = Xbyak::CodeGenerator;
+enum : u32
+{
+  HostReg_Count = 16
+};
+constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
+constexpr RegSize HostPointerSize = RegSize_64;
+
+// A reasonable "maximum" number of bytes per instruction.
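+// Presumably used as a conservative bound when reserving JIT buffer space, e.g.
+// checking that instruction_count * MAX_HOST_BYTES_PER_INSTRUCTION bytes remain
+// free before compiling a block.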
+constexpr u32 MAX_HOST_BYTES_PER_INSTRUCTION = 128;
+
+#else
+using HostReg = int;
+using CodeEmitter = void;
+enum : u32
+{
+  HostReg_Count = 0
+};
+constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
+constexpr RegSize HostPointerSize = RegSize_64;
+#endif
+
+} // namespace Recompiler
+
+} // namespace CPU
\ No newline at end of file
diff --git a/src/core/cpu_types.cpp b/src/core/cpu_types.cpp
new file mode 100644
index 000000000..927f83572
--- /dev/null
+++ b/src/core/cpu_types.cpp
@@ -0,0 +1,196 @@
+#include "cpu_types.h"
+#include "YBaseLib/Assert.h"
+#include <array>
+
+namespace CPU {
+static const std::array<const char*, 32> s_reg_names = {
+  {"$zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+   "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"}};
+
+const char* GetRegName(Reg reg)
+{
+  DebugAssert(reg < Reg::count);
+  return s_reg_names[static_cast<u8>(reg)];
+}
+
+bool IsBranchInstruction(const Instruction& instruction)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::j:
+    case InstructionOp::jal:
+    case InstructionOp::b:
+    case InstructionOp::beq:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+    case InstructionOp::bne:
+      return true;
+
+    case InstructionOp::funct:
+    {
+      switch (instruction.r.funct)
+      {
+        case InstructionFunct::jr:
+        case InstructionFunct::jalr:
+          return true;
+
+        default:
+          return false;
+      }
+    }
+
+    default:
+      return false;
+  }
+}
+
+bool IsExitBlockInstruction(const Instruction& instruction)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::funct:
+    {
+      switch (instruction.r.funct)
+      {
+        case InstructionFunct::syscall:
+        case InstructionFunct::break_:
+          return true;
+
+        default:
+          return false;
+      }
+    }
+
+    default:
+      return false;
+  }
+}
+
+bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::lui:
+    case InstructionOp::andi:
+    case InstructionOp::ori:
+    case InstructionOp::xori:
+    case InstructionOp::addiu:
+    case InstructionOp::slti:
+    case InstructionOp::sltiu:
+      return false;
+
+    case InstructionOp::cop0:
+    case InstructionOp::cop2:
+    case InstructionOp::lwc2:
+    case InstructionOp::swc2:
+      return in_user_mode;
+
+    // swc0/lwc0/cop1/cop3 are essentially no-ops
+    case InstructionOp::cop1:
+    case InstructionOp::cop3:
+    case InstructionOp::lwc0:
+    case InstructionOp::lwc1:
+    case InstructionOp::lwc3:
+    case InstructionOp::swc0:
+    case InstructionOp::swc1:
+    case InstructionOp::swc3:
+      return false;
+
+    case InstructionOp::addi:
+    case InstructionOp::lb:
+    case InstructionOp::lh:
+    case InstructionOp::lw:
+    case InstructionOp::lbu:
+    case InstructionOp::lhu:
+    case InstructionOp::lwl:
+    case InstructionOp::lwr:
+    case InstructionOp::sb:
+    case InstructionOp::sh:
+    case InstructionOp::sw:
+    case InstructionOp::swl:
+    case InstructionOp::swr:
+      return true;
+
+    // These can fault on the branch target address. Perhaps we should move this check to the target instruction?
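+    // (e.g. jr to a target that is not 4-byte aligned raises an address error
+    // exception when the target instruction is fetched)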
+    case InstructionOp::j:
+    case InstructionOp::jal:
+    case InstructionOp::b:
+    case InstructionOp::beq:
+    case InstructionOp::bgtz:
+    case InstructionOp::blez:
+    case InstructionOp::bne:
+      return true;
+
+    case InstructionOp::funct:
+    {
+      switch (instruction.r.funct)
+      {
+        case InstructionFunct::sll:
+        case InstructionFunct::srl:
+        case InstructionFunct::sra:
+        case InstructionFunct::sllv:
+        case InstructionFunct::srlv:
+        case InstructionFunct::srav:
+        case InstructionFunct::and_:
+        case InstructionFunct::or_:
+        case InstructionFunct::xor_:
+        case InstructionFunct::nor:
+        case InstructionFunct::addu:
+        case InstructionFunct::subu:
+        case InstructionFunct::slt:
+        case InstructionFunct::sltu:
+        case InstructionFunct::mfhi:
+        case InstructionFunct::mthi:
+        case InstructionFunct::mflo:
+        case InstructionFunct::mtlo:
+        case InstructionFunct::mult:
+        case InstructionFunct::multu:
+        case InstructionFunct::div:
+        case InstructionFunct::divu:
+          return false;
+
+        case InstructionFunct::jr:
+        case InstructionFunct::jalr:
+          return true;
+
+        case InstructionFunct::add:
+        case InstructionFunct::sub:
+        case InstructionFunct::syscall:
+        case InstructionFunct::break_:
+        default:
+          return true;
+      }
+    }
+
+    default:
+      return true;
+  }
+}
+
+bool IsLoadDelayingInstruction(const Instruction& instruction)
+{
+  switch (instruction.op)
+  {
+    case InstructionOp::lb:
+    case InstructionOp::lh:
+    case InstructionOp::lw:
+    case InstructionOp::lbu:
+    case InstructionOp::lhu:
+      return true;

+    case InstructionOp::lwl:
+    case InstructionOp::lwr:
+      return false;
+
+    default:
+      return false;
+  }
+}
+
+bool IsInvalidInstruction(const Instruction& instruction)
+{
+  // TODO: Detect invalid encodings. For now, conservatively report everything as possibly invalid.
+  return true;
+}
+
+} // namespace CPU
\ No newline at end of file
diff --git a/src/core/cpu_types.h b/src/core/cpu_types.h
index 1cc597af9..e7a74ef97 100644
--- a/src/core/cpu_types.h
+++ b/src/core/cpu_types.h
@@ -4,6 +4,18 @@
 namespace CPU {
+class Core;
+
+// Memory address mask used for fetching as well as load/stores (strips the cached/uncached/user/kernel segment bits).
+enum : PhysicalMemoryAddress
+{
+  PHYSICAL_MEMORY_ADDRESS_MASK = 0x1FFFFFFF
+};
+enum : u32
+{
+  INSTRUCTION_SIZE = sizeof(u32)
+};
+
 enum class Reg : u8
 {
   zero,
@@ -41,6 +53,8 @@ enum class Reg : u8
   count
 };
 
+const char* GetRegName(Reg reg);
+
 enum class InstructionOp : u8
 {
   funct = 0,
@@ -192,6 +206,13 @@ union Instruction
   }
 };
 
+// Instruction helpers.
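+// Used when scanning guest code into blocks: a block typically ends after the first
+// branch (including its delay slot) or exit-block instruction.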
+bool IsBranchInstruction(const Instruction& instruction);
+bool IsExitBlockInstruction(const Instruction& instruction);
+bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode);
+bool IsLoadDelayingInstruction(const Instruction& instruction);
+bool IsInvalidInstruction(const Instruction& instruction);
+
 struct Registers
 {
   union
@@ -349,4 +370,56 @@ struct Cop0Registers
   } dcic;
 };
 
+union CodeBlockKey
+{
+  u32 bits;
+
+  BitField<u32, bool, 0, 1> user_mode;
+  BitField<u32, u32, 2, 30> aligned_pc;
+
+  ALWAYS_INLINE u32 GetPC() const { return aligned_pc << 2; }
+  ALWAYS_INLINE void SetPC(u32 pc) { aligned_pc = pc >> 2; }
+
+  ALWAYS_INLINE CodeBlockKey& operator=(const CodeBlockKey& rhs)
+  {
+    bits = rhs.bits;
+    return *this;
+  }
+
+  ALWAYS_INLINE bool operator==(const CodeBlockKey& rhs) const { return bits == rhs.bits; }
+  ALWAYS_INLINE bool operator!=(const CodeBlockKey& rhs) const { return bits != rhs.bits; }
+  ALWAYS_INLINE bool operator<(const CodeBlockKey& rhs) const { return bits < rhs.bits; }
+};
+
+struct CodeBlockInstruction
+{
+  Instruction instruction;
+  u32 pc;
+
+  bool is_branch : 1;
+  bool is_branch_delay_slot : 1;
+  bool is_load_delay_slot : 1;
+  bool is_last_instruction : 1;
+  bool can_trap : 1;
+};
+
+struct CodeBlock
+{
+  CodeBlockKey key;
+
+  std::vector<CodeBlockInstruction> instructions;
+
+  using HostCodePointer = void (*)(Core*);
+  HostCodePointer host_code;
+  u32 host_code_size;
+
+  u32 GetPC() const { return key.GetPC(); }
+  u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }
+  u32 GetStartPageIndex() const { return (key.GetPC() / CPU_CODE_CACHE_PAGE_SIZE); }
+  u32 GetEndPageIndex() const
+  {
+    return ((key.GetPC() + GetSizeInBytes() + (CPU_CODE_CACHE_PAGE_SIZE - 1)) / CPU_CODE_CACHE_PAGE_SIZE);
+  }
+};
+
 } // namespace CPU
diff --git a/src/core/system.cpp b/src/core/system.cpp
index a0e03ab0b..c0877df77 100644
--- a/src/core/system.cpp
+++ b/src/core/system.cpp
@@ -5,6 +5,7 @@
 #include "bus.h"
 #include "cdrom.h"
 #include "common/state_wrapper.h"
+#include "cpu_code_cache.h"
 #include "cpu_core.h"
 #include "dma.h"
 #include "gpu.h"
@@ -23,6 +24,7 @@ Log_SetChannel(System);
 System::System(HostInterface* host_interface) : m_host_interface(host_interface)
 {
   m_cpu = std::make_unique<CPU::Core>();
+  m_cpu_code_cache = std::make_unique<CPU::CodeCache>();
   m_bus = std::make_unique<Bus>();
   m_dma = std::make_unique<DMA>();
   m_interrupt_controller = std::make_unique<InterruptController>();
@@ -169,8 +171,9 @@ bool System::Boot(const char* filename)
 void System::InitializeComponents()
 {
   m_cpu->Initialize(m_bus.get());
-  m_bus->Initialize(m_cpu.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(), m_cdrom.get(), m_pad.get(),
-                    m_timers.get(), m_spu.get(), m_mdec.get());
+  m_cpu_code_cache->Initialize(m_cpu.get(), m_bus.get());
+  m_bus->Initialize(m_cpu.get(), m_cpu_code_cache.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(),
+                    m_cdrom.get(), m_pad.get(), m_timers.get(), m_spu.get(), m_mdec.get());
 
   m_dma->Initialize(this, m_bus.get(), m_interrupt_controller.get(), m_gpu.get(), m_cdrom.get(), m_spu.get(),
                     m_mdec.get());
@@ -235,6 +238,9 @@ bool System::DoState(StateWrapper& sw)
   if (!sw.DoMarker("CPU") || !m_cpu->DoState(sw))
     return false;
 
+  if (sw.IsReading())
+    m_cpu_code_cache->Reset();
+
   if (!sw.DoMarker("Bus") || !m_bus->DoState(sw))
     return false;
 
@@ -268,6 +274,7 @@ bool System::DoState(StateWrapper& sw)
 void System::Reset()
 {
   m_cpu->Reset();
+  m_cpu_code_cache->Reset();
   m_bus->Reset();
   m_dma->Reset();
   m_interrupt_controller->Reset();
@@ -299,7 +306,10 @@ void System::RunFrame()
 {
   u32 current_frame_number = m_frame_number;
   while (current_frame_number == m_frame_number)
   {
-    m_cpu->Execute();
+    if (CPU::USE_CODE_CACHE)
+      m_cpu_code_cache->Execute();
+    else
+      m_cpu->Execute();
     Synchronize();
   }
 }
diff --git a/src/core/system.h b/src/core/system.h
index 87197f998..5954db3d2 100644
--- a/src/core/system.h
+++ b/src/core/system.h
@@ -10,6 +10,7 @@ class StateWrapper;
 
 namespace CPU {
 class Core;
+class CodeCache;
 }
 
 class Bus;
@@ -97,6 +98,7 @@ private:
   HostInterface* m_host_interface;
 
   std::unique_ptr<CPU::Core> m_cpu;
+  std::unique_ptr<CPU::CodeCache> m_cpu_code_cache;
   std::unique_ptr<Bus> m_bus;
   std::unique_ptr<DMA> m_dma;
   std::unique_ptr<InterruptController> m_interrupt_controller;
diff --git a/src/core/types.h b/src/core/types.h
index 41307092a..cfef32361 100644
--- a/src/core/types.h
+++ b/src/core/types.h
@@ -38,3 +38,9 @@ enum class GPURenderer : u8
   Software,
   Count
 };
+
+enum : u32
+{
+  CPU_CODE_CACHE_PAGE_SIZE = 1024,
+  CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
+};
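+
+// Example: with the PSX's 2 MiB of RAM and 1 KiB pages this yields 0x200000 / 1024 =
+// 2048 pages; tracking compiled code per page means a RAM write only has to
+// invalidate blocks within the page it touches.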