CPU: Basic recompiler implementation for x64 (lui, ori, addiu)

Disabled by default.
This commit is contained in:
Connor McLaughlin 2019-11-19 20:30:04 +10:00
parent 0e8ff85f04
commit 1d6c4a3af1
25 changed files with 4104 additions and 49 deletions

View File

@ -6,11 +6,22 @@ add_library(core
bus.inl
cdrom.cpp
cdrom.h
cpu_code_cache.cpp
cpu_code_cache.h
cpu_core.cpp
cpu_core.h
cpu_core.inl
cpu_disasm.cpp
cpu_disasm.h
cpu_recompiler_code_generator.cpp
cpu_recompiler_code_generator.h
cpu_recompiler_code_generator_generic.cpp
cpu_recompiler_register_cache.cpp
cpu_recompiler_register_cache.h
cpu_recompiler_thunks.cpp
cpu_recompiler_thunks.h
cpu_recompiler_types.h
cpu_types.cpp
cpu_types.h
digital_controller.cpp
digital_controller.h
@ -65,3 +76,11 @@ if(WIN32)
gpu_hw_d3d11.h
)
endif()
# Only the x64 target has a recompiler backend. It emits host code with the
# header-only xbyak assembler, so add that include path and the x64-specific
# code generator translation unit; other architectures fall back to the
# interpreter-only build.
if(${CPU_ARCH} STREQUAL "x64")
target_include_directories(core PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../dep/xbyak/xbyak")
target_sources(core PRIVATE cpu_recompiler_code_generator_x64.cpp)
message("Building x64 recompiler")
else()
message("Not building recompiler")
endif()

View File

@ -5,6 +5,7 @@
#include "YBaseLib/String.h"
#include "cdrom.h"
#include "common/state_wrapper.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
#include "cpu_disasm.h"
#include "dma.h"
@ -32,10 +33,12 @@ Bus::Bus() = default;
Bus::~Bus() = default;
void Bus::Initialize(CPU::Core* cpu, DMA* dma, InterruptController* interrupt_controller, GPU* gpu, CDROM* cdrom,
Pad* pad, Timers* timers, SPU* spu, MDEC* mdec)
void Bus::Initialize(CPU::Core* cpu, CPU::CodeCache* cpu_code_cache, DMA* dma,
InterruptController* interrupt_controller, GPU* gpu, CDROM* cdrom, Pad* pad, Timers* timers,
SPU* spu, MDEC* mdec)
{
m_cpu = cpu;
m_cpu_code_cache = cpu_code_cache;
m_dma = dma;
m_interrupt_controller = interrupt_controller;
m_gpu = gpu;
@ -518,6 +521,11 @@ void Bus::DoWriteSPU(MemoryAccessSize size, u32 offset, u32 value)
m_spu->WriteRegister(offset, Truncate16(value));
}
/// Called from the RAM write path when a page flagged via SetRAMCodePage() is
/// written; forwards to the CPU code cache so any compiled blocks covering
/// that page are discarded (self-modifying code support).
void Bus::DoInvalidateCodeCache(u32 page_index)
{
m_cpu_code_cache->FlushBlocksWithPageIndex(page_index);
}
u32 Bus::DoReadDMA(MemoryAccessSize size, u32 offset)
{
return FIXUP_WORD_READ_VALUE(offset, m_dma->ReadRegister(FIXUP_WORD_READ_OFFSET(offset)));

View File

@ -4,12 +4,14 @@
#include "common/bitfield.h"
#include "types.h"
#include <array>
#include <bitset>
class StateWrapper;
namespace CPU {
class Core;
}
class CodeCache;
} // namespace CPU
class DMA;
class InterruptController;
@ -27,8 +29,8 @@ public:
Bus();
~Bus();
void Initialize(CPU::Core* cpu, DMA* dma, InterruptController* interrupt_controller, GPU* gpu, CDROM* cdrom, Pad* pad,
Timers* timers, SPU* spu, MDEC* mdec);
void Initialize(CPU::Core* cpu, CPU::CodeCache* cpu_code_cache, DMA* dma, InterruptController* interrupt_controller,
GPU* gpu, CDROM* cdrom, Pad* pad, Timers* timers, SPU* spu, MDEC* mdec);
void Reset();
bool DoState(StateWrapper& sw);
@ -52,6 +54,34 @@ public:
// changing interfaces
void SetGPU(GPU* gpu) { m_gpu = gpu; }
/// Returns the address which should be used for code caching (i.e. removes mirrors).
/// The 2MB of RAM is mirrored across the low 8MB of the physical map; all mirrors
/// collapse onto the first copy. Non-RAM addresses are returned unchanged.
ALWAYS_INLINE static PhysicalMemoryAddress UnmirrorAddress(PhysicalMemoryAddress address)
{
// RAM - use the same RAM_MIRROR_END bound as IsRAMAddress()/IsCacheableAddress()
// instead of a duplicated 0x800000 literal, so the helpers cannot drift apart.
if (address < RAM_MIRROR_END)
return address & UINT32_C(0x1FFFFF);
else
return address;
}
/// Returns true if the address specified is cacheable (RAM or BIOS).
/// These are the only regions the code cache is willing to fetch and compile
/// instructions from (see CodeCache::CompileBlock).
ALWAYS_INLINE static bool IsCacheableAddress(PhysicalMemoryAddress address)
{
return (address < RAM_MIRROR_END) || (address >= BIOS_BASE && address < (BIOS_BASE + BIOS_SIZE));
}
/// Returns true if the address specified is writable (RAM), including the RAM mirrors.
ALWAYS_INLINE static bool IsRAMAddress(PhysicalMemoryAddress address) { return address < RAM_MIRROR_END; }
/// Flags a RAM code page as containing compiled code, so we know when to invalidate blocks.
/// @param index code page index (unmirrored RAM offset / CPU_CODE_CACHE_PAGE_SIZE).
ALWAYS_INLINE void SetRAMCodePage(u32 index) { m_ram_code_bits[index] = true; }
/// Unflags a RAM region as code; the code cache will no longer be notified when writes occur.
ALWAYS_INLINE void ClearRAMCodePage(u32 index) { m_ram_code_bits[index] = false; }
/// Clears all code bits for RAM regions (used when the code cache is reset wholesale).
ALWAYS_INLINE void ClearRAMCodePageFlags() { m_ram_code_bits.reset(); }
private:
enum : u32
{
@ -204,7 +234,10 @@ private:
u32 DoReadSPU(MemoryAccessSize size, u32 offset);
void DoWriteSPU(MemoryAccessSize size, u32 offset, u32 value);
void DoInvalidateCodeCache(u32 page_index);
CPU::Core* m_cpu = nullptr;
CPU::CodeCache* m_cpu_code_cache = nullptr;
DMA* m_dma = nullptr;
InterruptController* m_interrupt_controller = nullptr;
GPU* m_gpu = nullptr;
@ -220,8 +253,9 @@ private:
std::array<TickCount, 3> m_cdrom_access_time = {};
std::array<TickCount, 3> m_spu_access_time = {};
std::array<u8, 2097152> m_ram{}; // 2MB RAM
std::array<u8, 524288> m_bios{}; // 512K BIOS ROM
std::bitset<CPU_CODE_CACHE_PAGE_COUNT> m_ram_code_bits{};
std::array<u8, RAM_SIZE> m_ram{}; // 2MB RAM
std::array<u8, BIOS_SIZE> m_bios{}; // 512K BIOS ROM
std::vector<u8> m_exp1_rom;
MEMCTRL m_MEMCTRL = {};

View File

@ -25,6 +25,10 @@ TickCount Bus::DoRAMAccess(u32 offset, u32& value)
}
else
{
const u32 page_index = offset / CPU_CODE_CACHE_PAGE_SIZE;
if (m_ram_code_bits[page_index])
DoInvalidateCodeCache(page_index);
if constexpr (size == MemoryAccessSize::Byte)
{
m_ram[offset] = Truncate8(value);

View File

@ -40,6 +40,13 @@
<ClCompile Include="cdrom.cpp" />
<ClCompile Include="cpu_core.cpp" />
<ClCompile Include="cpu_disasm.cpp" />
<ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_recompiler_code_generator.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_generic.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_x64.cpp" />
<ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_recompiler_thunks.cpp" />
<ClCompile Include="cpu_types.cpp" />
<ClCompile Include="digital_controller.cpp" />
<ClCompile Include="gpu_commands.cpp" />
<ClCompile Include="gpu_hw_d3d11.cpp" />
@ -67,6 +74,11 @@
<ClInclude Include="cdrom.h" />
<ClInclude Include="cpu_core.h" />
<ClInclude Include="cpu_disasm.h" />
<ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_recompiler_code_generator.h" />
<ClInclude Include="cpu_recompiler_register_cache.h" />
<ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="digital_controller.h" />
<ClInclude Include="gpu_hw_d3d11.h" />
<ClInclude Include="gpu_hw_shadergen.h" />
@ -251,7 +263,7 @@
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -273,7 +285,7 @@
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -295,7 +307,7 @@
<PreprocessorDefinitions>ENABLE_VOODOO=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
@ -320,7 +332,7 @@
<PreprocessorDefinitions>ENABLE_VOODOO=1;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
@ -344,7 +356,7 @@
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>false</WholeProgramOptimization>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -367,7 +379,7 @@
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>true</WholeProgramOptimization>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -391,7 +403,7 @@
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>false</WholeProgramOptimization>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -414,7 +426,7 @@
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>ENABLE_VOODOO=1;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)dep\msvc\include;$(SolutionDir)dep\YBaseLib\Include;$(SolutionDir)dep\glad\include;$(SolutionDir)dep\stb\include;$(SolutionDir)dep\imgui\include;$(SolutionDir)dep\simpleini\include;$(SolutionDir)dep\xbyak\xbyak;$(SolutionDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<WholeProgramOptimization>true</WholeProgramOptimization>
<LanguageStandard>stdcpp17</LanguageStandard>

View File

@ -26,6 +26,13 @@
<ClCompile Include="gpu_hw_shadergen.cpp" />
<ClCompile Include="gpu_hw_d3d11.cpp" />
<ClCompile Include="bios.cpp" />
<ClCompile Include="cpu_code_cache.cpp" />
<ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_recompiler_thunks.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_x64.cpp" />
<ClCompile Include="cpu_recompiler_code_generator.cpp" />
<ClCompile Include="cpu_recompiler_code_generator_generic.cpp" />
<ClCompile Include="cpu_types.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="types.h" />
@ -57,6 +64,11 @@
<ClInclude Include="gpu_hw_d3d11.h" />
<ClInclude Include="host_display.h" />
<ClInclude Include="bios.h" />
<ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="cpu_code_cache.h" />
<ClInclude Include="cpu_recompiler_register_cache.h" />
<ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_code_generator.h" />
</ItemGroup>
<ItemGroup>
<None Include="cpu_core.inl" />

313
src/core/cpu_code_cache.cpp Normal file
View File

@ -0,0 +1,313 @@
#include "cpu_code_cache.h"
#include "YBaseLib/Log.h"
#include "cpu_core.h"
#include "cpu_disasm.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
Log_SetChannel(CPU::CodeCache);
namespace CPU {
bool USE_CODE_CACHE = false;
bool USE_RECOMPILER = false;
CodeCache::CodeCache() = default;
CodeCache::~CodeCache() = default;
// Wires the cache up to the CPU core and bus, allocates the executable JIT
// buffer, and emits the shared assembly trampolines into it so that compiled
// blocks have something to call.
void CodeCache::Initialize(Core* core, Bus* bus)
{
// Stash the interfaces we dispatch through.
m_bus = bus;
m_core = core;

// The code buffer must exist before the trampolines are generated into it.
m_code_buffer = std::make_unique<JitCodeBuffer>();
m_asm_functions = std::make_unique<Recompiler::ASMFunctions>();
m_asm_functions->Generate(m_code_buffer.get());
}
/// Main dispatch loop: executes one code block per iteration until the core's
/// downcount expires, servicing pending interrupts between blocks.
void CodeCache::Execute()
{
while (m_core->m_downcount >= 0)
{
if (m_core->HasPendingInterrupt())
{
// TODO: Fill in m_next_instruction so the exception frame observes the correct opcode.
m_core->DispatchInterrupt();
}
// Look up (or compile on demand) the block at the current PC.
m_current_block = GetNextBlock();
if (!m_current_block)
{
Log_WarningPrintf("Falling back to uncached interpreter at 0x%08X", m_core->GetRegs().pc);
InterpretUncachedBlock();
continue;
}
// Run the block: recompiled host code when enabled, otherwise the
// interpreter over the pre-decoded instruction list.
if (USE_RECOMPILER)
m_current_block->host_code(m_core);
else
InterpretCachedBlock(*m_current_block);
// If the block invalidated itself while running (self-modifying code),
// FlushBlock() deferred the delete to us; free it now that it has finished.
if (m_current_block_flushed)
{
m_current_block_flushed = false;
delete m_current_block;
}
m_current_block = nullptr;
}
}
/// Drops every compiled block and rewinds the JIT code buffer. Called on
/// system reset and when the code buffer runs out of space.
void CodeCache::Reset()
{
m_bus->ClearRAMCodePageFlags();
for (auto& it : m_ram_block_map)
it.clear();

// The block map stores raw owning CodeBlock pointers (see GetNextBlock), so
// free them before clearing — clear() alone would leak every compiled block.
// NOTE(review): assumes no block from the map is mid-execution here; the
// visible caller (CompileBlock's out-of-space path) runs while
// m_current_block is null between blocks — confirm for any future callers.
for (const auto& it : m_blocks)
delete it.second;
m_blocks.clear();

m_code_buffer->Reset();
}
/// Returns the block for the current PC, compiling it on a cache miss.
/// Returns nullptr when the address cannot be compiled, which makes Execute()
/// fall back to the uncached interpreter.
const CPU::CodeBlock* CodeCache::GetNextBlock()
{
// Blocks are keyed on the unmirrored physical PC plus the privilege mode.
const u32 address = m_bus->UnmirrorAddress(m_core->m_regs.pc & UINT32_C(0x1FFFFFFF));
CodeBlockKey key = {};
key.SetPC(address);
key.user_mode = m_core->InUserMode();
BlockMap::iterator iter = m_blocks.find(key.bits);
if (iter != m_blocks.end())
return iter->second;
CodeBlock* block = new CodeBlock();
block->key = key;
if (CompileBlock(block))
{
// Insert into the page map so writes to these RAM pages invalidate the block.
if (m_bus->IsRAMAddress(address))
{
const u32 start_page = block->GetStartPageIndex();
const u32 end_page = block->GetEndPageIndex();
for (u32 page = start_page; page < end_page; page++)
{
m_ram_block_map[page].push_back(block);
m_bus->SetRAMCodePage(page);
}
}
}
else
{
Log_ErrorPrintf("Failed to compile block at PC=0x%08X", address);
// Don't hand the caller a block with no usable host code (under the
// recompiler its host_code pointer would be null). Cache a null entry so
// Execute() takes its interpreter fallback, and free the partial block.
delete block;
block = nullptr;
}
iter = m_blocks.emplace(key.bits, block).first;
return block;
}
/// Decodes guest instructions starting at the block's PC until a block-ending
/// condition (branch + delay slot consumed, uncacheable/unfetchable address,
/// or an exit instruction such as syscall), then optionally recompiles the
/// block to host code. Returns false if no usable block could be produced.
bool CodeCache::CompileBlock(CodeBlock* block)
{
u32 pc = block->GetPC();
bool is_branch_delay_slot = false;
// NOTE(review): is_load_delay_slot is never set to true in this loop, so
// cbi.is_load_delay_slot is always false — load-delay tracking looks
// unimplemented here; confirm intent.
bool is_load_delay_slot = false;
for (;;)
{
CodeBlockInstruction cbi = {};
// Stop decoding at uncacheable addresses or failed fetches.
// NOTE(review): '!IsInvalidInstruction(...)' reads inverted — as written it
// stops when the instruction is *valid*. Presumably the helper is a stub
// whose return convention makes this correct; verify against cpu_types.cpp.
if (!m_bus->IsCacheableAddress(pc) ||
m_bus->DispatchAccess<MemoryAccessType::Read, MemoryAccessSize::Word>(pc, cbi.instruction.bits) < 0 ||
!IsInvalidInstruction(cbi.instruction))
{
break;
}
cbi.pc = pc;
cbi.is_branch = IsBranchInstruction(cbi.instruction);
cbi.is_branch_delay_slot = is_branch_delay_slot;
cbi.is_load_delay_slot = is_load_delay_slot;
cbi.can_trap = CanInstructionTrap(cbi.instruction, m_core->InUserMode());
// instruction is decoded now
block->instructions.push_back(cbi);
pc += sizeof(cbi.instruction.bits);
// if we're in a branch delay slot, the block is now done
// except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
if (is_branch_delay_slot && !cbi.is_branch)
break;
// if this is a branch, we grab the next instruction (delay slot), and then exit
is_branch_delay_slot = cbi.is_branch;
// is this a non-branchy exit? (e.g. syscall)
if (IsExitBlockInstruction(cbi.instruction))
break;
}
if (!block->instructions.empty())
{
block->instructions.back().is_last_instruction = true;
#ifdef _DEBUG
// Debug builds: disassemble the whole block, tagging branch-delay (BD) and
// load-delay (LD) slots.
SmallString disasm;
Log_DebugPrintf("Block at 0x%08X", block->GetPC());
for (const CodeBlockInstruction& cbi : block->instructions)
{
CPU::DisassembleInstruction(&disasm, cbi.pc, cbi.instruction.bits, nullptr);
Log_DebugPrintf("[%s %s 0x%08X] %08X %s", cbi.is_branch_delay_slot ? "BD" : "  ",
cbi.is_load_delay_slot ? "LD" : "  ", cbi.pc, cbi.instruction.bits, disasm.GetCharArray());
}
#endif
}
else
{
Log_WarningPrintf("Empty block compiled at 0x%08X", block->key.GetPC());
return false;
}
if (USE_RECOMPILER)
{
// Ensure we're not going to run out of space while compiling this block.
if (m_code_buffer->GetFreeCodeSpace() < (block->instructions.size() * Recompiler::MAX_HOST_BYTES_PER_INSTRUCTION))
{
Log_WarningPrintf("Out of code space, flushing all blocks.");
Reset();
}
Recompiler::CodeGenerator codegen(m_core, m_code_buffer.get(), *m_asm_functions.get());
if (!codegen.CompileBlock(block, &block->host_code, &block->host_code_size))
{
Log_ErrorPrintf("Failed to compile host code for block at 0x%08X", block->key.GetPC());
return false;
}
}
return true;
}
// Invalidates every block overlapping the given RAM code page, then clears
// the bus-side "contains code" flag so writes to the page stop notifying us.
void CodeCache::FlushBlocksWithPageIndex(u32 page_index)
{
DebugAssert(page_index < CPU_CODE_CACHE_PAGE_COUNT);

// FlushBlock() unlinks the block from every page list it occupies,
// including this one, so the list drains as we loop.
std::vector<CodeBlock*>& page_blocks = m_ram_block_map[page_index];
while (!page_blocks.empty())
{
CodeBlock* const victim = page_blocks.back();
FlushBlock(victim);
}

m_bus->ClearRAMCodePage(page_index);
}
/// Removes a block from the lookup map and every RAM page list it occupies.
/// The block is freed immediately unless it is the one currently executing,
/// in which case the delete is deferred to Execute() via
/// m_current_block_flushed.
void CodeCache::FlushBlock(CodeBlock* block)
{
// The block map is keyed on CodeBlockKey::bits (see GetNextBlock), which
// also encodes user_mode — looking up by GetPC() alone would miss user-mode
// blocks, so use the same key here.
BlockMap::iterator iter = m_blocks.find(block->key.bits);
Assert(iter != m_blocks.end() && iter->second == block);
Log_DevPrintf("Flushing block at address 0x%08X", block->GetPC());
// remove from the page map
const u32 start_page = block->GetStartPageIndex();
const u32 end_page = block->GetEndPageIndex();
for (u32 page = start_page; page < end_page; page++)
{
auto& page_blocks = m_ram_block_map[page];
auto page_block_iter = std::find(page_blocks.begin(), page_blocks.end(), block);
Assert(page_block_iter != page_blocks.end());
page_blocks.erase(page_block_iter);
}
// remove from block map
m_blocks.erase(iter);
// flushing block currently executing? defer the delete to the dispatch loop
if (m_current_block == block)
{
Log_WarningPrintf("Flushing currently-executing block 0x%08X", block->GetPC());
m_current_block_flushed = true;
}
else
{
delete block;
}
}
/// Executes a pre-decoded block through the interpreter, one instruction per
/// iteration, reproducing the pipeline bookkeeping the core's own Execute()
/// loop performs (branch/load delay slots, exception handling).
void CodeCache::InterpretCachedBlock(const CodeBlock& block)
{
// set up the state so we've already fetched the instruction
DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == block.GetPC());
for (const CodeBlockInstruction& cbi : block.instructions)
{
m_core->m_pending_ticks += 1;
m_core->m_downcount -= 1;
// now executing the instruction we previously fetched (from the decoded block)
m_core->m_current_instruction.bits = cbi.instruction.bits;
m_core->m_current_instruction_pc = m_core->m_regs.pc;
m_core->m_current_instruction_in_branch_delay_slot = cbi.is_branch_delay_slot;
m_core->m_current_instruction_was_branch_taken = m_core->m_branch_was_taken;
m_core->m_branch_was_taken = false;
m_core->m_exception_raised = false;
// update pc to the next instruction, as the fetch would have done
DebugAssert((m_core->m_regs.pc & PHYSICAL_MEMORY_ADDRESS_MASK) == cbi.pc);
m_core->m_regs.pc = m_core->m_regs.npc;
m_core->m_regs.npc += 4;
// execute the instruction we previously fetched
m_core->ExecuteInstruction();
// shift the load-delay pipeline forward by one instruction
m_core->m_load_delay_reg = m_core->m_next_load_delay_reg;
m_core->m_next_load_delay_reg = Reg::count;
m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value;
m_core->m_next_load_delay_old_value = 0;
// an exception redirected control flow; abandon the rest of the block
if (m_core->m_exception_raised)
break;
}
// cleanup so the interpreter can kick in if needed
m_core->m_next_instruction_is_branch_delay_slot = false;
}
/// Fallback path when no compiled block is available: fetches and interprets
/// instructions one at a time until a block boundary (branch + delay slot,
/// failed fetch, exception, or exit instruction) is reached.
void CodeCache::InterpretUncachedBlock()
{
// At this point, pc contains the last address executed (in the previous block). The instruction has not been fetched
// yet. pc shouldn't be updated until the fetch occurs, that way the exception occurs in the delay slot.
bool in_branch_delay_slot = false;
for (;;)
{
m_core->m_pending_ticks += 1;
m_core->m_downcount -= 1;
// now executing the instruction we previously fetched
m_core->m_current_instruction.bits = m_core->m_next_instruction.bits;
m_core->m_current_instruction_pc = m_core->m_regs.pc;
m_core->m_current_instruction_in_branch_delay_slot = m_core->m_next_instruction_is_branch_delay_slot;
m_core->m_current_instruction_was_branch_taken = m_core->m_branch_was_taken;
m_core->m_next_instruction_is_branch_delay_slot = false;
m_core->m_branch_was_taken = false;
m_core->m_exception_raised = false;
// Fetch the next instruction, except if we're in a branch delay slot. The "fetch" is done in the next block.
if (!m_core->FetchInstruction())
break;
// execute the instruction we previously fetched
m_core->ExecuteInstruction();
// shift the load-delay pipeline forward by one instruction
m_core->m_load_delay_reg = m_core->m_next_load_delay_reg;
m_core->m_next_load_delay_reg = Reg::count;
m_core->m_load_delay_old_value = m_core->m_next_load_delay_old_value;
m_core->m_next_load_delay_old_value = 0;
// stop at block boundaries: exceptions, a completed branch delay slot, or
// explicit exit instructions (e.g. syscall)
const bool branch = IsBranchInstruction(m_core->m_current_instruction);
if (m_core->m_exception_raised || (!branch && in_branch_delay_slot) ||
IsExitBlockInstruction(m_core->m_current_instruction))
{
break;
}
in_branch_delay_slot = branch;
}
}
} // namespace CPU

59
src/core/cpu_code_cache.h Normal file
View File

@ -0,0 +1,59 @@
#pragma once
#include "common/bitfield.h"
#include "cpu_types.h"
#include <array>
#include <memory>
#include <unordered_map>
#include <vector>
class JitCodeBuffer;
class Bus;
namespace CPU {
class Core;
namespace Recompiler {
class ASMFunctions;
}
/// Caches decoded (and optionally recompiled) guest code blocks, keyed by
/// unmirrored physical PC + privilege mode, and invalidates them when the
/// bus reports writes to flagged RAM code pages.
class CodeCache
{
public:
CodeCache();
~CodeCache();

void Initialize(Core* core, Bus* bus);
void Reset();
void Execute();

/// Flushes all blocks which are in the range of the specified code page.
void FlushBlocksWithPageIndex(u32 page_index);

private:
using BlockMap = std::unordered_map<u32, CodeBlock*>;

/// Looks up the block for the current PC, compiling it on a cache miss.
const CodeBlock* GetNextBlock();

/// Decodes (and optionally recompiles) guest instructions into a block.
bool CompileBlock(CodeBlock* block);

/// Unlinks a block from the lookup/page maps and frees it.
void FlushBlock(CodeBlock* block);

/// Executes a previously-decoded block via the interpreter.
void InterpretCachedBlock(const CodeBlock& block);

/// Interprets instructions one at a time when no block is available.
void InterpretUncachedBlock();

// Initialized to null like the other pointer members, so accidental use
// before Initialize() faults deterministically instead of reading garbage.
Core* m_core = nullptr;
Bus* m_bus = nullptr;

// Block currently being executed; FlushBlock() defers its delete via the flag.
const CodeBlock* m_current_block = nullptr;
bool m_current_block_flushed = false;

std::unique_ptr<JitCodeBuffer> m_code_buffer;
std::unique_ptr<Recompiler::ASMFunctions> m_asm_functions;

BlockMap m_blocks;

// Per-RAM-code-page list of blocks overlapping that page, for invalidation.
std::array<std::vector<CodeBlock*>, CPU_CODE_CACHE_PAGE_COUNT> m_ram_block_map;
};
extern bool USE_CODE_CACHE;
extern bool USE_RECOMPILER;
} // namespace CPU

View File

@ -33,7 +33,6 @@ void WriteToExecutionLog(const char* format, ...)
va_end(ap);
}
Core::Core() = default;
Core::~Core() = default;
@ -320,6 +319,7 @@ void Core::RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE)
// flush the pipeline - we don't want to execute the previously fetched instruction
m_regs.npc = GetExceptionVector(excode);
m_exception_raised = true;
FlushPipeline();
}
@ -333,21 +333,25 @@ void Core::ClearExternalInterrupt(u8 bit)
m_cop0_regs.cause.Ip &= static_cast<u8>(~(1u << bit));
}
bool Core::DispatchInterrupts()
/// Returns true when an interrupt is both pending and enabled: IEc must be
/// set, and some bit in bits 8-15 (the Ip/Im fields, per the commented-out
/// field-wise form below) must be set in both cause and sr.
bool Core::HasPendingInterrupt()
{
// const bool do_interrupt = m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0);
const bool do_interrupt =
m_cop0_regs.sr.IEc && (((m_cop0_regs.cause.bits & m_cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
return do_interrupt;
}
void Core::DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
if (m_next_instruction.IsCop2Instruction())
return false;
return;
// const bool do_interrupt = m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0);
const bool do_interrupt =
m_cop0_regs.sr.IEc && (((m_cop0_regs.cause.bits & m_cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
if (!do_interrupt)
return false;
RaiseException(Exception::INT);
return true;
// Interrupt raising occurs before the start of the instruction.
RaiseException(Exception::INT, m_regs.pc, m_next_instruction_is_branch_delay_slot, m_branch_was_taken,
m_next_instruction.cop.cop_n);
}
void Core::FlushLoadDelay()
@ -366,9 +370,15 @@ void Core::FlushPipeline()
// not in a branch delay slot
m_branch_was_taken = false;
m_next_instruction_is_branch_delay_slot = false;
m_current_instruction_pc = m_regs.pc;
// prefetch the next instruction
FetchInstruction();
// and set it as the next one to execute
m_current_instruction.bits = m_next_instruction.bits;
m_current_instruction_in_branch_delay_slot = false;
m_current_instruction_was_branch_taken = false;
}
u32 Core::ReadReg(Reg rs)
@ -567,6 +577,9 @@ void Core::Execute()
{
while (m_downcount >= 0)
{
if (HasPendingInterrupt())
DispatchInterrupt();
m_pending_ticks += 1;
m_downcount -= 1;
@ -577,9 +590,10 @@ void Core::Execute()
m_current_instruction_was_branch_taken = m_branch_was_taken;
m_next_instruction_is_branch_delay_slot = false;
m_branch_was_taken = false;
m_exception_raised = false;
// fetch the next instruction
if (DispatchInterrupts() || !FetchInstruction())
if (!FetchInstruction())
continue;
#if 0 // GTE flag test debugging
@ -621,19 +635,20 @@ void Core::ExecuteInstruction()
const Instruction inst = m_current_instruction;
#if 0
if (inst_pc == 0xBFC06FF0)
if (m_current_instruction_pc == 0xBFC06FF0)
{
TRACE_EXECUTION = true;
//TRACE_EXECUTION = true;
LOG_EXECUTION = true;
__debugbreak();
}
#endif
#ifdef _DEBUG
//#ifdef _DEBUG
if (TRACE_EXECUTION)
PrintInstruction(inst.bits, m_current_instruction_pc, this);
if (LOG_EXECUTION)
LogInstruction(inst.bits, m_current_instruction_pc, this);
#endif
//#endif
switch (inst.op)
{

View File

@ -12,6 +12,14 @@ class Bus;
namespace CPU {
class CodeCache;
namespace Recompiler
{
class CodeGenerator;
class Thunks;
}
class Core
{
public:
@ -20,6 +28,10 @@ public:
static constexpr PhysicalMemoryAddress DCACHE_LOCATION_MASK = UINT32_C(0xFFFFFC00);
static constexpr PhysicalMemoryAddress DCACHE_OFFSET_MASK = UINT32_C(0x000003FF);
static constexpr PhysicalMemoryAddress DCACHE_SIZE = UINT32_C(0x00000400);
friend CodeCache;
friend Recompiler::CodeGenerator;
friend Recompiler::Thunks;
Core();
~Core();
@ -102,7 +114,8 @@ private:
u32 GetExceptionVector(Exception excode) const;
void RaiseException(Exception excode);
void RaiseException(Exception excode, u32 EPC, bool BD, bool BT, u8 CE);
bool DispatchInterrupts();
bool HasPendingInterrupt();
void DispatchInterrupt();
// flushes any load delays if present
void FlushLoadDelay();
@ -141,6 +154,7 @@ private:
bool m_current_instruction_was_branch_taken = false;
bool m_next_instruction_is_branch_delay_slot = false;
bool m_branch_was_taken = false;
bool m_exception_raised = false;
// load delays
Reg m_load_delay_reg = Reg::count;

View File

@ -23,10 +23,6 @@ struct TableEntry
const char* format;
};
static const std::array<const char*, 32> s_reg_names = {
{"$zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"}};
static const std::array<const char*, 64> s_base_table = {{
"", // 0
"UNKNOWN", // 1
@ -188,11 +184,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core
if (std::strncmp(str, "rs", 2) == 0)
{
dest->AppendString(s_reg_names[static_cast<u8>(inst.r.rs.GetValue())]);
dest->AppendString(GetRegName(inst.r.rs));
if (state)
{
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ",
s_reg_names[static_cast<u8>(inst.r.rs.GetValue())],
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rs),
state->GetRegs().r[static_cast<u8>(inst.r.rs.GetValue())]);
}
@ -200,11 +195,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core
}
else if (std::strncmp(str, "rt", 2) == 0)
{
dest->AppendString(s_reg_names[static_cast<u8>(inst.r.rt.GetValue())]);
dest->AppendString(GetRegName(inst.r.rt));
if (state)
{
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ",
s_reg_names[static_cast<u8>(inst.r.rt.GetValue())],
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rt),
state->GetRegs().r[static_cast<u8>(inst.r.rt.GetValue())]);
}
@ -212,11 +206,10 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core
}
else if (std::strncmp(str, "rd", 2) == 0)
{
dest->AppendString(s_reg_names[static_cast<u8>(inst.r.rd.GetValue())]);
dest->AppendString(GetRegName(inst.r.rd));
if (state)
{
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ",
s_reg_names[static_cast<u8>(inst.r.rd.GetValue())],
comment.AppendFormattedString("%s%s=0x%08X", comment.IsEmpty() ? "" : ", ", GetRegName(inst.r.rd),
state->GetRegs().r[static_cast<u8>(inst.r.rd.GetValue())]);
}
@ -247,7 +240,7 @@ static void FormatInstruction(String* dest, const Instruction inst, u32 pc, Core
else if (std::strncmp(str, "offsetrs", 8) == 0)
{
const s32 offset = static_cast<s32>(inst.i.imm_sext32());
dest->AppendFormattedString("%d(%s)", offset, s_reg_names[static_cast<u8>(inst.i.rs.GetValue())]);
dest->AppendFormattedString("%d(%s)", offset, GetRegName(inst.i.rs));
if (state)
{
comment.AppendFormattedString("%saddr=0x%08X", comment.IsEmpty() ? "" : ", ",

View File

@ -0,0 +1,589 @@
#include "cpu_recompiler_code_generator.h"
#include "YBaseLib/Log.h"
#include "cpu_disasm.h"
Log_SetChannel(CPU::Recompiler);
namespace CPU::Recompiler {
// Constructs a code generator bound to one CPU core, one output JIT code
// buffer, and the pre-generated assembly helper functions. m_emit is an xbyak
// emitter writing directly into the free space of the code buffer.
CodeGenerator::CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions)
  : m_cpu(cpu), m_code_buffer(code_buffer), m_asm_functions(asm_functions), m_register_cache(*this),
    m_emit(code_buffer->GetFreeCodeSpace(), code_buffer->GetFreeCodePointer())
{
  // Architecture-specific: sets up allocation order and caller/callee-saved sets.
  InitHostRegs();
}

CodeGenerator::~CodeGenerator() = default;
// Returns the byte offset of guest register `reg` inside the Core structure.
// Guest registers are stored contiguously as 32-bit values in m_regs.r[].
u32 CodeGenerator::CalculateRegisterOffset(Reg reg)
{
  const u32 base_offset = static_cast<u32>(offsetof(Core, m_regs.r[0]));
  return base_offset + (static_cast<u32>(reg) * static_cast<u32>(sizeof(u32)));
}
// Compiles an entire code block (straight-line run of guest instructions) to
// host code, returning the entry point and size through the out parameters.
// On failure the generator's block state is cleared; note that any host code
// already emitted into the buffer is not rolled back here.
bool CodeGenerator::CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code,
                                 u32* out_host_code_size)
{
  // TODO: Align code buffer.

  m_block = block;
  m_block_start = block->instructions.data();
  m_block_end = block->instructions.data() + block->instructions.size();

  // Assume the worst on entry: per-instruction CPU state flags must be
  // re-synchronized before the first instruction (see InstructionPrologue()).
  m_current_instruction_in_branch_delay_slot_dirty = true;
  m_branch_was_taken_dirty = true;
  m_current_instruction_was_branch_taken_dirty = false;
  m_load_delay_dirty = true;

  EmitBeginBlock();
  BlockPrologue();

  const CodeBlockInstruction* cbi = m_block_start;
  while (cbi != m_block_end)
  {
#ifndef Y_BUILD_CONFIG_RELEASE
    SmallString disasm;
    DisassembleInstruction(&disasm, cbi->pc, cbi->instruction.bits, nullptr);
    Log_DebugPrintf("Compiling instruction '%s'", disasm.GetCharArray());
#endif

    if (!CompileInstruction(*cbi))
    {
      m_block_end = nullptr;
      m_block_start = nullptr;
      m_block = nullptr;
      return false;
    }

    cbi++;
  }

  BlockEpilogue();
  EmitEndBlock();
  FinalizeBlock(out_host_code, out_host_code_size);

  // Every scratch/host register should have been released by the epilogue.
  DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);

  m_block_end = nullptr;
  m_block_start = nullptr;
  m_block = nullptr;
  return true;
}
// Dispatches one guest instruction to its specialized compiler; anything not
// implemented natively yet goes through the interpreter fallback. The #if 1
// allows quickly disabling all native paths for debugging.
bool CodeGenerator::CompileInstruction(const CodeBlockInstruction& cbi)
{
  bool result;
  switch (cbi.instruction.op)
  {
#if 1
    case InstructionOp::lui:
      result = Compile_lui(cbi);
      break;

    case InstructionOp::ori:
      result = Compile_ori(cbi);
      break;

    case InstructionOp::addiu:
      result = Compile_addiu(cbi);
      break;

    case InstructionOp::funct:
    {
      switch (cbi.instruction.r.funct)
      {
        case InstructionFunct::sll:
          result = Compile_sll(cbi);
          break;

        default:
          result = Compile_Fallback(cbi);
          break;
      }
    }
    break;
#endif

    default:
      result = Compile_Fallback(cbi);
      break;
  }

  // release temporary effective addresses
  for (Value& value : m_operand_memory_addresses)
    value.ReleaseAndClear();

  return result;
}
// Produces a copy of `value` converted to `size`. Constants are converted at
// compile time; register values get a fresh scratch register with an explicit
// extend (widening) or a plain copy (narrowing — only the low bits are
// observed at the smaller size).
Value CodeGenerator::ConvertValueSize(const Value& value, RegSize size, bool sign_extend)
{
  DebugAssert(value.size != size);

  if (value.IsConstant())
  {
    // compile-time conversion, woo!
    switch (size)
    {
      case RegSize_8:
        // Narrowing to 8 bits: sign/zero extension is irrelevant, just truncate.
        return Value::FromConstantU8(value.constant_value & 0xFF);

      case RegSize_16:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU16(sign_extend ? SignExtend16(Truncate8(value.constant_value)) :
                                                        ZeroExtend16(Truncate8(value.constant_value)));

          default:
            // Narrowing from a wider size: truncate to 16 bits.
            return Value::FromConstantU16(value.constant_value & 0xFFFF);
        }
      }
      break;

      case RegSize_32:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate8(value.constant_value)) :
                                                        ZeroExtend32(Truncate8(value.constant_value)));
          case RegSize_16:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate16(value.constant_value)) :
                                                        ZeroExtend32(Truncate16(value.constant_value)));
          case RegSize_32:
            return value;

          default:
            break;
        }
      }
      break;

      default:
        break;
    }

    // Unhandled constant conversion (e.g. 64-bit source) — should not happen.
    UnreachableCode();
    return Value{};
  }

  Value new_value = m_register_cache.AllocateScratch(size);
  if (size < value.size)
  {
    // Narrowing: a plain register copy suffices; the smaller size of the
    // scratch value determines which bits are subsequently observed.
    EmitCopyValue(new_value.host_reg, value);
  }
  else
  {
    if (sign_extend)
      EmitSignExtend(new_value.host_reg, size, value.host_reg, value.size);
    else
      EmitZeroExtend(new_value.host_reg, size, value.host_reg, value.size);
  }

  return new_value;
}
// Converts `value` to `size` in place. Non-scratch or constant values are
// replaced with a converted copy (to avoid corrupting register-cache entries);
// a scratch register value can either be "viewed" at the smaller size
// (narrowing) or explicitly extended in its own register (widening).
//
// Cleanup: the original assigned value->size twice on the narrowing path
// (once in the branch, once at the end); the assignment now happens exactly
// once, after the optional extend has been emitted.
void CodeGenerator::ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend)
{
  DebugAssert(value->size != size);

  // We don't want to mess up the register cache value, so generate a new value if it's not scratch.
  if (value->IsConstant() || !value->IsScratch())
  {
    *value = ConvertValueSize(*value, size, sign_extend);
    return;
  }

  DebugAssert(value->IsInHostRegister() && value->IsScratch());

  // Widening requires an explicit extend; narrowing just "views" the lower
  // part of the host register, which the size change below accomplishes.
  if (size > value->size)
  {
    if (sign_extend)
      EmitSignExtend(value->host_reg, size, value->host_reg, value->size);
    else
      EmitZeroExtend(value->host_reg, size, value->host_reg, value->size);
  }

  value->size = size;
}
// Emits (or folds) lhs + rhs and returns the result. Both operands must have
// the same size. Two constants are folded at compile time; a constant zero on
// either side degenerates to a register copy.
Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);

  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // Fold at compile time, truncating to the operand size.
    const u64 sum = lhs.constant_value + rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(sum));
      case RegSize_16:
        return Value::FromConstantU16(Truncate16(sum));
      case RegSize_32:
        return Value::FromConstantU32(Truncate32(sum));
      case RegSize_64:
        return Value::FromConstantU64(sum);
      default:
        return Value();
    }
  }

  Value result = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    // 0 + x == x: a single copy suffices.
    EmitCopyValue(result.host_reg, rhs);
  }
  else if (rhs.HasConstantValue(0))
  {
    // x + 0 == x.
    EmitCopyValue(result.host_reg, lhs);
  }
  else
  {
    EmitCopyValue(result.host_reg, lhs);
    EmitAdd(result.host_reg, rhs);
  }
  return result;
}
// Emits (or folds) lhs << rhs and returns the result. Both operands must have
// the same size; a constant shift of zero skips the shift emission entirely.
Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);

  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // Fold at compile time, truncating to the operand size.
    const u64 shifted = lhs.constant_value << rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(shifted));
      case RegSize_16:
        return Value::FromConstantU16(Truncate16(shifted));
      case RegSize_32:
        return Value::FromConstantU32(Truncate32(shifted));
      case RegSize_64:
        return Value::FromConstantU64(shifted);
      default:
        return Value();
    }
  }

  Value result = m_register_cache.AllocateScratch(lhs.size);
  EmitCopyValue(result.host_reg, lhs);

  // x << 0 == x, so only emit the shift for a non-zero (or unknown) amount.
  if (!rhs.HasConstantValue(0))
    EmitShl(result.host_reg, result.size, rhs);

  return result;
}
// Emits (or folds) lhs | rhs and returns the result. Both operands must have
// the same size. Two constants are folded at compile time; a constant zero on
// either side degenerates to a single register copy.
//
// BUGFIX: the original unconditionally emitted EmitCopyValue(res, lhs) before
// the constant-zero checks, then emitted a second (redundant) copy of lhs in
// the rhs==0 branch and in the general path — generating dead host
// instructions. The structure now mirrors AddValues(): exactly one copy is
// emitted per path.
Value CodeGenerator::OrValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);

  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value | rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    // 0 | x == x.
    EmitCopyValue(res.host_reg, rhs);
  }
  else if (rhs.HasConstantValue(0))
  {
    // x | 0 == x.
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitOr(res.host_reg, rhs);
  }
  return res;
}
// Emitted once at the start of every block: clears the exception flag and
// synchronizes m_current_instruction_pc / m_regs.pc so that per-instruction
// updates can be simple additions, then advances npc past the first
// instruction.
void CodeGenerator::BlockPrologue()
{
  EmitStoreCPUStructField(offsetof(Core, m_exception_raised), Value::FromConstantU8(0));

  // fetching of the first instruction...

  // sync m_current_instruction_pc so we can simply add to it
  SyncCurrentInstructionPC();

  // and the same for m_regs.pc
  SyncPC();

  EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(4));
}
// Emitted once at the end of every block: flushes cached guest registers back
// to the CPU struct, applies any deferred PC/cycle updates, and clears the
// branch-delay-slot flag for the next block.
void CodeGenerator::BlockEpilogue()
{
#if defined(_DEBUG) && defined(Y_CPU_X64)
  // Marker nop so the epilogue boundary is visible when disassembling.
  m_emit.nop();
#endif

  m_register_cache.FlushAllGuestRegisters(true, false);

  // if the last instruction wasn't a fallback, we need to add its fetch
  if (m_delayed_pc_add > 0)
  {
    EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add));
    m_delayed_pc_add = 0;
  }

  AddPendingCycles();

  // TODO: correct value for is_branch_delay_slot - branches in branch delay slot.
  EmitStoreCPUStructField(offsetof(Core, m_next_instruction_is_branch_delay_slot), Value::FromConstantU8(0));
}
// Emitted before each instruction: re-syncs dirty per-instruction CPU state
// (branch-taken flags, delay-slot flag, PCs). For instructions that cannot
// trap (and when force_sync is false) the PC advance and cycle count are
// deferred and accumulated; otherwise any deferred amounts are flushed so the
// CPU struct is accurate at the point a fault could occur.
void CodeGenerator::InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles,
                                        bool force_sync /* = false */)
{
#if defined(_DEBUG) && defined(Y_CPU_X64)
  // Marker nop so instruction boundaries are visible when disassembling.
  m_emit.nop();
#endif

  // reset dirty flags
  if (m_branch_was_taken_dirty)
  {
    // m_current_instruction_was_branch_taken = m_branch_was_taken; m_branch_was_taken = false
    Value temp = m_register_cache.AllocateScratch(RegSize_8);
    EmitLoadCPUStructField(temp.host_reg, RegSize_8, offsetof(Core, m_branch_was_taken));
    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), temp);
    EmitStoreCPUStructField(offsetof(Core, m_branch_was_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = true;
    m_branch_was_taken_dirty = false;
  }
  else if (m_current_instruction_was_branch_taken_dirty)
  {
    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_was_branch_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = false;
  }

  if (m_current_instruction_in_branch_delay_slot_dirty && !cbi.is_branch_delay_slot)
  {
    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(0));
    m_current_instruction_in_branch_delay_slot_dirty = false;
  }

  if (cbi.is_branch_delay_slot)
  {
    // m_regs.pc should be synced for the next block, as the branch wrote to npc
    SyncCurrentInstructionPC();
    SyncPC();

    // m_current_instruction_in_branch_delay_slot = true
    EmitStoreCPUStructField(offsetof(Core, m_current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
    m_current_instruction_in_branch_delay_slot_dirty = true;
  }

  if (!CanInstructionTrap(cbi.instruction, m_block->key.user_mode) && !force_sync)
  {
    // Defer updates for non-faulting instructions.
    m_delayed_pc_add += INSTRUCTION_SIZE;
    m_delayed_cycles_add += cycles;
    return;
  }

  // Flush the deferred PC advance into all three PC fields.
  if (m_delayed_pc_add > 0)
  {
    // m_current_instruction_pc += m_delayed_pc_add
    EmitAddCPUStructField(offsetof(Core, m_current_instruction_pc), Value::FromConstantU32(m_delayed_pc_add));

    // m_regs.pc += m_delayed_pc_add
    EmitAddCPUStructField(offsetof(Core, m_regs.pc), Value::FromConstantU32(m_delayed_pc_add));

    // m_regs.npc += m_delayed_pc_add
    // TODO: This can go once we recompile branch instructions and unconditionally set npc
    EmitAddCPUStructField(offsetof(Core, m_regs.npc), Value::FromConstantU32(m_delayed_pc_add));

    m_delayed_pc_add = 0;
  }

  // This instruction's own fetch is deferred until the next sync point.
  if (!cbi.is_branch)
    m_delayed_pc_add = INSTRUCTION_SIZE;

  m_delayed_cycles_add += cycles;
  AddPendingCycles();
}
// Emitted after each instruction: rotates the two-stage load-delay pipeline
// state stored in the CPU struct (next_load_delay -> load_delay), and clears
// it once no load is in flight.
void CodeGenerator::InstructionEpilogue(const CodeBlockInstruction& cbi)
{
  // copy if the previous instruction was a load, reset the current value on the next instruction
  if (m_load_delay_dirty)
  {
    // cpu->m_load_delay_reg = cpu->m_next_load_delay_reg;
    // cpu->m_next_load_delay_reg = Reg::count;
    {
      Value temp = m_register_cache.AllocateScratch(RegSize_8);
      EmitLoadCPUStructField(temp.host_reg, RegSize_8, offsetof(Core, m_next_load_delay_reg));
      EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_reg),
                              Value::FromConstantU8(static_cast<u8>(Reg::count)));
      EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), temp);
    }

    // cpu->m_load_delay_old_value = cpu->m_next_load_delay_old_value;
    // cpu->m_next_load_delay_old_value = 0;
    {
      Value temp = m_register_cache.AllocateScratch(RegSize_32);
      EmitLoadCPUStructField(temp.host_reg, RegSize_32, offsetof(Core, m_next_load_delay_old_value));
      EmitStoreCPUStructField(offsetof(Core, m_next_load_delay_old_value), Value::FromConstantU32(0));
      EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), temp);
    }

    // The stage we just rotated into load_delay must be cleared next time.
    m_load_delay_dirty = false;
    m_next_load_delay_dirty = true;
  }
  else if (m_next_load_delay_dirty)
  {
    // cpu->m_load_delay_reg = Reg::count;
    // cpu->m_load_delay_old_value = 0;
    EmitStoreCPUStructField(offsetof(Core, m_load_delay_reg), Value::FromConstantU8(static_cast<u8>(Reg::count)));
    EmitStoreCPUStructField(offsetof(Core, m_load_delay_old_value), Value::FromConstantU32(0));
    m_next_load_delay_dirty = false;
  }
}
void CodeGenerator::SyncCurrentInstructionPC()
{
// m_current_instruction_pc = m_regs.pc
Value pc_value = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(pc_value.host_reg, RegSize_32, offsetof(Core, m_regs.pc));
EmitStoreCPUStructField(offsetof(Core, m_current_instruction_pc), pc_value);
}
void CodeGenerator::SyncPC()
{
// m_regs.pc = m_regs.npc
Value npc_value = m_register_cache.AllocateScratch(RegSize_32);
EmitLoadCPUStructField(npc_value.host_reg, RegSize_32, offsetof(Core, m_regs.npc));
EmitStoreCPUStructField(offsetof(Core, m_regs.pc), npc_value);
}
// Flushes the deferred cycle count into the CPU: adds it to m_pending_ticks
// and subtracts it from m_downcount. ~u32(x - 1) is the unsigned
// two's-complement negation of x, so adding it decrements the downcount by
// the same amount using the same EmitAdd helper.
void CodeGenerator::AddPendingCycles()
{
  if (m_delayed_cycles_add == 0)
    return;

  EmitAddCPUStructField(offsetof(Core, m_pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
  EmitAddCPUStructField(offsetof(Core, m_downcount), Value::FromConstantU32(~u32(m_delayed_cycles_add - 1)));
  m_delayed_cycles_add = 0;
}
// Compiles an instruction by calling back into the interpreter at runtime.
// All cached guest registers are flushed and invalidated first, since the
// interpreted instruction may read or write any of them. Always returns true.
bool CodeGenerator::Compile_Fallback(const CodeBlockInstruction& cbi)
{
  // force_sync: pc/npc/cycles must be accurate before entering the interpreter.
  InstructionPrologue(cbi, 1, true);

  // flush and invalidate all guest registers, since the fallback could change any of them
  m_register_cache.FlushAllGuestRegisters(true, true);

  // Store the raw instruction bits where the interpreter thunk reads them.
  EmitStoreCPUStructField(offsetof(Core, m_current_instruction.bits), Value::FromConstantU32(cbi.instruction.bits));

  // emit the function call
  if (CanInstructionTrap(cbi.instruction, m_block->key.user_mode))
  {
    // TODO: Use carry flag or something here too
    Value return_value = m_register_cache.AllocateScratch(RegSize_8);
    EmitFunctionCall(&return_value, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr());
    EmitBlockExitOnBool(return_value);
  }
  else
  {
    EmitFunctionCall(nullptr, &Thunks::InterpretInstruction, m_register_cache.GetCPUPtr());
  }

  // The interpreted instruction may have branched or started a load, so the
  // corresponding CPU state must be treated as dirty for the next instruction.
  m_current_instruction_in_branch_delay_slot_dirty = cbi.is_branch;
  m_branch_was_taken_dirty = cbi.is_branch;
  m_load_delay_dirty = true;

  InstructionEpilogue(cbi);
  return true;
}
bool CodeGenerator::Compile_lui(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
// rt <- (imm << 16)
m_register_cache.WriteGuestRegister(cbi.instruction.i.rt,
Value::FromConstantU32(cbi.instruction.i.imm_zext32() << 16));
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_ori(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
// rt <- rs | zext(imm)
m_register_cache.WriteGuestRegister(cbi.instruction.i.rt,
OrValues(m_register_cache.ReadGuestRegister(cbi.instruction.i.rs),
Value::FromConstantU32(cbi.instruction.i.imm_zext32())));
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_sll(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
// rd <- rt << shamt
m_register_cache.WriteGuestRegister(cbi.instruction.r.rd,
ShlValues(m_register_cache.ReadGuestRegister(cbi.instruction.r.rt),
Value::FromConstantU32(cbi.instruction.r.shamt)));
InstructionEpilogue(cbi);
return true;
}
bool CodeGenerator::Compile_addiu(const CodeBlockInstruction& cbi)
{
InstructionPrologue(cbi, 1);
// rt <- rs + sext(imm)
m_register_cache.WriteGuestRegister(cbi.instruction.i.rt,
AddValues(m_register_cache.ReadGuestRegister(cbi.instruction.i.rs),
Value::FromConstantU32(cbi.instruction.i.imm_sext32())));
InstructionEpilogue(cbi);
return true;
}
} // namespace CPU::Recompiler

View File

@ -0,0 +1,185 @@
#pragma once
#include <array>
#include <initializer_list>
#include <utility>
#include "common/jit_code_buffer.h"
#include "cpu_recompiler_register_cache.h"
#include "cpu_recompiler_thunks.h"
#include "cpu_recompiler_types.h"
#include "cpu_types.h"
// ABI selection
#if defined(Y_CPU_X64)
#if defined(Y_PLATFORM_WINDOWS)
#define ABI_WIN64 1
#elif defined(Y_PLATFORM_LINUX) || defined(Y_PLATFORM_OSX)
#define ABI_SYSV 1
#else
#error Unknown ABI.
#endif
#endif
namespace CPU::Recompiler {
// Translates one CodeBlock of guest instructions into host machine code.
// Instructions without a native implementation fall back to the interpreter
// via thunks. Per-block state (register cache, deferred PC/cycle updates,
// dirty flags) is reset by CompileBlock().
//
// FIX: the EmitFunctionCall templates previously cast the function pointer
// with reinterpret_cast<const void**>, relying on the implicit
// const void** -> const void* conversion at the call site; they now cast
// directly to const void*, which yields the same address without the
// misleading extra level of indirection.
class CodeGenerator
{
public:
  CodeGenerator(Core* cpu, JitCodeBuffer* code_buffer, const ASMFunctions& asm_functions);
  ~CodeGenerator();

  // Byte offset of guest register `reg` within the Core structure.
  static u32 CalculateRegisterOffset(Reg reg);
  static const char* GetHostRegName(HostReg reg, RegSize size = HostPointerSize);
  static void AlignCodeBuffer(JitCodeBuffer* code_buffer);

  RegisterCache& GetRegisterCache() { return m_register_cache; }
  CodeEmitter& GetCodeEmitter() { return m_emit; }

  // Compiles the block; on success returns the host entry point and its size.
  bool CompileBlock(const CodeBlock* block, CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);

  //////////////////////////////////////////////////////////////////////////
  // Code Generation
  //////////////////////////////////////////////////////////////////////////
  void EmitBeginBlock();
  void EmitEndBlock();
  void EmitBlockExitOnBool(const Value& value);
  void FinalizeBlock(CodeBlock::HostCodePointer* out_host_code, u32* out_host_code_size);

  void EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
  void EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size);
  void EmitCopyValue(HostReg to_reg, const Value& value);
  void EmitAdd(HostReg to_reg, const Value& value);
  void EmitSub(HostReg to_reg, const Value& value);
  void EmitCmp(HostReg to_reg, const Value& value);
  void EmitInc(HostReg to_reg, RegSize size);
  void EmitDec(HostReg to_reg, RegSize size);
  void EmitShl(HostReg to_reg, RegSize size, const Value& amount_value);
  void EmitShr(HostReg to_reg, RegSize size, const Value& amount_value);
  void EmitSar(HostReg to_reg, RegSize size, const Value& amount_value);
  void EmitAnd(HostReg to_reg, const Value& value);
  void EmitOr(HostReg to_reg, const Value& value);
  void EmitXor(HostReg to_reg, const Value& value);
  void EmitTest(HostReg to_reg, const Value& value);
  void EmitNot(HostReg to_reg, RegSize size);

  void EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg);
  void EmitStoreGuestRegister(Reg guest_reg, const Value& value);
  void EmitLoadCPUStructField(HostReg host_reg, RegSize size, u32 offset);
  void EmitStoreCPUStructField(u32 offset, const Value& value);
  void EmitAddCPUStructField(u32 offset, const Value& value);

  u32 PrepareStackForCall();
  void RestoreStackAfterCall(u32 adjust_size);

  void EmitFunctionCallPtr(Value* return_value, const void* ptr);
  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1);
  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2);
  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
                           const Value& arg3);
  void EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
                           const Value& arg3, const Value& arg4);

  // Typed wrappers over EmitFunctionCallPtr(); `ptr` is a free-function pointer.
  template<typename FunctionType>
  void EmitFunctionCall(Value* return_value, const FunctionType ptr)
  {
    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr));
  }

  template<typename FunctionType>
  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1)
  {
    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1);
  }

  template<typename FunctionType>
  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2)
  {
    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2);
  }

  template<typename FunctionType>
  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
                        const Value& arg3)
  {
    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3);
  }

  template<typename FunctionType>
  void EmitFunctionCall(Value* return_value, const FunctionType ptr, const Value& arg1, const Value& arg2,
                        const Value& arg3, const Value& arg4)
  {
    EmitFunctionCallPtr(return_value, reinterpret_cast<const void*>(ptr), arg1, arg2, arg3, arg4);
  }

  // Host register saving.
  void EmitPushHostReg(HostReg reg);
  void EmitPopHostReg(HostReg reg);

  // Flags copying from host.
#if defined(Y_CPU_X64)
  void ReadFlagsFromHost(Value* value);
  Value ReadFlagsFromHost();
#endif

  // Value ops
  Value AddValues(const Value& lhs, const Value& rhs);
  Value MulValues(const Value& lhs, const Value& rhs);
  Value ShlValues(const Value& lhs, const Value& rhs);
  Value OrValues(const Value& lhs, const Value& rhs);

private:
  // Host register setup
  void InitHostRegs();

  Value ConvertValueSize(const Value& value, RegSize size, bool sign_extend);
  void ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend);

  //////////////////////////////////////////////////////////////////////////
  // Code Generation Helpers
  //////////////////////////////////////////////////////////////////////////
  // branch target, memory address, etc
  void BlockPrologue();
  void BlockEpilogue();
  void InstructionPrologue(const CodeBlockInstruction& cbi, TickCount cycles, bool force_sync = false);
  void InstructionEpilogue(const CodeBlockInstruction& cbi);
  void SyncCurrentInstructionPC();
  void SyncPC();
  void AddPendingCycles();

  //////////////////////////////////////////////////////////////////////////
  // Instruction Code Generators
  //////////////////////////////////////////////////////////////////////////
  bool CompileInstruction(const CodeBlockInstruction& cbi);
  bool Compile_Fallback(const CodeBlockInstruction& cbi);
  bool Compile_lui(const CodeBlockInstruction& cbi);
  bool Compile_ori(const CodeBlockInstruction& cbi);
  bool Compile_sll(const CodeBlockInstruction& cbi);
  bool Compile_addiu(const CodeBlockInstruction& cbi);

  Core* m_cpu;
  JitCodeBuffer* m_code_buffer;
  const ASMFunctions& m_asm_functions;

  const CodeBlock* m_block = nullptr;
  const CodeBlockInstruction* m_block_start = nullptr;
  const CodeBlockInstruction* m_block_end = nullptr;

  RegisterCache m_register_cache;
  CodeEmitter m_emit;

  // Deferred PC advance / cycle count, flushed by InstructionPrologue() or
  // BlockEpilogue() (see AddPendingCycles()).
  u32 m_delayed_pc_add = 0;
  TickCount m_delayed_cycles_add = 0;

  std::array<Value, 3> m_operand_memory_addresses{};
  Xbyak::Label m_block_exit_label;

  // whether various flags need to be reset.
  bool m_current_instruction_in_branch_delay_slot_dirty = false;
  bool m_branch_was_taken_dirty = false;
  bool m_current_instruction_was_branch_taken_dirty = false;
  bool m_next_load_delay_dirty = false;
  bool m_load_delay_dirty = false;
};
} // namespace CPU::Recompiler

View File

@ -0,0 +1,21 @@
#include "cpu_recompiler_code_generator.h"
namespace CPU::Recompiler {
#if !defined(Y_CPU_X64)
// No-op stub for architectures without a native backend; the x64 backend
// supplies its own implementation.
void CodeGenerator::AlignCodeBuffer(JitCodeBuffer* code_buffer) {}
#endif
// Loads a guest register (a 32-bit field in the Core structure) into the
// given host register.
void CodeGenerator::EmitLoadGuestRegister(HostReg host_reg, Reg guest_reg)
{
  const u32 field_offset = CalculateRegisterOffset(guest_reg);
  EmitLoadCPUStructField(host_reg, RegSize_32, field_offset);
}
// Stores a 32-bit value into a guest register field in the Core structure.
void CodeGenerator::EmitStoreGuestRegister(Reg guest_reg, const Value& value)
{
  DebugAssert(value.size == RegSize_32);
  const u32 field_offset = CalculateRegisterOffset(guest_reg);
  EmitStoreCPUStructField(field_offset, value);
}
} // namespace CPU::Recompiler

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,604 @@
#include "cpu_recompiler_register_cache.h"
#include "YBaseLib/Log.h"
#include "cpu_recompiler_code_generator.h"
#include <cinttypes>
Log_SetChannel(CPU::Recompiler);
namespace CPU::Recompiler {
Value::Value() = default;

// Constant value: no host register is held.
Value::Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_)
  : regcache(regcache_), constant_value(constant_), size(size_), flags(flags_)
{
}

// Copying is only legal for non-scratch values: a scratch value owns its host
// register, and a copy would release it twice on destruction.
Value::Value(const Value& other)
  : regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
    flags(other.flags)
{
  AssertMsg(!other.IsScratch(), "Can't copy a temporary register");
}

// Moving transfers ownership; the source is cleared so its destructor will
// not release the register.
Value::Value(Value&& other)
  : regcache(other.regcache), constant_value(other.constant_value), host_reg(other.host_reg), size(other.size),
    flags(other.flags)
{
  other.Clear();
}

// Register-backed value.
Value::Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_)
  : regcache(regcache_), host_reg(reg_), size(size_), flags(flags_)
{
}

// Releases the host register back to the cache if this is a scratch value.
Value::~Value()
{
  Release();
}
// Copy-assignment: releases any currently-held scratch register first, then
// takes the other value's (non-scratch) state.
Value& Value::operator=(const Value& other)
{
  AssertMsg(!other.IsScratch(), "Can't copy a temporary register");

  Release();
  regcache = other.regcache;
  constant_value = other.constant_value;
  host_reg = other.host_reg;
  size = other.size;
  flags = other.flags;

  return *this;
}

// Move-assignment: releases the current register, takes ownership from
// `other`, and clears `other`.
// NOTE(review): self-move (v = std::move(v)) would Release() and then Clear()
// the same object, losing its state — assumed never to occur; confirm callers.
Value& Value::operator=(Value&& other)
{
  Release();
  regcache = other.regcache;
  constant_value = other.constant_value;
  host_reg = other.host_reg;
  size = other.size;
  flags = other.flags;
  other.Clear();
  return *this;
}
// Resets all fields to the empty/invalid state. Does NOT release any held
// register — callers that own a register must Release() first (or use
// ReleaseAndClear()).
void Value::Clear()
{
  regcache = nullptr;
  constant_value = 0;
  host_reg = {};
  size = RegSize_8;
  flags = ValueFlags::None;
}

// Returns the host register to the register cache if this value owns one
// (i.e. it is a scratch value). Leaves the fields themselves untouched.
void Value::Release()
{
  if (IsScratch())
  {
    DebugAssert(IsInHostRegister() && regcache);
    regcache->FreeHostReg(host_reg);
  }
}

// Release() followed by Clear(): frees any owned register and empties the value.
void Value::ReleaseAndClear()
{
  Release();
  Clear();
}
// Marks the backing host register as discarded, i.e. its contents need not be
// preserved across an upcoming host call (see PushCallerSavedRegisters()).
void Value::Discard()
{
  DebugAssert(IsInHostRegister());
  regcache->DiscardHostReg(host_reg);
}

// Clears the discarded mark set by Discard().
void Value::Undiscard()
{
  DebugAssert(IsInHostRegister());
  regcache->UndiscardHostReg(host_reg);
}
// All guest-register LRU slots start out empty (Reg::count == no register).
RegisterCache::RegisterCache(CodeGenerator& code_generator) : m_code_generator(code_generator)
{
  m_guest_register_order.fill(Reg::count);
}

RegisterCache::~RegisterCache() = default;
void RegisterCache::SetHostRegAllocationOrder(std::initializer_list<HostReg> regs)
{
size_t index = 0;
for (HostReg reg : regs)
{
m_host_register_state[reg] = HostRegState::Usable;
m_host_register_allocation_order[index++] = reg;
}
m_host_register_available_count = static_cast<u32>(index);
}
void RegisterCache::SetCallerSavedHostRegs(std::initializer_list<HostReg> regs)
{
for (HostReg reg : regs)
m_host_register_state[reg] |= HostRegState::CallerSaved;
}
void RegisterCache::SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs)
{
for (HostReg reg : regs)
m_host_register_state[reg] |= HostRegState::CalleeSaved;
}
// Records the host register that permanently holds the CPU (Core*) pointer.
void RegisterCache::SetCPUPtrHostReg(HostReg reg)
{
  m_cpu_ptr_host_register = reg;
}

// A register is usable if it was included in SetHostRegAllocationOrder().
bool RegisterCache::IsUsableHostReg(HostReg reg) const
{
  return (m_host_register_state[reg] & HostRegState::Usable) != HostRegState::None;
}

bool RegisterCache::IsHostRegInUse(HostReg reg) const
{
  return (m_host_register_state[reg] & HostRegState::InUse) != HostRegState::None;
}

// True if at least one usable register is not currently allocated.
bool RegisterCache::HasFreeHostRegister() const
{
  for (const HostRegState state : m_host_register_state)
  {
    if ((state & (HostRegState::Usable | HostRegState::InUse)) == (HostRegState::Usable))
      return true;
  }

  return false;
}
// Counts usable host registers that are currently allocated.
u32 RegisterCache::GetUsedHostRegisters() const
{
  const HostRegState used_mask = HostRegState::Usable | HostRegState::InUse;
  u32 used = 0;
  for (const HostRegState state : m_host_register_state)
  {
    if ((state & used_mask) == used_mask)
      used++;
  }
  return used;
}

// Counts usable host registers that are currently free.
u32 RegisterCache::GetFreeHostRegisters() const
{
  u32 free_regs = 0;
  for (const HostRegState state : m_host_register_state)
  {
    if ((state & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable)
      free_regs++;
  }
  return free_regs;
}
// Allocates any free usable host register, trying the configured allocation
// order first. If none are free, evicts a cached guest register and retries;
// panics if nothing can be evicted.
HostReg RegisterCache::AllocateHostReg(HostRegState state /* = HostRegState::InUse */)
{
  // try for a free register in allocation order
  for (u32 i = 0; i < m_host_register_available_count; i++)
  {
    const HostReg reg = m_host_register_allocation_order[i];
    if ((m_host_register_state[reg] & (HostRegState::Usable | HostRegState::InUse)) == HostRegState::Usable)
    {
      if (AllocateHostReg(reg, state))
        return reg;
    }
  }

  // evict one of the cached guest registers
  if (!EvictOneGuestRegister())
    Panic("Failed to evict guest register for new allocation");

  // retry now that a register has been freed
  return AllocateHostReg(state);
}
// Attempts to allocate a specific host register; returns false if it is
// already in use. The first time a callee-saved register is allocated within
// a block it is pushed, so the block epilogue can restore it.
//
// BUGFIX: this previously ended with `return reg;` — the HostReg implicitly
// converts to bool, so allocating host register number 0 reported failure
// (false) even though the register was marked in-use (and possibly pushed),
// wedging the allocator. Success now explicitly returns true.
bool RegisterCache::AllocateHostReg(HostReg reg, HostRegState state /*= HostRegState::InUse*/)
{
  if ((m_host_register_state[reg] & HostRegState::InUse) == HostRegState::InUse)
    return false;

  m_host_register_state[reg] |= state;

  if ((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
      HostRegState::CalleeSaved)
  {
    // new register we need to save..
    DebugAssert(m_host_register_callee_saved_order_count < HostReg_Count);
    m_host_register_callee_saved_order[m_host_register_callee_saved_order_count++] = reg;
    m_host_register_state[reg] |= HostRegState::CalleeSavedAllocated;
    m_code_generator.EmitPushHostReg(reg);
  }

  return true;
}
// Marks an in-use register as not needing preservation across host calls
// (PushCallerSavedRegisters()/PopCallerSavedRegisters() skip discarded regs).
void RegisterCache::DiscardHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  Log_DebugPrintf("Discarding host register %s", m_code_generator.GetHostRegName(reg));
  m_host_register_state[reg] |= HostRegState::Discarded;
}

// Clears the discarded mark, restoring normal save/restore behavior.
void RegisterCache::UndiscardHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  Log_DebugPrintf("Undiscarding host register %s", m_code_generator.GetHostRegName(reg));
  m_host_register_state[reg] &= ~HostRegState::Discarded;
}

// Returns the register to the free pool. Usable/saved state bits are kept.
void RegisterCache::FreeHostReg(HostReg reg)
{
  DebugAssert(IsHostRegInUse(reg));
  Log_DebugPrintf("Freeing host register %s", m_code_generator.GetHostRegName(reg));
  m_host_register_state[reg] &= ~HostRegState::InUse;
}
// Makes a specific host register available by flushing (writing back and
// unlinking) any guest register currently cached in it.
void RegisterCache::EnsureHostRegFree(HostReg reg)
{
  if (!IsHostRegInUse(reg))
    return;

  for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
  {
    if (m_guest_reg_cache[i].IsInHostRegister() && m_guest_reg_cache[i].GetHostRegister() == reg)
      FlushGuestRegister(m_guest_reg_cache[i], static_cast<Reg>(i), true, true);
  }
}
// Returns a non-owning Value referencing the host register that permanently
// holds the CPU (Core*) pointer.
Value RegisterCache::GetCPUPtr()
{
  return Value::FromHostReg(this, m_cpu_ptr_host_register, HostPointerSize);
}

// Allocates a temporary host register (any free one, or the specific `reg` if
// given) and wraps it in a scratch Value that frees it on destruction.
Value RegisterCache::AllocateScratch(RegSize size, HostReg reg /* = HostReg_Invalid */)
{
  if (reg == HostReg_Invalid)
  {
    reg = AllocateHostReg();
  }
  else
  {
    Assert(!IsHostRegInUse(reg));
    if (!AllocateHostReg(reg))
      Panic("Failed to allocate specific host register");
  }

  Log_DebugPrintf("Allocating host register %s as scratch", m_code_generator.GetHostRegName(reg));
  return Value::FromScratch(this, reg, size);
}
// Pushes every caller-saved register that is live (in use and not discarded)
// before a host function call. Returns the number of pushes emitted.
u32 RegisterCache::PushCallerSavedRegisters() const
{
  const HostRegState test_mask = HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded;
  const HostRegState live_value = HostRegState::CallerSaved | HostRegState::InUse;

  u32 pushed = 0;
  for (u32 reg = 0; reg < HostReg_Count; reg++)
  {
    if ((m_host_register_state[reg] & test_mask) == live_value)
    {
      m_code_generator.EmitPushHostReg(static_cast<HostReg>(reg));
      pushed++;
    }
  }
  return pushed;
}
// Pops the caller-saved registers pushed by PushCallerSavedRegisters(), in
// reverse register order so the stack unwinds symmetrically. Returns the
// number of pops emitted.
//
// BUGFIX: the previous loop decremented `i` and then tested `i > 0`, so host
// register 0 was never examined — unlike PushCallerSavedRegisters(), which
// iterates from 0. If register 0 was live and pushed, the pop side would skip
// it and unbalance the stack. The loop now visits every index down to 0.
u32 RegisterCache::PopCallerSavedRegisters() const
{
  u32 count = 0;
  u32 i = HostReg_Count;
  do
  {
    i--;

    if ((m_host_register_state[i] & (HostRegState::CallerSaved | HostRegState::InUse | HostRegState::Discarded)) ==
        (HostRegState::CallerSaved | HostRegState::InUse))
    {
      m_code_generator.EmitPopHostReg(static_cast<HostReg>(i));
      count++;
    }
  } while (i > 0);

  return count;
}
// Restores all callee-saved registers pushed during this block, in reverse
// allocation order, clearing their CalleeSavedAllocated flag so a later block
// will push them again. Returns the number of pops emitted.
u32 RegisterCache::PopCalleeSavedRegisters()
{
  if (m_host_register_callee_saved_order_count == 0)
    return 0;

  u32 count = 0;
  u32 i = m_host_register_callee_saved_order_count;
  do
  {
    const HostReg reg = m_host_register_callee_saved_order[i - 1];
    DebugAssert((m_host_register_state[reg] & (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated)) ==
                (HostRegState::CalleeSaved | HostRegState::CalleeSavedAllocated));

    m_code_generator.EmitPopHostReg(reg);
    m_host_register_state[reg] &= ~HostRegState::CalleeSavedAllocated;
    count++;
    i--;
  } while (i > 0);

  return count;
}
// Convenience overload: resolves the cache slot for `guest_reg` and delegates
// to the main implementation.
Value RegisterCache::ReadGuestRegister(Reg guest_reg, bool cache /* = true */, bool force_host_register /* = false */,
                                       HostReg forced_host_reg /* = HostReg_Invalid */)
{
  Value& slot = m_guest_reg_cache[static_cast<u8>(guest_reg)];
  return ReadGuestRegister(slot, guest_reg, cache, force_host_register, forced_host_reg);
}
// Reads a guest register into a Value. Reg::zero always reads as constant 0.
// A cached value is returned directly when possible; otherwise the register
// is loaded from the CPU struct into a host register (the specific
// forced_host_reg if given). force_host_register materializes a cached
// constant into a register.
Value RegisterCache::ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register,
                                       HostReg forced_host_reg)
{
  // register zero is always zero
  if (guest_reg == Reg::zero)
    return Value::FromConstantU32(0);

  if (cache_value.IsValid())
  {
    if (cache_value.IsInHostRegister())
    {
      // presumably refreshes this register's position in the eviction order —
      // confirm against PushRegisterToOrder()
      PushRegisterToOrder(guest_reg);

      // if it's in the wrong register, return it as scratch
      if (forced_host_reg == HostReg_Invalid || cache_value.GetHostRegister() == forced_host_reg)
        return cache_value;

      Value temp = AllocateScratch(RegSize_32, forced_host_reg);
      m_code_generator.EmitCopyValue(forced_host_reg, cache_value);
      return temp;
    }
    else if (force_host_register)
    {
      // if it's not in a register, it should be constant
      DebugAssert(cache_value.IsConstant());

      HostReg host_reg;
      if (forced_host_reg == HostReg_Invalid)
      {
        host_reg = AllocateHostReg();
      }
      else
      {
        Assert(!IsHostRegInUse(forced_host_reg));
        if (!AllocateHostReg(forced_host_reg))
          Panic("Failed to allocate specific host register");
        host_reg = forced_host_reg;
      }

      Log_DebugPrintf("Allocated host register %s for constant guest register %s (0x%" PRIX64 ")",
                      m_code_generator.GetHostRegName(host_reg), GetRegName(guest_reg), cache_value.constant_value);

      m_code_generator.EmitCopyValue(host_reg, cache_value);
      cache_value.AddHostReg(this, host_reg);
      AppendRegisterToOrder(guest_reg);

      // if we're forcing a host register, we're probably going to be changing the value,
      // in which case the constant won't be correct anyway. so just drop it.
      cache_value.ClearConstant();
      return cache_value;
    }
    else
    {
      // constant
      return cache_value;
    }
  }

  // Not cached at all: load from the CPU struct into a (possibly forced) register.
  HostReg host_reg;
  if (forced_host_reg == HostReg_Invalid)
  {
    host_reg = AllocateHostReg();
  }
  else
  {
    Assert(!IsHostRegInUse(forced_host_reg));
    if (!AllocateHostReg(forced_host_reg))
      Panic("Failed to allocate specific host register");
    host_reg = forced_host_reg;
  }

  m_code_generator.EmitLoadGuestRegister(host_reg, guest_reg);

  Log_DebugPrintf("Loading guest register %s to host register %s%s", GetRegName(guest_reg),
                  m_code_generator.GetHostRegName(host_reg, RegSize_32), cache ? " (cached)" : "");

  if (cache)
  {
    // Now in cache.
    cache_value.SetHostReg(this, host_reg, RegSize_32);
    AppendRegisterToOrder(guest_reg);
    return cache_value;
  }
  else
  {
    // Skip caching, return the register as a value.
    return Value::FromScratch(this, host_reg, RegSize_32);
  }
}
Value RegisterCache::WriteGuestRegister(Reg guest_reg, Value&& value)
{
  // Forward to the slot-based overload, handing over ownership of value.
  Value& slot = m_guest_reg_cache[static_cast<u8>(guest_reg)];
  return WriteGuestRegister(slot, guest_reg, std::move(value));
}
// Stores value into guest_reg's cache slot (cache_value), marking it dirty so the
// store to the CPU struct is deferred until the next flush. Returns a Value
// representing the register's new contents.
Value RegisterCache::WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value)
{
  // ignore writes to register zero
  if (guest_reg == Reg::zero)
    return std::move(value);

  DebugAssert(value.size == RegSize_32);
  if (cache_value.IsInHostRegister() && value.IsInHostRegister() && cache_value.host_reg == value.host_reg)
  {
    // updating the register value.
    Log_DebugPrintf("Updating guest register %s (in host register %s)", GetRegName(guest_reg),
                    m_code_generator.GetHostRegName(value.host_reg, RegSize_32));
    cache_value = std::move(value);
    cache_value.SetDirty();
    return cache_value;
  }

  // Drop whatever was cached before; the old contents are dead after this write.
  InvalidateGuestRegister(cache_value, guest_reg);
  DebugAssert(!cache_value.IsValid());

  if (value.IsConstant())
  {
    // No need to allocate a host register, and we can defer the store.
    cache_value = value;
    cache_value.SetDirty();
    return cache_value;
  }

  AppendRegisterToOrder(guest_reg);

  // If it's a temporary, we can bind that to the guest register.
  if (value.IsScratch())
  {
    Log_DebugPrintf("Binding scratch register %s to guest register %s",
                    m_code_generator.GetHostRegName(value.host_reg, RegSize_32), GetRegName(guest_reg));

    // The cache takes ownership, so strip the scratch flag (no longer a temporary).
    cache_value = std::move(value);
    cache_value.flags &= ~ValueFlags::Scratch;
    cache_value.SetDirty();
    return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
  }

  // Allocate host register, and copy value to it.
  HostReg host_reg = AllocateHostReg();
  m_code_generator.EmitCopyValue(host_reg, value);
  cache_value.SetHostReg(this, host_reg, RegSize_32);
  cache_value.SetDirty();

  Log_DebugPrintf("Copying non-scratch register %s to %s to guest register %s",
                  m_code_generator.GetHostRegName(value.host_reg, RegSize_32),
                  m_code_generator.GetHostRegName(host_reg, RegSize_32), GetRegName(guest_reg));

  return Value::FromHostReg(this, cache_value.host_reg, RegSize_32);
}
void RegisterCache::FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty)
{
  // Forward to the overload that operates on this guest register's cache slot.
  Value& slot = m_guest_reg_cache[static_cast<u8>(guest_reg)];
  FlushGuestRegister(slot, guest_reg, invalidate, clear_dirty);
}
// Writes guest_reg's cached value (cache_value) back to the CPU struct if it is
// dirty. Optionally clears the dirty flag and/or invalidates the slot afterwards.
void RegisterCache::FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty)
{
  if (cache_value.IsDirty())
  {
    if (cache_value.IsInHostRegister())
    {
      Log_DebugPrintf("Flushing guest register %s from host register %s", GetRegName(guest_reg),
                      m_code_generator.GetHostRegName(cache_value.host_reg, RegSize_32));
    }
    else if (cache_value.IsConstant())
    {
      Log_DebugPrintf("Flushing guest register %s from constant 0x%" PRIX64, GetRegName(guest_reg),
                      cache_value.constant_value);
    }

    // EmitStoreGuestRegister is handed the Value itself, covering both the
    // host-register and deferred-constant cases.
    m_code_generator.EmitStoreGuestRegister(guest_reg, cache_value);
    if (clear_dirty)
      cache_value.ClearDirty();
  }

  if (invalidate)
    InvalidateGuestRegister(cache_value, guest_reg);
}
void RegisterCache::InvalidateGuestRegister(Reg guest_reg)
{
  // Forward to the slot-based overload.
  Value& slot = m_guest_reg_cache[static_cast<u8>(guest_reg)];
  InvalidateGuestRegister(slot, guest_reg);
}
void RegisterCache::InvalidateGuestRegister(Value& cache_value, Reg guest_reg)
{
  // Nothing to do for an empty cache slot.
  if (cache_value.IsValid())
  {
    // A value bound to a host register also occupies a slot in the eviction order;
    // release both before clearing.
    if (cache_value.IsInHostRegister())
    {
      FreeHostReg(cache_value.host_reg);
      ClearRegisterFromOrder(guest_reg);
    }

    Log_DebugPrintf("Invalidating guest register %s", GetRegName(guest_reg));
    cache_value.Clear();
  }
}
void RegisterCache::FlushAllGuestRegisters(bool invalidate, bool clear_dirty)
{
for (u8 reg = 0; reg < static_cast<u8>(Reg::count); reg++)
FlushGuestRegister(static_cast<Reg>(reg), invalidate, clear_dirty);
}
bool RegisterCache::EvictOneGuestRegister()
{
if (m_guest_register_order_count == 0)
return false;
// evict the register used the longest time ago
Reg evict_reg = m_guest_register_order[m_guest_register_order_count - 1];
Log_ProfilePrintf("Evicting guest register %s", GetRegName(evict_reg));
FlushGuestRegister(evict_reg, true, true);
return HasFreeHostRegister();
}
void RegisterCache::ClearRegisterFromOrder(Reg reg)
{
  for (u32 slot = 0; slot < m_guest_register_order_count; slot++)
  {
    if (m_guest_register_order[slot] != reg)
      continue;

    // Shift the entries that follow down by one to fill the gap.
    const u32 remaining = m_guest_register_order_count - slot - 1;
    if (remaining > 0)
      std::memmove(&m_guest_register_order[slot], &m_guest_register_order[slot + 1], sizeof(Reg) * remaining);
    else
      m_guest_register_order[slot] = Reg::count;

    m_guest_register_order_count--;
    return;
  }

  Panic("Clearing register from order not in order");
}
void RegisterCache::PushRegisterToOrder(Reg reg)
{
  // Move an already-tracked register to the front (most-recently-used) position.
  for (u32 slot = 0; slot < m_guest_register_order_count; slot++)
  {
    if (m_guest_register_order[slot] != reg)
      continue;

    // Shift everything ahead of this entry back one position, then reinsert at the front.
    if (slot > 0)
      std::memmove(&m_guest_register_order[1], &m_guest_register_order[0], sizeof(Reg) * slot);
    m_guest_register_order[0] = reg;
    return;
  }

  Panic("Attempt to push register which is not ordered");
}
void RegisterCache::AppendRegisterToOrder(Reg reg)
{
  DebugAssert(m_guest_register_order_count < HostReg_Count);

  // Despite the name, new entries are inserted at the front of the array: the
  // front is most-recently-used, the back is the eviction candidate.
  for (u32 slot = m_guest_register_order_count; slot > 0; slot--)
    m_guest_register_order[slot] = m_guest_register_order[slot - 1];

  m_guest_register_order[0] = reg;
  m_guest_register_order_count++;
}
} // namespace CPU::Recompiler

View File

@ -0,0 +1,243 @@
#pragma once
#include "YBaseLib/Assert.h"
#include "cpu_recompiler_types.h"
#include "cpu_types.h"
#include <array>
#include <tuple>
#include <optional>
namespace CPU::Recompiler {
/// Per-host-register bookkeeping flags used by RegisterCache.
/// Note: bit 0 is unused; flag values start at (1 << 1).
enum class HostRegState : u8
{
  None = 0,
  Usable = (1 << 1),               // Can be allocated
  CallerSaved = (1 << 2),          // Register is caller-saved, and should be saved/restored after calling a function.
  CalleeSaved = (1 << 3),          // Register is callee-saved, and should be restored after leaving the block.
  InUse = (1 << 4),                // In-use, must be saved/restored across function call.
  CalleeSavedAllocated = (1 << 5), // Register was callee-saved and allocated, so should be restored before returning.
  Discarded = (1 << 6),            // Register contents is not used, so do not preserve across function calls.
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(HostRegState);
/// Flags describing what a Value currently holds and how it must be treated.
/// A Value can be both Constant and InHostRegister (a materialized constant).
enum class ValueFlags : u8
{
  None = 0,
  Valid = (1 << 0),
  Constant = (1 << 1),       // The value itself is constant, and not in a register.
  InHostRegister = (1 << 2), // The value itself is located in a host register.
  Scratch = (1 << 3),        // The value is temporary, and will be released after the Value is destroyed.
  Dirty = (1 << 4),          // For register cache values, the value needs to be written back to the CPU struct.
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(ValueFlags);
/// A value known to the recompiler: a compile-time constant, a host register,
/// or both (a constant that has been materialized into a register).
struct Value
{
  RegisterCache* regcache = nullptr; // cache that owns host_reg; null for pure constants (see FromConstant)
  u64 constant_value = 0;            // payload when the Constant flag is set
  HostReg host_reg = {};             // payload when the InHostRegister flag is set
  RegSize size = RegSize_8;          // width of the value
  ValueFlags flags = ValueFlags::None;

  Value();
  Value(RegisterCache* regcache_, u64 constant_, RegSize size_, ValueFlags flags_);
  Value(RegisterCache* regcache_, HostReg reg_, RegSize size_, ValueFlags flags_);
  Value(const Value& other);
  Value(Value&& other);
  ~Value();

  Value& operator=(const Value& other);
  Value& operator=(Value&& other);

  bool IsConstant() const { return (flags & ValueFlags::Constant) != ValueFlags::None; }
  bool IsValid() const { return (flags & ValueFlags::Valid) != ValueFlags::None; }
  bool IsInHostRegister() const { return (flags & ValueFlags::InHostRegister) != ValueFlags::None; }
  bool IsScratch() const { return (flags & ValueFlags::Scratch) != ValueFlags::None; }

  /// Returns the host register this value is bound to.
  HostReg GetHostRegister() const
  {
    DebugAssert(IsInHostRegister());
    return host_reg;
  }

  /// Returns true if this value is constant and has the specified value.
  bool HasConstantValue(u64 cv) const
  {
    return (((flags & ValueFlags::Constant) != ValueFlags::None) && constant_value == cv);
  }

  /// Removes the contents of this value. Use with care, as scratch/temporaries are not released.
  void Clear();

  /// Releases the host register if needed, and clears the contents.
  void ReleaseAndClear();

  /// Flags the value is being discarded. Call Undiscard() to track again.
  void Discard();
  void Undiscard();

  /// Binds a host register to this (already valid) value, keeping its other flags.
  void AddHostReg(RegisterCache* regcache_, HostReg hr)
  {
    DebugAssert(IsValid());
    regcache = regcache_;
    host_reg = hr;
    flags |= ValueFlags::InHostRegister;
  }

  /// Resets this value to exactly "valid, in host register hr", dropping any
  /// constant/scratch/dirty state.
  void SetHostReg(RegisterCache* regcache_, HostReg hr, RegSize size_)
  {
    regcache = regcache_;
    constant_value = 0;
    host_reg = hr;
    size = size_;
    flags = ValueFlags::Valid | ValueFlags::InHostRegister;
  }

  void ClearConstant()
  {
    // By clearing the constant bit, we should already be in a host register.
    DebugAssert(IsInHostRegister());
    flags &= ~ValueFlags::Constant;
  }

  // Dirty tracking for cached guest registers (see RegisterCache::FlushGuestRegister).
  bool IsDirty() const { return (flags & ValueFlags::Dirty) != ValueFlags::None; }
  void SetDirty() { flags |= ValueFlags::Dirty; }
  void ClearDirty() { flags &= ~ValueFlags::Dirty; }

  /// Factory helpers.
  static Value FromHostReg(RegisterCache* regcache, HostReg reg, RegSize size)
  {
    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister);
  }
  static Value FromScratch(RegisterCache* regcache, HostReg reg, RegSize size)
  {
    return Value(regcache, reg, size, ValueFlags::Valid | ValueFlags::InHostRegister | ValueFlags::Scratch);
  }
  static Value FromConstant(u64 cv, RegSize size)
  {
    return Value(nullptr, cv, size, ValueFlags::Valid | ValueFlags::Constant);
  }
  static Value FromConstantU8(u8 value) { return FromConstant(ZeroExtend64(value), RegSize_8); }
  static Value FromConstantU16(u16 value) { return FromConstant(ZeroExtend64(value), RegSize_16); }
  static Value FromConstantU32(u32 value) { return FromConstant(ZeroExtend64(value), RegSize_32); }
  static Value FromConstantU64(u64 value) { return FromConstant(value, RegSize_64); }

private:
  void Release();
};
/// Tracks which guest (MIPS) registers currently live in which host registers,
/// handles host register allocation and LRU eviction, and defers guest register
/// stores (dirty values) until they are flushed back to the CPU struct.
class RegisterCache
{
public:
  RegisterCache(CodeGenerator& code_generator);
  ~RegisterCache();

  u32 GetActiveCalleeSavedRegisterCount() const { return m_host_register_callee_saved_order_count; }

  //////////////////////////////////////////////////////////////////////////
  // Register Allocation
  //////////////////////////////////////////////////////////////////////////
  void SetHostRegAllocationOrder(std::initializer_list<HostReg> regs);
  void SetCallerSavedHostRegs(std::initializer_list<HostReg> regs);
  void SetCalleeSavedHostRegs(std::initializer_list<HostReg> regs);
  void SetCPUPtrHostReg(HostReg reg);

  /// Returns true if the register is permitted to be used in the register cache.
  bool IsUsableHostReg(HostReg reg) const;
  bool IsHostRegInUse(HostReg reg) const;
  bool HasFreeHostRegister() const;
  u32 GetUsedHostRegisters() const;
  u32 GetFreeHostRegisters() const;

  /// Allocates a new host register. If there are no free registers, the guest register which was accessed the longest
  /// time ago will be evicted.
  HostReg AllocateHostReg(HostRegState state = HostRegState::InUse);

  /// Allocates a specific host register. If this register is not free, returns false.
  bool AllocateHostReg(HostReg reg, HostRegState state = HostRegState::InUse);

  /// Flags the host register as discard-able. This means that the contents is no longer required, and will not be
  /// pushed when saving caller-saved registers.
  void DiscardHostReg(HostReg reg);

  /// Clears the discard-able flag on a host register, so that the contents will be preserved across function calls.
  void UndiscardHostReg(HostReg reg);

  /// Frees a host register, making it usable in future allocations.
  void FreeHostReg(HostReg reg);

  /// Ensures a host register is free, removing any value cached.
  void EnsureHostRegFree(HostReg reg);

  /// Push/pop volatile host registers. Returns the number of registers pushed/popped.
  u32 PushCallerSavedRegisters() const;
  u32 PopCallerSavedRegisters() const;

  /// Restore callee-saved registers. Call at the end of the function.
  u32 PopCalleeSavedRegisters();

  //////////////////////////////////////////////////////////////////////////
  // Scratch Register Allocation
  //////////////////////////////////////////////////////////////////////////
  Value GetCPUPtr();
  Value AllocateScratch(RegSize size, HostReg reg = HostReg_Invalid);

  //////////////////////////////////////////////////////////////////////////
  // Guest Register Caching
  //////////////////////////////////////////////////////////////////////////

  /// Returns true if the specified guest register is cached.
  bool IsGuestRegisterInHostReg(Reg guest_reg) const
  {
    return m_guest_reg_cache[static_cast<u8>(guest_reg)].IsInHostRegister();
  }

  /// Returns the host register if the guest register is cached.
  std::optional<HostReg> GetHostRegisterForGuestRegister(Reg guest_reg) const
  {
    if (!m_guest_reg_cache[static_cast<u8>(guest_reg)].IsInHostRegister())
      return std::nullopt;
    return m_guest_reg_cache[static_cast<u8>(guest_reg)].GetHostRegister();
  }

  Value ReadGuestRegister(Reg guest_reg, bool cache = true, bool force_host_register = false,
                          HostReg forced_host_reg = HostReg_Invalid);

  /// Creates a copy of value, and stores it to guest_reg.
  Value WriteGuestRegister(Reg guest_reg, Value&& value);

  void FlushGuestRegister(Reg guest_reg, bool invalidate, bool clear_dirty);
  void InvalidateGuestRegister(Reg guest_reg);

  void FlushAllGuestRegisters(bool invalidate, bool clear_dirty);
  bool EvictOneGuestRegister();

private:
  // Slot-based overloads; cache_value is the entry in m_guest_reg_cache for guest_reg.
  Value ReadGuestRegister(Value& cache_value, Reg guest_reg, bool cache, bool force_host_register,
                          HostReg forced_host_reg);
  Value WriteGuestRegister(Value& cache_value, Reg guest_reg, Value&& value);
  void FlushGuestRegister(Value& cache_value, Reg guest_reg, bool invalidate, bool clear_dirty);
  void InvalidateGuestRegister(Value& cache_value, Reg guest_reg);

  // Maintenance of the MRU-first guest register order used for eviction.
  void ClearRegisterFromOrder(Reg reg);
  void PushRegisterToOrder(Reg reg);
  void AppendRegisterToOrder(Reg reg);

  CodeGenerator& m_code_generator; // emits the actual load/store/copy code

  HostReg m_cpu_ptr_host_register = {};
  std::array<HostRegState, HostReg_Count> m_host_register_state{};
  std::array<HostReg, HostReg_Count> m_host_register_allocation_order{};
  u32 m_host_register_available_count = 0;

  std::array<Value, static_cast<u8>(Reg::count)> m_guest_reg_cache{}; // per-guest-register cached value

  // MRU-first list of cached guest registers; the back is the eviction candidate.
  std::array<Reg, HostReg_Count> m_guest_register_order{};
  u32 m_guest_register_order_count = 0;

  // Order callee-saved registers were pushed in, so they can be popped in reverse.
  std::array<HostReg, HostReg_Count> m_host_register_callee_saved_order{};
  u32 m_host_register_callee_saved_order_count = 0;
};
} // namespace CPU::Recompiler

View File

@ -0,0 +1,43 @@
#include "cpu_recompiler_thunks.h"
namespace CPU::Recompiler {
// TODO: Port thunks to "ASM routines", i.e. code in the jit buffer.
// Trampoline: forwards to Core::ReadMemoryByte so JIT code can call through a plain function pointer.
bool Thunks::ReadMemoryByte(Core* cpu, u32 address, u8* value)
{
  return cpu->ReadMemoryByte(address, value);
}
// Trampoline: forwards to Core::ReadMemoryHalfWord (16-bit load).
bool Thunks::ReadMemoryHalfWord(Core* cpu, u32 address, u16* value)
{
  return cpu->ReadMemoryHalfWord(address, value);
}
// Trampoline: forwards to Core::ReadMemoryWord (32-bit load).
bool Thunks::ReadMemoryWord(Core* cpu, u32 address, u32* value)
{
  return cpu->ReadMemoryWord(address, value);
}
// Trampoline: forwards to Core::WriteMemoryByte (8-bit store).
bool Thunks::WriteMemoryByte(Core* cpu, u32 address, u8 value)
{
  return cpu->WriteMemoryByte(address, value);
}
// Trampoline: forwards to Core::WriteMemoryHalfWord (16-bit store).
bool Thunks::WriteMemoryHalfWord(Core* cpu, u32 address, u16 value)
{
  return cpu->WriteMemoryHalfWord(address, value);
}
// Trampoline: forwards to Core::WriteMemoryWord (32-bit store).
bool Thunks::WriteMemoryWord(Core* cpu, u32 address, u32 value)
{
  return cpu->WriteMemoryWord(address, value);
}
// Executes a single instruction through the interpreter. Note the return value is
// the CPU's exception flag (true if the instruction raised an exception), NOT a
// success indicator.
bool Thunks::InterpretInstruction(Core* cpu)
{
  cpu->ExecuteInstruction();
  return cpu->m_exception_raised;
}
} // namespace CPU::Recompiler

View File

@ -0,0 +1,38 @@
#pragma once
#include "common/jit_code_buffer.h"
#include "cpu_core.h"
#include <array>
namespace CPU::Recompiler {
/// Static trampoline functions the generated code calls into to perform memory
/// accesses or fall back to the interpreter.
class Thunks
{
public:
  //////////////////////////////////////////////////////////////////////////
  // Trampolines for calling back from the JIT
  // Needed because we can't cast member functions to void*...
  // TODO: Abuse carry flag or something else for exception
  //////////////////////////////////////////////////////////////////////////

  // NOTE(review): return values are passed straight through from Core's memory
  // handlers -- presumably false indicates the access raised an exception; confirm.
  static bool ReadMemoryByte(Core* cpu, u32 address, u8* value);
  static bool ReadMemoryHalfWord(Core* cpu, u32 address, u16* value);
  static bool ReadMemoryWord(Core* cpu, u32 address, u32* value);
  static bool WriteMemoryByte(Core* cpu, u32 address, u8 value);
  static bool WriteMemoryHalfWord(Core* cpu, u32 address, u16 value);
  static bool WriteMemoryWord(Core* cpu, u32 address, u32 value);

  // Runs one instruction via the interpreter; returns the CPU's exception flag.
  static bool InterpretInstruction(Core* cpu);
};
/// Pointers to assembly helper routines generated into the JIT code buffer by
/// Generate(). Unused so far in this file's view.
/// NOTE(review): naming here follows x86 conventions (word = 16-bit, dword = 32-bit),
/// unlike Thunks which uses MIPS naming (halfword = 16-bit) -- confirm intended.
/// NOTE(review): the write_* pointers return void while Thunks::WriteMemory* return
/// bool -- confirm write exceptions are handled elsewhere on these paths.
class ASMFunctions
{
public:
  bool (*read_memory_byte)(u32 address, u8* value);
  bool (*read_memory_word)(u32 address, u16* value);
  bool (*read_memory_dword)(u32 address, u32* value);
  void (*write_memory_byte)(u32 address, u8 value);
  void (*write_memory_word)(u32 address, u16 value);
  void (*write_memory_dword)(u32 address, u32 value);

  void Generate(JitCodeBuffer* code_buffer);
};
} // namespace CPU::Recompiler

View File

@ -0,0 +1,53 @@
#pragma once
#include "cpu_types.h"
#if defined(Y_CPU_X64)
#define XBYAK_NO_OP_NAMES 1
#include "xbyak.h"
#endif
namespace CPU {
class Core;
class CodeCache;
namespace Recompiler {
class CodeGenerator;
class RegisterCache;
enum RegSize : u8
{
RegSize_8,
RegSize_16,
RegSize_32,
RegSize_64,
};
#if defined(Y_CPU_X64)
using HostReg = Xbyak::Operand::Code;
using CodeEmitter = Xbyak::CodeGenerator;
enum : u32
{
HostReg_Count = 16
};
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr RegSize HostPointerSize = RegSize_64;
// A reasonable "maximum" number of bytes per instruction.
constexpr u32 MAX_HOST_BYTES_PER_INSTRUCTION = 128;
#else
using HostReg = void;
using CodeEmitter = void;
enum : u32
{
HostReg_Count = 0
};
constexpr HostReg HostReg_Invalid = static_cast<HostReg>(HostReg_Count);
constexpr OperandSize HostPointerSize = OperandSize_64;
#endif
} // namespace Recompiler
} // namespace CPU

196
src/core/cpu_types.cpp Normal file
View File

@ -0,0 +1,196 @@
#include "cpu_types.h"
#include "YBaseLib/Assert.h"
#include <array>
namespace CPU {
// MIPS ABI mnemonic for each of the 32 general-purpose registers, indexed by
// register number.
static const std::array<const char*, 32> s_reg_names = {
  {"$zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
   "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra"}};

// Returns the human-readable mnemonic for a guest register (for disassembly/logging).
const char* GetRegName(Reg reg)
{
  DebugAssert(reg < Reg::count);
  return s_reg_names[static_cast<u8>(reg)];
}
// Returns true for all jump/branch opcodes, including the register jumps (jr/jalr)
// encoded under the "funct" opcode.
bool IsBranchInstruction(const Instruction& instruction)
{
  switch (instruction.op)
  {
    case InstructionOp::j:
    case InstructionOp::jal:
    case InstructionOp::b:
    case InstructionOp::beq:
    case InstructionOp::bgtz:
    case InstructionOp::blez:
    case InstructionOp::bne:
      return true;

    case InstructionOp::funct:
      // Register jumps live in the funct field.
      return (instruction.r.funct == InstructionFunct::jr || instruction.r.funct == InstructionFunct::jalr);

    default:
      return false;
  }
}
// Returns true for instructions that unconditionally raise an exception and so
// always terminate a recompiled block (syscall/break).
bool IsExitBlockInstruction(const Instruction& instruction)
{
  if (instruction.op != InstructionOp::funct)
    return false;

  return (instruction.r.funct == InstructionFunct::syscall || instruction.r.funct == InstructionFunct::break_);
}
// Returns true if executing the instruction can raise a CPU exception, and so
// needs exception-aware handling when recompiled. in_user_mode additionally makes
// coprocessor instructions trap-capable (they are privileged).
bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode)
{
  switch (instruction.op)
  {
    // Unsigned/logical immediate ops never overflow or access memory.
    case InstructionOp::lui:
    case InstructionOp::andi:
    case InstructionOp::ori:
    case InstructionOp::xori:
    case InstructionOp::addiu:
    case InstructionOp::slti:
    case InstructionOp::sltiu:
      return false;

    // Coprocessor access is privileged: traps only from user mode.
    case InstructionOp::cop0:
    case InstructionOp::cop2:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      return in_user_mode;

    // swc0/lwc0/cop1/cop3 are essentially no-ops
    case InstructionOp::cop1:
    case InstructionOp::cop3:
    case InstructionOp::lwc0:
    case InstructionOp::lwc1:
    case InstructionOp::lwc3:
    case InstructionOp::swc0:
    case InstructionOp::swc1:
    case InstructionOp::swc3:
      return false;

    // addi can overflow; loads/stores can raise address/bus errors.
    case InstructionOp::addi:
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
    case InstructionOp::lwl:
    case InstructionOp::lwr:
    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::sw:
    case InstructionOp::swl:
    case InstructionOp::swr:
      return true;

    // These can fault on the branch address. Perhaps we should move this to the next instruction?
    case InstructionOp::j:
    case InstructionOp::jal:
    case InstructionOp::b:
    case InstructionOp::beq:
    case InstructionOp::bgtz:
    case InstructionOp::blez:
    case InstructionOp::bne:
      return true;

    case InstructionOp::funct:
    {
      switch (instruction.r.funct)
      {
        // Shifts, logic, unsigned arithmetic, and mult/div never trap.
        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
        case InstructionFunct::addu:
        case InstructionFunct::subu:
        case InstructionFunct::slt:
        case InstructionFunct::sltu:
        case InstructionFunct::mfhi:
        case InstructionFunct::mthi:
        case InstructionFunct::mflo:
        case InstructionFunct::mtlo:
        case InstructionFunct::mult:
        case InstructionFunct::multu:
        case InstructionFunct::div:
        case InstructionFunct::divu:
          return false;

        // Register jumps can fault on the target address.
        case InstructionFunct::jr:
        case InstructionFunct::jalr:
          return true;

        // Signed add/sub can overflow; syscall/break always trap; anything else
        // is treated as trap-capable (reserved instruction exception).
        case InstructionFunct::add:
        case InstructionFunct::sub:
        case InstructionFunct::syscall:
        case InstructionFunct::break_:
        default:
          return true;
      }
    }

    default:
      return true;
  }
}
// Returns true for the plain load instructions that fill the load-delay slot.
// lwl/lwr are deliberately excluded (they were explicitly false in the original).
bool IsLoadDelayingInstruction(const Instruction& instruction)
{
  switch (instruction.op)
  {
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
      return true;

    default:
      return false;
  }
}
// TODO: Actually validate the encoding -- currently this reports EVERY instruction
// as invalid.
// NOTE(review): returning true unconditionally looks suspicious for a validity
// check; confirm how callers interpret this result before relying on it.
bool IsInvalidInstruction(const Instruction& instruction)
{
  // TODO
  return true;
}
} // namespace CPU

View File

@ -4,6 +4,18 @@
namespace CPU {
class Core;
// Memory address mask used for fetching as well as loadstores (removes cached/uncached/user/kernel bits).
enum : PhysicalMemoryAddress
{
PHYSICAL_MEMORY_ADDRESS_MASK = 0x1FFFFFFF
};
enum : u32
{
INSTRUCTION_SIZE = sizeof(u32)
};
enum class Reg : u8
{
zero,
@ -41,6 +53,8 @@ enum class Reg : u8
count
};
const char* GetRegName(Reg reg);
enum class InstructionOp : u8
{
funct = 0,
@ -192,6 +206,13 @@ union Instruction
}
};
// Instruction helpers.
bool IsBranchInstruction(const Instruction& instruction);
bool IsExitBlockInstruction(const Instruction& instruction);
bool CanInstructionTrap(const Instruction& instruction, bool in_user_mode);
bool IsLoadDelayingInstruction(const Instruction& instruction);
bool IsInvalidInstruction(const Instruction& instruction);
struct Registers
{
union
@ -349,4 +370,56 @@ struct Cop0Registers
} dcic;
};
// Uniquely identifies a compiled block: the word-aligned starting PC, plus whether
// it was compiled for user mode (privilege affects which instructions can trap).
union CodeBlockKey
{
  u32 bits;

  BitField<u32, bool, 0, 1> user_mode;
  BitField<u32, u32, 2, 30> aligned_pc; // pc >> 2; note bit 1 is unused padding

  ALWAYS_INLINE u32 GetPC() const { return aligned_pc << 2; }
  ALWAYS_INLINE void SetPC(u32 pc) { aligned_pc = pc >> 2; }

  ALWAYS_INLINE CodeBlockKey& operator=(const CodeBlockKey& rhs)
  {
    bits = rhs.bits;
    return *this;
  }

  ALWAYS_INLINE bool operator==(const CodeBlockKey& rhs) const { return bits == rhs.bits; }
  ALWAYS_INLINE bool operator!=(const CodeBlockKey& rhs) const { return bits != rhs.bits; }
  ALWAYS_INLINE bool operator<(const CodeBlockKey& rhs) const { return bits < rhs.bits; }
};
// A decoded guest instruction within a block, with per-instruction flags the
// recompiler needs when generating code.
struct CodeBlockInstruction
{
  Instruction instruction;
  u32 pc; // guest address of this instruction

  bool is_branch : 1;
  bool is_branch_delay_slot : 1;
  bool is_load_delay_slot : 1;
  bool is_last_instruction : 1;
  bool can_trap : 1; // see CanInstructionTrap()
};
// A decoded, contiguous run of guest instructions together with the host code
// compiled for it.
struct CodeBlock
{
  using HostCodePointer = void (*)(Core*);

  CodeBlockKey key;
  std::vector<CodeBlockInstruction> instructions;

  HostCodePointer host_code; // entry point of the compiled code
  u32 host_code_size;        // bytes of host code emitted

  // NOTE: top-level const on by-value returns is meaningless and has been dropped
  // from the original declarations; function types are unchanged.

  /// Guest PC of the first instruction in the block.
  u32 GetPC() const { return key.GetPC(); }

  /// Size of the guest code covered by the block, in bytes.
  u32 GetSizeInBytes() const { return static_cast<u32>(instructions.size()) * sizeof(Instruction); }

  /// Index of the first code-cache page the block touches.
  u32 GetStartPageIndex() const { return (key.GetPC() / CPU_CODE_CACHE_PAGE_SIZE); }

  /// Index one past the last code-cache page the block touches
  /// (ceil((pc + size) / page_size)), suitable as an exclusive loop bound.
  u32 GetEndPageIndex() const
  {
    return ((key.GetPC() + GetSizeInBytes() + (CPU_CODE_CACHE_PAGE_SIZE - 1)) / CPU_CODE_CACHE_PAGE_SIZE);
  }
};
} // namespace CPU

View File

@ -5,6 +5,7 @@
#include "bus.h"
#include "cdrom.h"
#include "common/state_wrapper.h"
#include "cpu_code_cache.h"
#include "cpu_core.h"
#include "dma.h"
#include "gpu.h"
@ -23,6 +24,7 @@ Log_SetChannel(System);
System::System(HostInterface* host_interface) : m_host_interface(host_interface)
{
m_cpu = std::make_unique<CPU::Core>();
m_cpu_code_cache = std::make_unique<CPU::CodeCache>();
m_bus = std::make_unique<Bus>();
m_dma = std::make_unique<DMA>();
m_interrupt_controller = std::make_unique<InterruptController>();
@ -169,8 +171,9 @@ bool System::Boot(const char* filename)
void System::InitializeComponents()
{
m_cpu->Initialize(m_bus.get());
m_bus->Initialize(m_cpu.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(), m_cdrom.get(), m_pad.get(),
m_timers.get(), m_spu.get(), m_mdec.get());
m_cpu_code_cache->Initialize(m_cpu.get(), m_bus.get());
m_bus->Initialize(m_cpu.get(), m_cpu_code_cache.get(), m_dma.get(), m_interrupt_controller.get(), m_gpu.get(),
m_cdrom.get(), m_pad.get(), m_timers.get(), m_spu.get(), m_mdec.get());
m_dma->Initialize(this, m_bus.get(), m_interrupt_controller.get(), m_gpu.get(), m_cdrom.get(), m_spu.get(),
m_mdec.get());
@ -235,6 +238,9 @@ bool System::DoState(StateWrapper& sw)
if (!sw.DoMarker("CPU") || !m_cpu->DoState(sw))
return false;
if (sw.IsReading())
m_cpu_code_cache->Reset();
if (!sw.DoMarker("Bus") || !m_bus->DoState(sw))
return false;
@ -268,6 +274,7 @@ bool System::DoState(StateWrapper& sw)
void System::Reset()
{
m_cpu->Reset();
m_cpu_code_cache->Reset();
m_bus->Reset();
m_dma->Reset();
m_interrupt_controller->Reset();
@ -299,7 +306,10 @@ void System::RunFrame()
u32 current_frame_number = m_frame_number;
while (current_frame_number == m_frame_number)
{
m_cpu->Execute();
if (CPU::USE_CODE_CACHE)
m_cpu_code_cache->Execute();
else
m_cpu->Execute();
Synchronize();
}
}

View File

@ -10,6 +10,7 @@ class StateWrapper;
namespace CPU {
class Core;
class CodeCache;
}
class Bus;
@ -97,6 +98,7 @@ private:
HostInterface* m_host_interface;
std::unique_ptr<CPU::Core> m_cpu;
std::unique_ptr<CPU::CodeCache> m_cpu_code_cache;
std::unique_ptr<Bus> m_bus;
std::unique_ptr<DMA> m_dma;
std::unique_ptr<InterruptController> m_interrupt_controller;

View File

@ -38,3 +38,9 @@ enum class GPURenderer : u8
Software,
Count
};
// Granularity at which the code cache tracks guest RAM for invalidation: the 2MB
// of RAM is split into pages of this size, and a write to a page flushes the
// blocks overlapping it (see Bus::DoInvalidateCodeCache).
enum : u32
{
  CPU_CODE_CACHE_PAGE_SIZE = 1024,
  CPU_CODE_CACHE_PAGE_COUNT = 0x200000 / CPU_CODE_CACHE_PAGE_SIZE
};