Cached Interpreter 2.0

It now supports variable-sized data payloads and memory range freeing. It's a little faster, too.
This commit is contained in:
mitaclaw 2024-04-23 09:28:49 -07:00
parent 5f4f974b48
commit f79520a906
11 changed files with 479 additions and 252 deletions

View File

@ -17,7 +17,7 @@ namespace Common
// having to prefix them with gen-> or something similar. // having to prefix them with gen-> or something similar.
// Example implementation: // Example implementation:
// class JIT : public CodeBlock<ARMXEmitter> {} // class JIT : public CodeBlock<ARMXEmitter> {}
template <class T> template <class T, bool executable = true>
class CodeBlock : public T class CodeBlock : public T
{ {
private: private:
@ -53,7 +53,10 @@ public:
{ {
region_size = size; region_size = size;
total_region_size = size; total_region_size = size;
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size)); if constexpr (executable)
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
else
region = static_cast<u8*>(Common::AllocateMemoryPages(total_region_size));
T::SetCodePtr(region, region + size); T::SetCodePtr(region, region + size);
} }

View File

@ -481,8 +481,10 @@ add_library(core
PowerPC/BreakPoints.h PowerPC/BreakPoints.h
PowerPC/CachedInterpreter/CachedInterpreter.cpp PowerPC/CachedInterpreter/CachedInterpreter.cpp
PowerPC/CachedInterpreter/CachedInterpreter.h PowerPC/CachedInterpreter/CachedInterpreter.h
PowerPC/CachedInterpreter/InterpreterBlockCache.cpp PowerPC/CachedInterpreter/CachedInterpreterBlockCache.cpp
PowerPC/CachedInterpreter/InterpreterBlockCache.h PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h
PowerPC/CachedInterpreter/CachedInterpreterEmitter.cpp
PowerPC/CachedInterpreter/CachedInterpreterEmitter.h
PowerPC/ConditionRegister.cpp PowerPC/ConditionRegister.cpp
PowerPC/ConditionRegister.h PowerPC/ConditionRegister.h
PowerPC/Expression.cpp PowerPC/Expression.cpp

View File

@ -6,6 +6,7 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/HLE/HLE.h" #include "Core/HLE/HLE.h"
#include "Core/HW/CPU.h" #include "Core/HW/CPU.h"
@ -16,65 +17,7 @@
#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PowerPC.h"
#include "Core/System.h" #include "Core/System.h"
struct CachedInterpreter::Instruction CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system), m_block_cache(*this)
{
using CommonCallback = void (*)(UGeckoInstruction);
using ConditionalCallback = bool (*)(u32);
using InterpreterCallback = void (*)(Interpreter&, UGeckoInstruction);
using CachedInterpreterCallback = void (*)(CachedInterpreter&, UGeckoInstruction);
using ConditionalCachedInterpreterCallback = bool (*)(CachedInterpreter&, u32);
Instruction() {}
Instruction(const CommonCallback c, UGeckoInstruction i)
: common_callback(c), data(i.hex), type(Type::Common)
{
}
Instruction(const ConditionalCallback c, u32 d)
: conditional_callback(c), data(d), type(Type::Conditional)
{
}
Instruction(const InterpreterCallback c, UGeckoInstruction i)
: interpreter_callback(c), data(i.hex), type(Type::Interpreter)
{
}
Instruction(const CachedInterpreterCallback c, UGeckoInstruction i)
: cached_interpreter_callback(c), data(i.hex), type(Type::CachedInterpreter)
{
}
Instruction(const ConditionalCachedInterpreterCallback c, u32 d)
: conditional_cached_interpreter_callback(c), data(d),
type(Type::ConditionalCachedInterpreter)
{
}
enum class Type
{
Abort,
Common,
Conditional,
Interpreter,
CachedInterpreter,
ConditionalCachedInterpreter,
};
union
{
const CommonCallback common_callback = nullptr;
const ConditionalCallback conditional_callback;
const InterpreterCallback interpreter_callback;
const CachedInterpreterCallback cached_interpreter_callback;
const ConditionalCachedInterpreterCallback conditional_cached_interpreter_callback;
};
u32 data = 0;
Type type = Type::Abort;
};
CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system)
{ {
} }
@ -84,7 +27,8 @@ void CachedInterpreter::Init()
{ {
RefreshConfig(); RefreshConfig();
m_code.reserve(CODE_SIZE / sizeof(Instruction)); AllocCodeSpace(CODE_SIZE);
ResetFreeMemoryRanges();
jo.enableBlocklink = false; jo.enableBlocklink = false;
@ -100,11 +44,6 @@ void CachedInterpreter::Shutdown()
m_block_cache.Shutdown(); m_block_cache.Shutdown();
} }
u8* CachedInterpreter::GetCodePtr()
{
return reinterpret_cast<u8*>(m_code.data() + m_code.size());
}
void CachedInterpreter::ExecuteOneBlock() void CachedInterpreter::ExecuteOneBlock()
{ {
const u8* normal_entry = m_block_cache.Dispatch(); const u8* normal_entry = m_block_cache.Dispatch();
@ -114,50 +53,23 @@ void CachedInterpreter::ExecuteOneBlock()
return; return;
} }
const Instruction* code = reinterpret_cast<const Instruction*>(normal_entry); auto& ppc_state = m_ppc_state;
auto& interpreter = m_system.GetInterpreter(); while (true)
for (; code->type != Instruction::Type::Abort; ++code)
{ {
switch (code->type) const auto callback = *reinterpret_cast<const AnyCallback*>(normal_entry);
{ if (const auto distance = callback(ppc_state, normal_entry + sizeof(callback)))
case Instruction::Type::Common: normal_entry += distance;
code->common_callback(UGeckoInstruction(code->data)); else
break; break;
case Instruction::Type::Conditional:
if (code->conditional_callback(code->data))
return;
break;
case Instruction::Type::Interpreter:
code->interpreter_callback(interpreter, UGeckoInstruction(code->data));
break;
case Instruction::Type::CachedInterpreter:
code->cached_interpreter_callback(*this, UGeckoInstruction(code->data));
break;
case Instruction::Type::ConditionalCachedInterpreter:
if (code->conditional_cached_interpreter_callback(*this, code->data))
return;
break;
default:
ERROR_LOG_FMT(POWERPC, "Unknown CachedInterpreter Instruction: {}",
static_cast<int>(code->type));
break;
}
} }
} }
void CachedInterpreter::Run() void CachedInterpreter::Run()
{ {
auto& core_timing = m_system.GetCoreTiming(); auto& core_timing = m_system.GetCoreTiming();
auto& cpu = m_system.GetCPU();
const CPU::State* state_ptr = cpu.GetStatePtr(); const CPU::State* state_ptr = m_system.GetCPU().GetStatePtr();
while (cpu.GetState() == CPU::State::Running) while (*state_ptr == CPU::State::Running)
{ {
// Start new timing slice // Start new timing slice
// NOTE: Exceptions may change PC // NOTE: Exceptions may change PC
@ -177,93 +89,105 @@ void CachedInterpreter::SingleStep()
ExecuteOneBlock(); ExecuteOneBlock();
} }
void CachedInterpreter::EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data) s32 CachedInterpreter::EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands)
{ {
auto& ppc_state = cached_interpreter.m_ppc_state; const auto& [downcount, num_load_stores, num_fp_inst] = operands;
ppc_state.pc = ppc_state.npc; ppc_state.pc = ppc_state.npc;
ppc_state.downcount -= data.hex; ppc_state.downcount -= downcount;
PowerPC::UpdatePerformanceMonitor(data.hex, 0, 0, ppc_state); PowerPC::UpdatePerformanceMonitor(downcount, num_load_stores, num_fp_inst, ppc_state);
return 0;
} }
void CachedInterpreter::UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter, s32 CachedInterpreter::Interpret(PowerPC::PowerPCState& ppc_state,
UGeckoInstruction data) const InterpretOperands& operands)
{ {
PowerPC::UpdatePerformanceMonitor(0, data.hex, 0, cached_interpreter.m_ppc_state); const auto& [interpreter, func, current_pc, inst] = operands;
func(interpreter, inst);
return sizeof(AnyCallback) + sizeof(operands);
} }
void CachedInterpreter::UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter, s32 CachedInterpreter::HLEFunction(PowerPC::PowerPCState& ppc_state,
UGeckoInstruction data) const HLEFunctionOperands& operands)
{ {
PowerPC::UpdatePerformanceMonitor(0, 0, data.hex, cached_interpreter.m_ppc_state); const auto& [system, current_pc, hook_index] = operands;
HLE::Execute(Core::CPUThreadGuard{system}, current_pc, hook_index);
return sizeof(AnyCallback) + sizeof(operands);
} }
void CachedInterpreter::WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data) s32 CachedInterpreter::WritePC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands)
{ {
auto& ppc_state = cached_interpreter.m_ppc_state; const auto& [current_pc] = operands;
ppc_state.pc = data.hex; ppc_state.pc = current_pc;
ppc_state.npc = data.hex + 4; ppc_state.npc = current_pc + 4;
return sizeof(AnyCallback) + sizeof(operands);
} }
void CachedInterpreter::WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter, s32 CachedInterpreter::WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state,
UGeckoInstruction data) const WritePCOperands& operands)
{ {
cached_interpreter.m_ppc_state.npc = data.hex; const auto& [current_pc] = operands;
ppc_state.npc = current_pc;
return sizeof(AnyCallback) + sizeof(operands);
} }
bool CachedInterpreter::CheckFPU(CachedInterpreter& cached_interpreter, u32 data) s32 CachedInterpreter::CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands)
{ {
auto& ppc_state = cached_interpreter.m_ppc_state; const auto& [power_pc, downcount] = operands;
if (!ppc_state.msr.FP) if (!ppc_state.msr.FP)
{ {
ppc_state.Exceptions |= EXCEPTION_FPU_UNAVAILABLE; ppc_state.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
cached_interpreter.m_system.GetPowerPC().CheckExceptions(); power_pc.CheckExceptions();
ppc_state.downcount -= data; ppc_state.downcount -= downcount;
return true; return 0;
} }
return false; return sizeof(AnyCallback) + sizeof(operands);
} }
bool CachedInterpreter::CheckDSI(CachedInterpreter& cached_interpreter, u32 data) s32 CachedInterpreter::CheckDSI(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands)
{ {
auto& ppc_state = cached_interpreter.m_ppc_state; const auto& [power_pc, downcount] = operands;
if (ppc_state.Exceptions & EXCEPTION_DSI) if ((ppc_state.Exceptions & EXCEPTION_DSI) != 0)
{ {
cached_interpreter.m_system.GetPowerPC().CheckExceptions(); power_pc.CheckExceptions();
ppc_state.downcount -= data; ppc_state.downcount -= downcount;
return true; return 0;
} }
return false; return sizeof(AnyCallback) + sizeof(operands);
} }
bool CachedInterpreter::CheckProgramException(CachedInterpreter& cached_interpreter, u32 data) s32 CachedInterpreter::CheckProgramException(PowerPC::PowerPCState& ppc_state,
const CheckHaltOperands& operands)
{ {
auto& ppc_state = cached_interpreter.m_ppc_state; const auto& [power_pc, downcount] = operands;
if (ppc_state.Exceptions & EXCEPTION_PROGRAM) if ((ppc_state.Exceptions & EXCEPTION_PROGRAM) != 0)
{ {
cached_interpreter.m_system.GetPowerPC().CheckExceptions(); power_pc.CheckExceptions();
ppc_state.downcount -= data; ppc_state.downcount -= downcount;
return true; return 0;
} }
return false; return sizeof(AnyCallback) + sizeof(operands);
} }
bool CachedInterpreter::CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data) s32 CachedInterpreter::CheckBreakpoint(PowerPC::PowerPCState& ppc_state,
const CheckHaltOperands& operands)
{ {
if (cached_interpreter.m_system.GetPowerPC().CheckAndHandleBreakPoints()) const auto& [power_pc, downcount] = operands;
if (power_pc.CheckAndHandleBreakPoints())
{ {
cached_interpreter.m_ppc_state.downcount -= data; // Accessing PowerPCState through power_pc instead of ppc_state produces better assembly.
return true; power_pc.GetPPCState().downcount -= downcount;
return 0;
} }
return false; return sizeof(AnyCallback) + sizeof(operands);
} }
bool CachedInterpreter::CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc) s32 CachedInterpreter::CheckIdle(PowerPC::PowerPCState& ppc_state,
const CheckIdleOperands& operands)
{ {
if (cached_interpreter.m_ppc_state.npc == idle_pc) const auto& [core_timing, idle_pc] = operands;
{ if (ppc_state.npc == idle_pc)
cached_interpreter.m_system.GetCoreTiming().Idle(); core_timing.Idle();
} return sizeof(AnyCallback) + sizeof(operands);
return false;
} }
bool CachedInterpreter::HandleFunctionHooking(u32 address) bool CachedInterpreter::HandleFunctionHooking(u32 address)
@ -274,27 +198,57 @@ bool CachedInterpreter::HandleFunctionHooking(u32 address)
if (!result) if (!result)
return false; return false;
m_code.emplace_back(WritePC, address); Write(WritePC, {address});
m_code.emplace_back(Interpreter::HLEFunction, result.hook_index); Write(HLEFunction, {m_system, address, result.hook_index});
if (result.type != HLE::HookType::Replace) if (result.type != HLE::HookType::Replace)
return false; return false;
m_code.emplace_back(EndBlock, js.downcountAmount); js.downcountAmount += js.st.numCycles;
m_code.emplace_back(); Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
return true; return true;
} }
void CachedInterpreter::Jit(u32 address) bool CachedInterpreter::SetEmitterStateToFreeCodeRegion()
{ {
if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 || const auto free = m_free_ranges.by_size_begin();
SConfig::GetInstance().bJITNoBlockCache) if (free == m_free_ranges.by_size_end())
{
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in code region.");
return false;
}
SetCodePtr(free.from(), free.to());
return true;
}
// Moves every code range queued by the block cache (ranges of destroyed blocks) into the
// free-range set, then clears the block cache's queue.
void CachedInterpreter::FreeRanges()
{
  const auto& pending_ranges = m_block_cache.GetRangesToFree();
  for (const auto& range : pending_ranges)
  {
    // range.first / range.second are the begin / end pointers of one freed block.
    m_free_ranges.insert(range.first, range.second);
  }
  m_block_cache.ClearRangesToFree();
}
void CachedInterpreter::ResetFreeMemoryRanges()
{
m_free_ranges.clear();
m_free_ranges.insert(region, region + region_size);
}
// JitBase entry point: compiles the block starting at em_address. Forwards to the two-argument
// overload with clear_cache_and_retry_on_failure = true, so a single cache clear and retry is
// permitted if no code space can be found.
void CachedInterpreter::Jit(u32 em_address)
{
Jit(em_address, true);
}
void CachedInterpreter::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
{
if (IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
{ {
ClearCache(); ClearCache();
} }
FreeRanges();
const u32 nextPC = const u32 nextPC =
analyzer.Analyze(m_ppc_state.pc, &code_block, &m_code_buffer, m_code_buffer.size()); analyzer.Analyze(em_address, &code_block, &m_code_buffer, m_code_buffer.size());
if (code_block.m_memory_exception) if (code_block.m_memory_exception)
{ {
// Address of instruction could not be translated // Address of instruction could not be translated
@ -305,9 +259,46 @@ void CachedInterpreter::Jit(u32 address)
return; return;
} }
JitBlock* b = m_block_cache.AllocateBlock(m_ppc_state.pc); if (SetEmitterStateToFreeCodeRegion())
{
JitBlock* b = m_block_cache.AllocateBlock(em_address);
b->normalEntry = b->near_begin = GetWritableCodePtr();
js.blockStart = m_ppc_state.pc; if (DoJit(em_address, b, nextPC))
{
// Record what memory region was used so we know what to free if this block gets invalidated.
b->near_end = GetWritableCodePtr();
b->far_begin = b->far_end = nullptr;
b->codeSize = static_cast<u32>(b->near_end - b->normalEntry);
b->originalSize = code_block.m_num_instructions;
// Mark the memory region that this code block uses in the RangeSizeSet.
if (b->near_begin != b->near_end)
m_free_ranges.erase(b->near_begin, b->near_end);
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
return;
}
}
if (clear_cache_and_retry_on_failure)
{
WARN_LOG_FMT(DYNA_REC, "flushing code caches, please report if this happens a lot");
ClearCache();
Jit(em_address, false);
return;
}
PanicAlertFmtT("JIT failed to find code space after a cache clear. This should never happen. "
"Please report this incident on the bug tracker. Dolphin will now exit.");
std::exit(-1);
}
bool CachedInterpreter::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
{
js.blockStart = em_address;
js.firstFPInstructionFound = false; js.firstFPInstructionFound = false;
js.fifoBytesSinceCheck = 0; js.fifoBytesSinceCheck = 0;
js.downcountAmount = 0; js.downcountAmount = 0;
@ -315,85 +306,80 @@ void CachedInterpreter::Jit(u32 address)
js.numFloatingPointInst = 0; js.numFloatingPointInst = 0;
js.curBlock = b; js.curBlock = b;
b->normalEntry = b->near_begin = GetCodePtr(); auto& interpreter = m_system.GetInterpreter();
auto& power_pc = m_system.GetPowerPC();
auto& cpu = m_system.GetCPU();
auto& breakpoints = power_pc.GetBreakPoints();
for (u32 i = 0; i < code_block.m_num_instructions; i++) for (u32 i = 0; i < code_block.m_num_instructions; i++)
{ {
PPCAnalyst::CodeOp& op = m_code_buffer[i]; PPCAnalyst::CodeOp& op = m_code_buffer[i];
js.op = &op;
js.compilerPC = op.address;
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
js.downcountAmount += op.opinfo->num_cycles; js.downcountAmount += op.opinfo->num_cycles;
if (op.opinfo->flags & FL_LOADSTORE) if (op.opinfo->flags & FL_LOADSTORE)
++js.numLoadStoreInst; ++js.numLoadStoreInst;
if (op.opinfo->flags & FL_USE_FPU) if (op.opinfo->flags & FL_USE_FPU)
++js.numFloatingPointInst; ++js.numFloatingPointInst;
if (HandleFunctionHooking(op.address)) if (HandleFunctionHooking(js.compilerPC))
break; break;
if (!op.skip) if (!op.skip)
{ {
const bool breakpoint = const bool breakpoint = IsDebuggingEnabled() && !cpu.IsStepping() &&
m_enable_debugging && breakpoints.IsAddressBreakPoint(js.compilerPC);
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address); const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) != 0 && !js.firstFPInstructionFound;
const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0; const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0;
const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck; const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) != 0 && jo.memcheck;
const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op); const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op);
const bool idle_loop = op.branchIsIdleLoop; const bool idle_loop = op.branchIsIdleLoop;
if (breakpoint || check_fpu || endblock || memcheck || check_program_exception) if (breakpoint || check_fpu || endblock || memcheck || check_program_exception)
m_code.emplace_back(WritePC, op.address); Write(WritePC, {js.compilerPC});
if (breakpoint) if (breakpoint)
m_code.emplace_back(CheckBreakpoint, js.downcountAmount); Write(CheckBreakpoint, {power_pc, js.downcountAmount});
if (check_fpu) if (check_fpu)
{ {
m_code.emplace_back(CheckFPU, js.downcountAmount); Write(CheckFPU, {power_pc, js.downcountAmount});
js.firstFPInstructionFound = true; js.firstFPInstructionFound = true;
} }
m_code.emplace_back(Interpreter::GetInterpreterOp(op.inst), op.inst); Write(Interpret,
{interpreter, Interpreter::GetInterpreterOp(op.inst), js.compilerPC, op.inst});
if (memcheck) if (memcheck)
m_code.emplace_back(CheckDSI, js.downcountAmount); Write(CheckDSI, {power_pc, js.downcountAmount});
if (check_program_exception) if (check_program_exception)
m_code.emplace_back(CheckProgramException, js.downcountAmount); Write(CheckProgramException, {power_pc, js.downcountAmount});
if (idle_loop) if (idle_loop)
m_code.emplace_back(CheckIdle, js.blockStart); Write(CheckIdle, {m_system.GetCoreTiming(), js.blockStart});
if (endblock) if (endblock)
{ Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
m_code.emplace_back(EndBlock, js.downcountAmount);
if (js.numLoadStoreInst != 0)
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
if (js.numFloatingPointInst != 0)
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
}
} }
} }
if (code_block.m_broken) if (code_block.m_broken)
{ {
m_code.emplace_back(WriteBrokenBlockNPC, nextPC); Write(WriteBrokenBlockNPC, {nextPC});
m_code.emplace_back(EndBlock, js.downcountAmount); Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
if (js.numLoadStoreInst != 0)
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
if (js.numFloatingPointInst != 0)
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
} }
m_code.emplace_back();
b->near_end = GetCodePtr(); if (HasWriteFailed())
b->far_begin = nullptr; {
b->far_end = nullptr; WARN_LOG_FMT(DYNA_REC, "JIT ran out of space in code region during code generation.");
return false;
b->codeSize = static_cast<u32>(GetCodePtr() - b->normalEntry); }
b->originalSize = code_block.m_num_instructions; return true;
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
} }
void CachedInterpreter::ClearCache() void CachedInterpreter::ClearCache()
{ {
m_code.clear();
m_block_cache.Clear(); m_block_cache.Clear();
m_block_cache.ClearRangesToFree();
ClearCodeSpace();
ResetFreeMemoryRanges();
RefreshConfig(); RefreshConfig();
} }

View File

@ -3,14 +3,27 @@
#pragma once #pragma once
#include <vector> #include <cstddef>
#include <rangeset/rangesizeset.h>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h" #include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PPCAnalyst.h"
class CachedInterpreter : public JitBase namespace CoreTiming
{
class CoreTimingManager;
}
namespace CPU
{
enum class State;
}
class Interpreter;
class CachedInterpreter : public JitBase, public CachedInterpreterCodeBlock
{ {
public: public:
explicit CachedInterpreter(Core::System& system); explicit CachedInterpreter(Core::System& system);
@ -30,32 +43,85 @@ public:
void SingleStep() override; void SingleStep() override;
void Jit(u32 address) override; void Jit(u32 address) override;
void Jit(u32 address, bool clear_cache_and_retry_on_failure);
bool DoJit(u32 address, JitBlock* b, u32 nextPC);
JitBaseBlockCache* GetBlockCache() override { return &m_block_cache; } JitBaseBlockCache* GetBlockCache() override { return &m_block_cache; }
const char* GetName() const override { return "Cached Interpreter"; } const char* GetName() const override { return "Cached Interpreter"; }
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; } const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
private: private:
struct Instruction;
u8* GetCodePtr();
void ExecuteOneBlock(); void ExecuteOneBlock();
bool HandleFunctionHooking(u32 address); bool HandleFunctionHooking(u32 address);
static void EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data); // Finds a free memory region and sets the code emitter to point at that region.
static void UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter, // Returns false if no free memory region can be found.
UGeckoInstruction data); bool SetEmitterStateToFreeCodeRegion();
static void UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter,
UGeckoInstruction data);
static void WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
static void WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
static bool CheckFPU(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckDSI(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckProgramException(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data);
static bool CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc);
BlockCache m_block_cache{*this}; void FreeRanges();
std::vector<Instruction> m_code; void ResetFreeMemoryRanges();
struct EndBlockOperands;
struct InterpretOperands;
struct HLEFunctionOperands;
struct WritePCOperands;
struct CheckHaltOperands;
struct CheckIdleOperands;
static s32 EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands);
static s32 Interpret(PowerPC::PowerPCState& ppc_state, const InterpretOperands& operands);
static s32 HLEFunction(PowerPC::PowerPCState& ppc_state, const HLEFunctionOperands& operands);
static s32 WritePC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands);
static s32 WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state, const WritePCOperands& operands);
static s32 CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands);
static s32 CheckDSI(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands);
static s32 CheckProgramException(PowerPC::PowerPCState& ppc_state,
const CheckHaltOperands& operands);
static s32 CheckBreakpoint(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands);
static s32 CheckIdle(PowerPC::PowerPCState& ppc_state, const CheckIdleOperands& operands);
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges;
CachedInterpreterBlockCache m_block_cache;
};
// Operands emitted after the EndBlock callback pointer. EndBlock subtracts `downcount` from
// ppc_state.downcount and passes all three counters to PowerPC::UpdatePerformanceMonitor.
struct CachedInterpreter::EndBlockOperands
{
u32 downcount;
u32 num_load_stores;
u32 num_fp_inst;
// Unnamed padding bit-field. Presumably present so that sizeof(EndBlockOperands) satisfies the
// emitter's `sizeof(Operands) % alignof(AnyCallback) == 0` static_assert on 64-bit hosts.
u32 : 32;
};
// Operands emitted after the Interpret callback pointer. Interpret calls func(interpreter, inst).
// current_pc is stored at compile time (js.compilerPC) but is not read by Interpret itself.
struct CachedInterpreter::InterpretOperands
{
Interpreter& interpreter;
void (*func)(Interpreter&, UGeckoInstruction); // Interpreter::Instruction
u32 current_pc;
UGeckoInstruction inst;
};
// Operands emitted after the HLEFunction callback pointer. HLEFunction calls
// HLE::Execute(Core::CPUThreadGuard{system}, current_pc, hook_index).
struct CachedInterpreter::HLEFunctionOperands
{
Core::System& system;
u32 current_pc;
u32 hook_index;
};
// Operands shared by two callbacks:
//  - WritePC sets ppc_state.pc = current_pc and ppc_state.npc = current_pc + 4.
//  - WriteBrokenBlockNPC sets only ppc_state.npc = current_pc.
struct CachedInterpreter::WritePCOperands
{
u32 current_pc;
// Unnamed padding bit-field. Presumably present so that sizeof(WritePCOperands) satisfies the
// emitter's `sizeof(Operands) % alignof(AnyCallback) == 0` static_assert on 64-bit hosts.
u32 : 32;
};
// Operands shared by the CheckFPU, CheckDSI, CheckProgramException and CheckBreakpoint callbacks.
// When a check fires, the callback uses power_pc to handle it, subtracts `downcount` from the
// PowerPC state's downcount and returns 0, which makes ExecuteOneBlock leave the block.
struct CachedInterpreter::CheckHaltOperands
{
PowerPC::PowerPCManager& power_pc;
u32 downcount;
};
// Operands emitted after the CheckIdle callback pointer. CheckIdle calls core_timing.Idle() when
// ppc_state.npc equals idle_pc; the code generator passes the block's start address as idle_pc.
struct CachedInterpreter::CheckIdleOperands
{
CoreTiming::CoreTimingManager& core_timing;
u32 idle_pc;
};

View File

@ -0,0 +1,41 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Forwards the owning JIT to the JitBaseBlockCache base; no other state needs initialising.
CachedInterpreterBlockCache::CachedInterpreterBlockCache(JitBase& jit) : JitBaseBlockCache{jit}
{
}
// Runs the base-class initialisation, then empties the list of code ranges waiting to be freed.
void CachedInterpreterBlockCache::Init()
{
JitBaseBlockCache::Init();
ClearRangesToFree();
}
// Destroys the block via the base class, then queues its code range [near_begin, near_end)
// so the owner can return it to the free-range set before the next code generation.
void CachedInterpreterBlockCache::DestroyBlock(JitBlock& block)
{
  JitBaseBlockCache::DestroyBlock(block);

  // An empty range occupies no code memory, so there is nothing to queue.
  const bool has_code = block.near_begin != block.near_end;
  if (!has_code)
    return;

  m_ranges_to_free_on_next_codegen.emplace_back(block.near_begin, block.near_end);
}
// Forgets every code range queued by DestroyBlock().
void CachedInterpreterBlockCache::ClearRangesToFree()
{
m_ranges_to_free_on_next_codegen.clear();
}
// Required override of the base-class hook; does nothing here.
// NOTE(review): presumably a no-op because CachedInterpreter::Init() sets
// jo.enableBlocklink = false — confirm before enabling block linking.
void CachedInterpreterBlockCache::WriteLinkBlock(const JitBlock::LinkData& source,
const JitBlock* dest)
{
}
// Overwrites the first callback of a destroyed block with PoisonCallback, using a temporary
// emitter bounded to the block's own range [normalEntry, near_end).
// NOTE(review): the emitter refuses writes that would end at or beyond near_end, and the result
// of Write() is not checked here, so a block no larger than one callback pointer would be left
// unpoisoned — confirm such blocks cannot occur.
void CachedInterpreterBlockCache::WriteDestroyBlock(const JitBlock& block)
{
CachedInterpreterEmitter emitter(block.normalEntry, block.near_end);
emitter.Write(CachedInterpreterEmitter::PoisonCallback);
}

View File

@ -0,0 +1,35 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <utility>
#include <vector>
#include "Common/CommonTypes.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
class JitBase;
// Block cache used by the Cached Interpreter. On top of the JitBaseBlockCache behaviour it
// records the code range of every destroyed block so that the owner can hand that memory
// back to its free-range set before generating more code.
class CachedInterpreterBlockCache final : public JitBaseBlockCache
{
public:
  explicit CachedInterpreterBlockCache(JitBase& jit);

  // Base-class initialisation followed by ClearRangesToFree().
  void Init() override;

  // Destroys the block and queues its [near_begin, near_end) range if it is non-empty.
  void DestroyBlock(JitBlock& block) override;

  // Empties the queue filled by DestroyBlock().
  void ClearRangesToFree();

  // Returns the [begin, end) code ranges queued since the last ClearRangesToFree().
  const std::vector<std::pair<u8*, u8*>>& GetRangesToFree() const
  {
    return m_ranges_to_free_on_next_codegen;
  }

private:
  void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
  void WriteDestroyBlock(const JitBlock& block) override;

  // Code ranges of destroyed blocks, waiting to be returned to the free-range set.
  std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen;
};

View File

@ -0,0 +1,39 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
#include <algorithm>
#include <cstring>
#include "Common/Assert.h"
#include "Common/MsgHandler.h"
// Appends one record to the callback stream: the callback pointer followed by `size` bytes of
// operand data. The emit position is advanced past the record.
//
// callback: function pointer stored at the start of the record.
// operands: bytes copied directly after the pointer; may be null only when size is 0.
// size:     number of operand bytes.
//
// If the record would end at or beyond m_code_end (or no region has been set), nothing is
// written and m_write_failed is set; callers check HasWriteFailed() after generating a block.
void CachedInterpreterEmitter::Write(AnyCallback callback, const void* operands, std::size_t size)
{
  DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(m_code) % alignof(AnyCallback) == 0);

  // Compare byte counts instead of forming `m_code + sizeof(callback) + size`: creating a
  // pointer beyond one-past-the-end of the region (or offsetting a null m_code) is undefined
  // behaviour, so the bounds check itself must not do it.
  const std::size_t available =
      m_code < m_code_end ? static_cast<std::size_t>(m_code_end - m_code) : 0;

  // Same refusal rule as before (record size >= remaining space), written so that the
  // addition of the two sizes cannot wrap around.
  if (size >= available || sizeof(callback) >= available - size)
  {
    m_write_failed = true;
    return;
  }

  std::memcpy(m_code, &callback, sizeof(callback));
  m_code += sizeof(callback);

  // memcpy requires valid pointers even for a zero-length copy, and the operand-less
  // Write(callback) overload passes a null operands pointer with size 0.
  if (size != 0)
  {
    std::memcpy(m_code, operands, size);
    m_code += size;
  }
}
// Placeholder callback written over destroyed blocks and used to fill the whole code region by
// PoisonMemory(). Executing it indicates a bug: it raises an assertion and returns 0, which the
// dispatch loop treats as "leave the block". Both parameters are unused.
s32 CachedInterpreterEmitter::PoisonCallback(PowerPC::PowerPCState& ppc_state, const void* operands)
{
ASSERT_MSG(DYNA_REC, false,
"The Cached Interpreter reached a poisoned callback. This should never happen!");
return 0;
}
// Fills the entire code region with PoisonCallback pointers so that executing any slot that
// has not been (re)written trips the poison assertion.
void CachedInterpreterCodeBlock::PoisonMemory()
{
  // The region is treated as an array of callback pointers, so both its start address and its
  // size must be compatible with AnyCallback.
  DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(region) % alignof(AnyCallback) == 0);
  DEBUG_ASSERT(region_size % sizeof(AnyCallback) == 0);

  auto* const first_slot = reinterpret_cast<AnyCallback*>(region);
  auto* const last_slot = reinterpret_cast<AnyCallback*>(region + region_size);
  std::fill(first_slot, last_slot, AnyCallbackCast(PoisonCallback));
}

View File

@ -0,0 +1,84 @@
// Copyright 2024 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <type_traits>
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
namespace PowerPC
{
struct PowerPCState;
}
// Emits "code" for the Cached Interpreter: a flat stream of records, each made of a callback
// function pointer immediately followed by that callback's operand struct.
class CachedInterpreterEmitter
{
protected:
  // The return value of most callbacks is the distance in memory to the next callback.
  // If a callback returns 0, the block will be exited. The return value is signed to
  // support block-linking. 32-bit return values seem to perform better than 64-bit ones.
  template <class Operands>
  using Callback = s32 (*)(PowerPC::PowerPCState& ppc_state, const Operands& operands);
  using AnyCallback = s32 (*)(PowerPC::PowerPCState& ppc_state, const void* operands);

  // Erases the operand type of a callback so it can be stored in the stream.
  // NOTE(review): the stored pointer is later invoked through the AnyCallback type, which
  // presumably relies on `const Operands&` and `const void*` being passed identically by the
  // host ABI — confirm for any new target.
  template <class Operands>
  static AnyCallback AnyCallbackCast(Callback<Operands> callback)
  {
    return reinterpret_cast<AnyCallback>(callback);
  }
  static consteval AnyCallback AnyCallbackCast(AnyCallback callback) { return callback; }

public:
  CachedInterpreterEmitter() = default;
  explicit CachedInterpreterEmitter(u8* begin, u8* end) : m_code(begin), m_code_end(end) {}

  // Emits `callback` followed by a byte-wise copy of `operands`.
  template <class Operands>
  void Write(Callback<Operands> callback, const Operands& operands)
  {
    // I would use std::is_trivial_v, but almost every operands struct uses
    // references instead of pointers to make the callback functions nicer.
    static_assert(
        std::is_trivially_copyable_v<Operands> && std::is_trivially_destructible_v<Operands> &&
        alignof(Operands) <= alignof(AnyCallback) && sizeof(Operands) % alignof(AnyCallback) == 0);
    Write(AnyCallbackCast(callback), &operands, sizeof(Operands));
  }
  // Emits a callback that takes no operand data.
  void Write(AnyCallback callback) { Write(callback, nullptr, 0); }

  const u8* GetCodePtr() const { return m_code; }
  u8* GetWritableCodePtr() { return m_code; }
  const u8* GetCodeEnd() const { return m_code_end; }
  u8* GetWritableCodeEnd() { return m_code_end; }

  // Should be checked after a block of code has been generated to see if the code has been
  // successfully written to memory. Do not call the generated code when this returns true!
  bool HasWriteFailed() const { return m_write_failed; }

  // Points the emitter at a new [begin, end) region and clears the write-failed flag.
  void SetCodePtr(u8* begin, u8* end)
  {
    m_code = begin;
    m_code_end = end;
    m_write_failed = false;
  }

  static s32 PoisonCallback(PowerPC::PowerPCState& ppc_state, const void* operands);

private:
  void Write(AnyCallback callback, const void* operands, std::size_t size);

  // Pointer to memory where code will be emitted to.
  u8* m_code = nullptr;
  // Pointer past the end of the memory region we're allowed to emit to.
  // Writes that would reach this memory are refused and will set the m_write_failed flag instead.
  u8* m_code_end = nullptr;
  // Set to true when a write request happens that would write past m_code_end.
  // Must be cleared with SetCodePtr() afterwards.
  bool m_write_failed = false;
};
// Code region for the Cached Interpreter. The second template argument (`false`) is CodeBlock's
// `executable` flag: the region is allocated as ordinary memory pages rather than executable
// memory, since it holds callback pointers and operand data, not machine code.
class CachedInterpreterCodeBlock : public Common::CodeBlock<CachedInterpreterEmitter, false>
{
private:
// Fills the region with PoisonCallback pointers.
void PoisonMemory() override;
};

View File

@ -1,14 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
// Forwards the owning JIT to the JitBaseBlockCache base; the class has no state of its own.
BlockCache::BlockCache(JitBase& jit) : JitBaseBlockCache{jit}
{
}
// Required override of the base-class hook; intentionally does nothing.
void BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest)
{
}

View File

@ -1,17 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Core/PowerPC/JitCommon/JitCache.h"
class JitBase;
// Minimal block cache for the Cached Interpreter: adds nothing to JitBaseBlockCache beyond an
// empty WriteLinkBlock override.
class BlockCache final : public JitBaseBlockCache
{
public:
explicit BlockCache(JitBase& jit);
private:
// No-op override of the base-class block-linking hook.
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
};

View File

@ -428,7 +428,8 @@
<ClInclude Include="Core\PatchEngine.h" /> <ClInclude Include="Core\PatchEngine.h" />
<ClInclude Include="Core\PowerPC\BreakPoints.h" /> <ClInclude Include="Core\PowerPC\BreakPoints.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.h" /> <ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.h" /> <ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.h" />
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.h" />
<ClInclude Include="Core\PowerPC\ConditionRegister.h" /> <ClInclude Include="Core\PowerPC\ConditionRegister.h" />
<ClInclude Include="Core\PowerPC\CPUCoreBase.h" /> <ClInclude Include="Core\PowerPC\CPUCoreBase.h" />
<ClInclude Include="Core\PowerPC\Expression.h" /> <ClInclude Include="Core\PowerPC\Expression.h" />
@ -1089,7 +1090,8 @@
<ClCompile Include="Core\PatchEngine.cpp" /> <ClCompile Include="Core\PatchEngine.cpp" />
<ClCompile Include="Core\PowerPC\BreakPoints.cpp" /> <ClCompile Include="Core\PowerPC\BreakPoints.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.cpp" /> <ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.cpp" /> <ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.cpp" />
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.cpp" />
<ClCompile Include="Core\PowerPC\ConditionRegister.cpp" /> <ClCompile Include="Core\PowerPC\ConditionRegister.cpp" />
<ClCompile Include="Core\PowerPC\Expression.cpp" /> <ClCompile Include="Core\PowerPC\Expression.cpp" />
<ClCompile Include="Core\PowerPC\GDBStub.cpp" /> <ClCompile Include="Core\PowerPC\GDBStub.cpp" />