Merge pull request #12723 from mitaclaw/cached-interpreter-2.0
Cached Interpreter 2.0
This commit is contained in:
commit
87b7009c12
|
@ -17,7 +17,7 @@ namespace Common
|
|||
// having to prefix them with gen-> or something similar.
|
||||
// Example implementation:
|
||||
// class JIT : public CodeBlock<ARMXEmitter> {}
|
||||
template <class T>
|
||||
template <class T, bool executable = true>
|
||||
class CodeBlock : public T
|
||||
{
|
||||
private:
|
||||
|
@ -53,7 +53,10 @@ public:
|
|||
{
|
||||
region_size = size;
|
||||
total_region_size = size;
|
||||
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
|
||||
if constexpr (executable)
|
||||
region = static_cast<u8*>(Common::AllocateExecutableMemory(total_region_size));
|
||||
else
|
||||
region = static_cast<u8*>(Common::AllocateMemoryPages(total_region_size));
|
||||
T::SetCodePtr(region, region + size);
|
||||
}
|
||||
|
||||
|
|
|
@ -481,8 +481,10 @@ add_library(core
|
|||
PowerPC/BreakPoints.h
|
||||
PowerPC/CachedInterpreter/CachedInterpreter.cpp
|
||||
PowerPC/CachedInterpreter/CachedInterpreter.h
|
||||
PowerPC/CachedInterpreter/InterpreterBlockCache.cpp
|
||||
PowerPC/CachedInterpreter/InterpreterBlockCache.h
|
||||
PowerPC/CachedInterpreter/CachedInterpreterBlockCache.cpp
|
||||
PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h
|
||||
PowerPC/CachedInterpreter/CachedInterpreterEmitter.cpp
|
||||
PowerPC/CachedInterpreter/CachedInterpreterEmitter.h
|
||||
PowerPC/ConditionRegister.cpp
|
||||
PowerPC/ConditionRegister.h
|
||||
PowerPC/Expression.cpp
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Logging/Log.h"
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HLE/HLE.h"
|
||||
#include "Core/HW/CPU.h"
|
||||
|
@ -16,65 +17,7 @@
|
|||
#include "Core/PowerPC/PowerPC.h"
|
||||
#include "Core/System.h"
|
||||
|
||||
struct CachedInterpreter::Instruction
|
||||
{
|
||||
using CommonCallback = void (*)(UGeckoInstruction);
|
||||
using ConditionalCallback = bool (*)(u32);
|
||||
using InterpreterCallback = void (*)(Interpreter&, UGeckoInstruction);
|
||||
using CachedInterpreterCallback = void (*)(CachedInterpreter&, UGeckoInstruction);
|
||||
using ConditionalCachedInterpreterCallback = bool (*)(CachedInterpreter&, u32);
|
||||
|
||||
Instruction() {}
|
||||
Instruction(const CommonCallback c, UGeckoInstruction i)
|
||||
: common_callback(c), data(i.hex), type(Type::Common)
|
||||
{
|
||||
}
|
||||
|
||||
Instruction(const ConditionalCallback c, u32 d)
|
||||
: conditional_callback(c), data(d), type(Type::Conditional)
|
||||
{
|
||||
}
|
||||
|
||||
Instruction(const InterpreterCallback c, UGeckoInstruction i)
|
||||
: interpreter_callback(c), data(i.hex), type(Type::Interpreter)
|
||||
{
|
||||
}
|
||||
|
||||
Instruction(const CachedInterpreterCallback c, UGeckoInstruction i)
|
||||
: cached_interpreter_callback(c), data(i.hex), type(Type::CachedInterpreter)
|
||||
{
|
||||
}
|
||||
|
||||
Instruction(const ConditionalCachedInterpreterCallback c, u32 d)
|
||||
: conditional_cached_interpreter_callback(c), data(d),
|
||||
type(Type::ConditionalCachedInterpreter)
|
||||
{
|
||||
}
|
||||
|
||||
enum class Type
|
||||
{
|
||||
Abort,
|
||||
Common,
|
||||
Conditional,
|
||||
Interpreter,
|
||||
CachedInterpreter,
|
||||
ConditionalCachedInterpreter,
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
const CommonCallback common_callback = nullptr;
|
||||
const ConditionalCallback conditional_callback;
|
||||
const InterpreterCallback interpreter_callback;
|
||||
const CachedInterpreterCallback cached_interpreter_callback;
|
||||
const ConditionalCachedInterpreterCallback conditional_cached_interpreter_callback;
|
||||
};
|
||||
|
||||
u32 data = 0;
|
||||
Type type = Type::Abort;
|
||||
};
|
||||
|
||||
CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system)
|
||||
CachedInterpreter::CachedInterpreter(Core::System& system) : JitBase(system), m_block_cache(*this)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -84,7 +27,8 @@ void CachedInterpreter::Init()
|
|||
{
|
||||
RefreshConfig();
|
||||
|
||||
m_code.reserve(CODE_SIZE / sizeof(Instruction));
|
||||
AllocCodeSpace(CODE_SIZE);
|
||||
ResetFreeMemoryRanges();
|
||||
|
||||
jo.enableBlocklink = false;
|
||||
|
||||
|
@ -100,11 +44,6 @@ void CachedInterpreter::Shutdown()
|
|||
m_block_cache.Shutdown();
|
||||
}
|
||||
|
||||
u8* CachedInterpreter::GetCodePtr()
|
||||
{
|
||||
return reinterpret_cast<u8*>(m_code.data() + m_code.size());
|
||||
}
|
||||
|
||||
void CachedInterpreter::ExecuteOneBlock()
|
||||
{
|
||||
const u8* normal_entry = m_block_cache.Dispatch();
|
||||
|
@ -114,50 +53,23 @@ void CachedInterpreter::ExecuteOneBlock()
|
|||
return;
|
||||
}
|
||||
|
||||
const Instruction* code = reinterpret_cast<const Instruction*>(normal_entry);
|
||||
auto& interpreter = m_system.GetInterpreter();
|
||||
|
||||
for (; code->type != Instruction::Type::Abort; ++code)
|
||||
auto& ppc_state = m_ppc_state;
|
||||
while (true)
|
||||
{
|
||||
switch (code->type)
|
||||
{
|
||||
case Instruction::Type::Common:
|
||||
code->common_callback(UGeckoInstruction(code->data));
|
||||
const auto callback = *reinterpret_cast<const AnyCallback*>(normal_entry);
|
||||
if (const auto distance = callback(ppc_state, normal_entry + sizeof(callback)))
|
||||
normal_entry += distance;
|
||||
else
|
||||
break;
|
||||
|
||||
case Instruction::Type::Conditional:
|
||||
if (code->conditional_callback(code->data))
|
||||
return;
|
||||
break;
|
||||
|
||||
case Instruction::Type::Interpreter:
|
||||
code->interpreter_callback(interpreter, UGeckoInstruction(code->data));
|
||||
break;
|
||||
|
||||
case Instruction::Type::CachedInterpreter:
|
||||
code->cached_interpreter_callback(*this, UGeckoInstruction(code->data));
|
||||
break;
|
||||
|
||||
case Instruction::Type::ConditionalCachedInterpreter:
|
||||
if (code->conditional_cached_interpreter_callback(*this, code->data))
|
||||
return;
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR_LOG_FMT(POWERPC, "Unknown CachedInterpreter Instruction: {}",
|
||||
static_cast<int>(code->type));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CachedInterpreter::Run()
|
||||
{
|
||||
auto& core_timing = m_system.GetCoreTiming();
|
||||
auto& cpu = m_system.GetCPU();
|
||||
|
||||
const CPU::State* state_ptr = cpu.GetStatePtr();
|
||||
while (cpu.GetState() == CPU::State::Running)
|
||||
const CPU::State* state_ptr = m_system.GetCPU().GetStatePtr();
|
||||
while (*state_ptr == CPU::State::Running)
|
||||
{
|
||||
// Start new timing slice
|
||||
// NOTE: Exceptions may change PC
|
||||
|
@ -177,93 +89,101 @@ void CachedInterpreter::SingleStep()
|
|||
ExecuteOneBlock();
|
||||
}
|
||||
|
||||
void CachedInterpreter::EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data)
|
||||
s32 CachedInterpreter::EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands)
|
||||
{
|
||||
auto& ppc_state = cached_interpreter.m_ppc_state;
|
||||
const auto& [downcount, num_load_stores, num_fp_inst] = operands;
|
||||
ppc_state.pc = ppc_state.npc;
|
||||
ppc_state.downcount -= data.hex;
|
||||
PowerPC::UpdatePerformanceMonitor(data.hex, 0, 0, ppc_state);
|
||||
ppc_state.downcount -= downcount;
|
||||
PowerPC::UpdatePerformanceMonitor(downcount, num_load_stores, num_fp_inst, ppc_state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CachedInterpreter::UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter,
|
||||
UGeckoInstruction data)
|
||||
template <bool write_pc>
|
||||
s32 CachedInterpreter::Interpret(PowerPC::PowerPCState& ppc_state,
|
||||
const InterpretOperands& operands)
|
||||
{
|
||||
PowerPC::UpdatePerformanceMonitor(0, data.hex, 0, cached_interpreter.m_ppc_state);
|
||||
if constexpr (write_pc)
|
||||
{
|
||||
ppc_state.pc = operands.current_pc;
|
||||
ppc_state.npc = operands.current_pc + 4;
|
||||
}
|
||||
operands.func(operands.interpreter, operands.inst);
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
void CachedInterpreter::UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter,
|
||||
UGeckoInstruction data)
|
||||
template <bool write_pc>
|
||||
s32 CachedInterpreter::InterpretAndCheckExceptions(
|
||||
PowerPC::PowerPCState& ppc_state, const InterpretAndCheckExceptionsOperands& operands)
|
||||
{
|
||||
PowerPC::UpdatePerformanceMonitor(0, 0, data.hex, cached_interpreter.m_ppc_state);
|
||||
if constexpr (write_pc)
|
||||
{
|
||||
ppc_state.pc = operands.current_pc;
|
||||
ppc_state.npc = operands.current_pc + 4;
|
||||
}
|
||||
operands.func(operands.interpreter, operands.inst);
|
||||
|
||||
if ((ppc_state.Exceptions & (EXCEPTION_DSI | EXCEPTION_PROGRAM)) != 0)
|
||||
{
|
||||
ppc_state.pc = operands.current_pc;
|
||||
ppc_state.downcount -= operands.downcount;
|
||||
operands.power_pc.CheckExceptions();
|
||||
return 0;
|
||||
}
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
void CachedInterpreter::WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data)
|
||||
s32 CachedInterpreter::HLEFunction(PowerPC::PowerPCState& ppc_state,
|
||||
const HLEFunctionOperands& operands)
|
||||
{
|
||||
auto& ppc_state = cached_interpreter.m_ppc_state;
|
||||
ppc_state.pc = data.hex;
|
||||
ppc_state.npc = data.hex + 4;
|
||||
const auto& [system, current_pc, hook_index] = operands;
|
||||
ppc_state.pc = current_pc;
|
||||
HLE::Execute(Core::CPUThreadGuard{system}, current_pc, hook_index);
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
void CachedInterpreter::WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter,
|
||||
UGeckoInstruction data)
|
||||
s32 CachedInterpreter::WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state,
|
||||
const WriteBrokenBlockNPCOperands& operands)
|
||||
{
|
||||
cached_interpreter.m_ppc_state.npc = data.hex;
|
||||
const auto& [current_pc] = operands;
|
||||
ppc_state.npc = current_pc;
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
bool CachedInterpreter::CheckFPU(CachedInterpreter& cached_interpreter, u32 data)
|
||||
s32 CachedInterpreter::CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands)
|
||||
{
|
||||
auto& ppc_state = cached_interpreter.m_ppc_state;
|
||||
const auto& [power_pc, current_pc, downcount] = operands;
|
||||
if (!ppc_state.msr.FP)
|
||||
{
|
||||
ppc_state.pc = current_pc;
|
||||
ppc_state.downcount -= downcount;
|
||||
ppc_state.Exceptions |= EXCEPTION_FPU_UNAVAILABLE;
|
||||
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
|
||||
ppc_state.downcount -= data;
|
||||
return true;
|
||||
power_pc.CheckExceptions();
|
||||
return 0;
|
||||
}
|
||||
return false;
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
bool CachedInterpreter::CheckDSI(CachedInterpreter& cached_interpreter, u32 data)
|
||||
s32 CachedInterpreter::CheckBreakpoint(PowerPC::PowerPCState& ppc_state,
|
||||
const CheckHaltOperands& operands)
|
||||
{
|
||||
auto& ppc_state = cached_interpreter.m_ppc_state;
|
||||
if (ppc_state.Exceptions & EXCEPTION_DSI)
|
||||
const auto& [power_pc, current_pc, downcount] = operands;
|
||||
ppc_state.pc = current_pc;
|
||||
if (power_pc.CheckAndHandleBreakPoints())
|
||||
{
|
||||
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
|
||||
ppc_state.downcount -= data;
|
||||
return true;
|
||||
// Accessing PowerPCState through power_pc instead of ppc_state produces better assembly.
|
||||
power_pc.GetPPCState().downcount -= downcount;
|
||||
return 0;
|
||||
}
|
||||
return false;
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
bool CachedInterpreter::CheckProgramException(CachedInterpreter& cached_interpreter, u32 data)
|
||||
s32 CachedInterpreter::CheckIdle(PowerPC::PowerPCState& ppc_state,
|
||||
const CheckIdleOperands& operands)
|
||||
{
|
||||
auto& ppc_state = cached_interpreter.m_ppc_state;
|
||||
if (ppc_state.Exceptions & EXCEPTION_PROGRAM)
|
||||
{
|
||||
cached_interpreter.m_system.GetPowerPC().CheckExceptions();
|
||||
ppc_state.downcount -= data;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CachedInterpreter::CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data)
|
||||
{
|
||||
if (cached_interpreter.m_system.GetPowerPC().CheckAndHandleBreakPoints())
|
||||
{
|
||||
cached_interpreter.m_ppc_state.downcount -= data;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CachedInterpreter::CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc)
|
||||
{
|
||||
if (cached_interpreter.m_ppc_state.npc == idle_pc)
|
||||
{
|
||||
cached_interpreter.m_system.GetCoreTiming().Idle();
|
||||
}
|
||||
return false;
|
||||
const auto& [core_timing, idle_pc] = operands;
|
||||
if (ppc_state.npc == idle_pc)
|
||||
core_timing.Idle();
|
||||
return sizeof(AnyCallback) + sizeof(operands);
|
||||
}
|
||||
|
||||
bool CachedInterpreter::HandleFunctionHooking(u32 address)
|
||||
|
@ -274,27 +194,56 @@ bool CachedInterpreter::HandleFunctionHooking(u32 address)
|
|||
if (!result)
|
||||
return false;
|
||||
|
||||
m_code.emplace_back(WritePC, address);
|
||||
m_code.emplace_back(Interpreter::HLEFunction, result.hook_index);
|
||||
Write(HLEFunction, {m_system, address, result.hook_index});
|
||||
|
||||
if (result.type != HLE::HookType::Replace)
|
||||
return false;
|
||||
|
||||
m_code.emplace_back(EndBlock, js.downcountAmount);
|
||||
m_code.emplace_back();
|
||||
js.downcountAmount += js.st.numCycles;
|
||||
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
|
||||
return true;
|
||||
}
|
||||
|
||||
void CachedInterpreter::Jit(u32 address)
|
||||
bool CachedInterpreter::SetEmitterStateToFreeCodeRegion()
|
||||
{
|
||||
if (m_code.size() >= CODE_SIZE / sizeof(Instruction) - 0x1000 ||
|
||||
SConfig::GetInstance().bJITNoBlockCache)
|
||||
const auto free = m_free_ranges.by_size_begin();
|
||||
if (free == m_free_ranges.by_size_end())
|
||||
{
|
||||
WARN_LOG_FMT(DYNA_REC, "Failed to find free memory region in code region.");
|
||||
return false;
|
||||
}
|
||||
SetCodePtr(free.from(), free.to());
|
||||
return true;
|
||||
}
|
||||
|
||||
void CachedInterpreter::FreeRanges()
|
||||
{
|
||||
for (const auto& [from, to] : m_block_cache.GetRangesToFree())
|
||||
m_free_ranges.insert(from, to);
|
||||
m_block_cache.ClearRangesToFree();
|
||||
}
|
||||
|
||||
void CachedInterpreter::ResetFreeMemoryRanges()
|
||||
{
|
||||
m_free_ranges.clear();
|
||||
m_free_ranges.insert(region, region + region_size);
|
||||
}
|
||||
|
||||
// Compiles the block starting at em_address. Forwards to the two-argument overload with
// clear_cache_and_retry_on_failure enabled.
void CachedInterpreter::Jit(u32 em_address)
{
  constexpr bool clear_cache_and_retry_on_failure = true;
  Jit(em_address, clear_cache_and_retry_on_failure);
}
|
||||
|
||||
void CachedInterpreter::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
|
||||
{
|
||||
if (IsAlmostFull() || SConfig::GetInstance().bJITNoBlockCache)
|
||||
{
|
||||
ClearCache();
|
||||
}
|
||||
FreeRanges();
|
||||
|
||||
const u32 nextPC =
|
||||
analyzer.Analyze(m_ppc_state.pc, &code_block, &m_code_buffer, m_code_buffer.size());
|
||||
analyzer.Analyze(em_address, &code_block, &m_code_buffer, m_code_buffer.size());
|
||||
if (code_block.m_memory_exception)
|
||||
{
|
||||
// Address of instruction could not be translated
|
||||
|
@ -305,9 +254,46 @@ void CachedInterpreter::Jit(u32 address)
|
|||
return;
|
||||
}
|
||||
|
||||
JitBlock* b = m_block_cache.AllocateBlock(m_ppc_state.pc);
|
||||
if (SetEmitterStateToFreeCodeRegion())
|
||||
{
|
||||
JitBlock* b = m_block_cache.AllocateBlock(em_address);
|
||||
b->normalEntry = b->near_begin = GetWritableCodePtr();
|
||||
|
||||
js.blockStart = m_ppc_state.pc;
|
||||
if (DoJit(em_address, b, nextPC))
|
||||
{
|
||||
// Record what memory region was used so we know what to free if this block gets invalidated.
|
||||
b->near_end = GetWritableCodePtr();
|
||||
b->far_begin = b->far_end = nullptr;
|
||||
|
||||
b->codeSize = static_cast<u32>(b->near_end - b->normalEntry);
|
||||
b->originalSize = code_block.m_num_instructions;
|
||||
|
||||
// Mark the memory region that this code block uses in the RangeSizeSet.
|
||||
if (b->near_begin != b->near_end)
|
||||
m_free_ranges.erase(b->near_begin, b->near_end);
|
||||
|
||||
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (clear_cache_and_retry_on_failure)
|
||||
{
|
||||
WARN_LOG_FMT(DYNA_REC, "flushing code caches, please report if this happens a lot");
|
||||
ClearCache();
|
||||
Jit(em_address, false);
|
||||
return;
|
||||
}
|
||||
|
||||
PanicAlertFmtT("JIT failed to find code space after a cache clear. This should never happen. "
|
||||
"Please report this incident on the bug tracker. Dolphin will now exit.");
|
||||
std::exit(-1);
|
||||
}
|
||||
|
||||
bool CachedInterpreter::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||
{
|
||||
js.blockStart = em_address;
|
||||
js.firstFPInstructionFound = false;
|
||||
js.fifoBytesSinceCheck = 0;
|
||||
js.downcountAmount = 0;
|
||||
|
@ -315,85 +301,84 @@ void CachedInterpreter::Jit(u32 address)
|
|||
js.numFloatingPointInst = 0;
|
||||
js.curBlock = b;
|
||||
|
||||
b->normalEntry = b->near_begin = GetCodePtr();
|
||||
auto& interpreter = m_system.GetInterpreter();
|
||||
auto& power_pc = m_system.GetPowerPC();
|
||||
auto& cpu = m_system.GetCPU();
|
||||
auto& breakpoints = power_pc.GetBreakPoints();
|
||||
|
||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||
{
|
||||
PPCAnalyst::CodeOp& op = m_code_buffer[i];
|
||||
js.op = &op;
|
||||
|
||||
js.compilerPC = op.address;
|
||||
js.instructionsLeft = (code_block.m_num_instructions - 1) - i;
|
||||
js.downcountAmount += op.opinfo->num_cycles;
|
||||
if (op.opinfo->flags & FL_LOADSTORE)
|
||||
++js.numLoadStoreInst;
|
||||
if (op.opinfo->flags & FL_USE_FPU)
|
||||
++js.numFloatingPointInst;
|
||||
|
||||
if (HandleFunctionHooking(op.address))
|
||||
if (HandleFunctionHooking(js.compilerPC))
|
||||
break;
|
||||
|
||||
if (!op.skip)
|
||||
{
|
||||
const bool breakpoint =
|
||||
m_enable_debugging &&
|
||||
m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address);
|
||||
const bool check_fpu = (op.opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound;
|
||||
const bool endblock = (op.opinfo->flags & FL_ENDBLOCK) != 0;
|
||||
const bool memcheck = (op.opinfo->flags & FL_LOADSTORE) && jo.memcheck;
|
||||
const bool check_program_exception = !endblock && ShouldHandleFPExceptionForInstruction(&op);
|
||||
const bool idle_loop = op.branchIsIdleLoop;
|
||||
|
||||
if (breakpoint || check_fpu || endblock || memcheck || check_program_exception)
|
||||
m_code.emplace_back(WritePC, op.address);
|
||||
|
||||
if (breakpoint)
|
||||
m_code.emplace_back(CheckBreakpoint, js.downcountAmount);
|
||||
|
||||
if (check_fpu)
|
||||
if (IsDebuggingEnabled() && !cpu.IsStepping() &&
|
||||
breakpoints.IsAddressBreakPoint(js.compilerPC))
|
||||
{
|
||||
m_code.emplace_back(CheckFPU, js.downcountAmount);
|
||||
Write(CheckBreakpoint, {power_pc, js.compilerPC, js.downcountAmount});
|
||||
}
|
||||
if (!js.firstFPInstructionFound && (op.opinfo->flags & FL_USE_FPU) != 0)
|
||||
{
|
||||
Write(CheckFPU, {power_pc, js.compilerPC, js.downcountAmount});
|
||||
js.firstFPInstructionFound = true;
|
||||
}
|
||||
|
||||
m_code.emplace_back(Interpreter::GetInterpreterOp(op.inst), op.inst);
|
||||
if (memcheck)
|
||||
m_code.emplace_back(CheckDSI, js.downcountAmount);
|
||||
if (check_program_exception)
|
||||
m_code.emplace_back(CheckProgramException, js.downcountAmount);
|
||||
if (idle_loop)
|
||||
m_code.emplace_back(CheckIdle, js.blockStart);
|
||||
if (endblock)
|
||||
// Instruction may cause a DSI Exception or Program Exception.
|
||||
if ((jo.memcheck && (op.opinfo->flags & FL_LOADSTORE) != 0) ||
|
||||
(!op.canEndBlock && ShouldHandleFPExceptionForInstruction(&op)))
|
||||
{
|
||||
m_code.emplace_back(EndBlock, js.downcountAmount);
|
||||
if (js.numLoadStoreInst != 0)
|
||||
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
|
||||
if (js.numFloatingPointInst != 0)
|
||||
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
|
||||
const InterpretAndCheckExceptionsOperands operands = {
|
||||
{interpreter, Interpreter::GetInterpreterOp(op.inst), js.compilerPC, op.inst},
|
||||
power_pc,
|
||||
js.downcountAmount};
|
||||
Write(op.canEndBlock ? InterpretAndCheckExceptions<true> :
|
||||
InterpretAndCheckExceptions<false>,
|
||||
operands);
|
||||
}
|
||||
else
|
||||
{
|
||||
const InterpretOperands operands = {interpreter, Interpreter::GetInterpreterOp(op.inst),
|
||||
js.compilerPC, op.inst};
|
||||
Write(op.canEndBlock ? Interpret<true> : Interpret<false>, operands);
|
||||
}
|
||||
|
||||
if (op.branchIsIdleLoop)
|
||||
Write(CheckIdle, {m_system.GetCoreTiming(), js.blockStart});
|
||||
if (op.canEndBlock)
|
||||
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
|
||||
}
|
||||
}
|
||||
if (code_block.m_broken)
|
||||
{
|
||||
m_code.emplace_back(WriteBrokenBlockNPC, nextPC);
|
||||
m_code.emplace_back(EndBlock, js.downcountAmount);
|
||||
if (js.numLoadStoreInst != 0)
|
||||
m_code.emplace_back(UpdateNumLoadStoreInstructions, js.numLoadStoreInst);
|
||||
if (js.numFloatingPointInst != 0)
|
||||
m_code.emplace_back(UpdateNumFloatingPointInstructions, js.numFloatingPointInst);
|
||||
Write(WriteBrokenBlockNPC, {nextPC});
|
||||
Write(EndBlock, {js.downcountAmount, js.numLoadStoreInst, js.numFloatingPointInst});
|
||||
}
|
||||
m_code.emplace_back();
|
||||
|
||||
b->near_end = GetCodePtr();
|
||||
b->far_begin = nullptr;
|
||||
b->far_end = nullptr;
|
||||
|
||||
b->codeSize = static_cast<u32>(GetCodePtr() - b->normalEntry);
|
||||
b->originalSize = code_block.m_num_instructions;
|
||||
|
||||
m_block_cache.FinalizeBlock(*b, jo.enableBlocklink, code_block.m_physical_addresses);
|
||||
if (HasWriteFailed())
|
||||
{
|
||||
WARN_LOG_FMT(DYNA_REC, "JIT ran out of space in code region during code generation.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void CachedInterpreter::ClearCache()
|
||||
{
|
||||
m_code.clear();
|
||||
m_block_cache.Clear();
|
||||
m_block_cache.ClearRangesToFree();
|
||||
ClearCodeSpace();
|
||||
ResetFreeMemoryRanges();
|
||||
RefreshConfig();
|
||||
}
|
||||
|
|
|
@ -3,14 +3,27 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <cstddef>
|
||||
|
||||
#include <rangeset/rangesizeset.h>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h"
|
||||
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
|
||||
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
#include "Core/PowerPC/PPCAnalyst.h"
|
||||
|
||||
class CachedInterpreter : public JitBase
|
||||
namespace CoreTiming
|
||||
{
|
||||
class CoreTimingManager;
|
||||
}
|
||||
namespace CPU
|
||||
{
|
||||
enum class State;
|
||||
}
|
||||
class Interpreter;
|
||||
|
||||
class CachedInterpreter : public JitBase, public CachedInterpreterCodeBlock
|
||||
{
|
||||
public:
|
||||
explicit CachedInterpreter(Core::System& system);
|
||||
|
@ -30,32 +43,94 @@ public:
|
|||
void SingleStep() override;
|
||||
|
||||
void Jit(u32 address) override;
|
||||
void Jit(u32 address, bool clear_cache_and_retry_on_failure);
|
||||
bool DoJit(u32 address, JitBlock* b, u32 nextPC);
|
||||
|
||||
JitBaseBlockCache* GetBlockCache() override { return &m_block_cache; }
|
||||
const char* GetName() const override { return "Cached Interpreter"; }
|
||||
const CommonAsmRoutinesBase* GetAsmRoutines() override { return nullptr; }
|
||||
|
||||
private:
|
||||
struct Instruction;
|
||||
|
||||
u8* GetCodePtr();
|
||||
void ExecuteOneBlock();
|
||||
|
||||
bool HandleFunctionHooking(u32 address);
|
||||
|
||||
static void EndBlock(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
|
||||
static void UpdateNumLoadStoreInstructions(CachedInterpreter& cached_interpreter,
|
||||
UGeckoInstruction data);
|
||||
static void UpdateNumFloatingPointInstructions(CachedInterpreter& cached_interpreter,
|
||||
UGeckoInstruction data);
|
||||
static void WritePC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
|
||||
static void WriteBrokenBlockNPC(CachedInterpreter& cached_interpreter, UGeckoInstruction data);
|
||||
static bool CheckFPU(CachedInterpreter& cached_interpreter, u32 data);
|
||||
static bool CheckDSI(CachedInterpreter& cached_interpreter, u32 data);
|
||||
static bool CheckProgramException(CachedInterpreter& cached_interpreter, u32 data);
|
||||
static bool CheckBreakpoint(CachedInterpreter& cached_interpreter, u32 data);
|
||||
static bool CheckIdle(CachedInterpreter& cached_interpreter, u32 idle_pc);
|
||||
// Finds a free memory region and sets the code emitter to point at that region.
|
||||
// Returns false if no free memory region can be found.
|
||||
bool SetEmitterStateToFreeCodeRegion();
|
||||
|
||||
BlockCache m_block_cache{*this};
|
||||
std::vector<Instruction> m_code;
|
||||
void FreeRanges();
|
||||
void ResetFreeMemoryRanges();
|
||||
|
||||
struct EndBlockOperands;
|
||||
struct InterpretOperands;
|
||||
struct InterpretAndCheckExceptionsOperands;
|
||||
struct HLEFunctionOperands;
|
||||
struct WriteBrokenBlockNPCOperands;
|
||||
struct CheckHaltOperands;
|
||||
struct CheckIdleOperands;
|
||||
|
||||
static s32 EndBlock(PowerPC::PowerPCState& ppc_state, const EndBlockOperands& operands);
|
||||
template <bool write_pc>
|
||||
static s32 Interpret(PowerPC::PowerPCState& ppc_state, const InterpretOperands& operands);
|
||||
template <bool write_pc>
|
||||
static s32 InterpretAndCheckExceptions(PowerPC::PowerPCState& ppc_state,
|
||||
const InterpretAndCheckExceptionsOperands& operands);
|
||||
static s32 HLEFunction(PowerPC::PowerPCState& ppc_state, const HLEFunctionOperands& operands);
|
||||
static s32 WriteBrokenBlockNPC(PowerPC::PowerPCState& ppc_state,
|
||||
const WriteBrokenBlockNPCOperands& operands);
|
||||
static s32 CheckFPU(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands);
|
||||
static s32 CheckBreakpoint(PowerPC::PowerPCState& ppc_state, const CheckHaltOperands& operands);
|
||||
static s32 CheckIdle(PowerPC::PowerPCState& ppc_state, const CheckIdleOperands& operands);
|
||||
|
||||
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges;
|
||||
CachedInterpreterBlockCache m_block_cache;
|
||||
};
|
||||
|
||||
struct CachedInterpreter::EndBlockOperands
|
||||
{
|
||||
u32 downcount;
|
||||
u32 num_load_stores;
|
||||
u32 num_fp_inst;
|
||||
u32 : 32;
|
||||
};
|
||||
|
||||
struct CachedInterpreter::InterpretOperands
|
||||
{
|
||||
Interpreter& interpreter;
|
||||
void (*func)(Interpreter&, UGeckoInstruction); // Interpreter::Instruction
|
||||
u32 current_pc;
|
||||
UGeckoInstruction inst;
|
||||
};
|
||||
|
||||
// Operands stored after the CachedInterpreter::InterpretAndCheckExceptions<write_pc> callback.
// Extends InterpretOperands with what is needed when the instruction raises an exception.
struct CachedInterpreter::InterpretAndCheckExceptionsOperands : InterpretOperands
{
  // CheckExceptions() is called on this when EXCEPTION_DSI or EXCEPTION_PROGRAM is set after
  // the instruction has run.
  PowerPC::PowerPCManager& power_pc;
  // Subtracted from ppc_state.downcount on that exception path, before the block is exited.
  u32 downcount;
};
|
||||
|
||||
struct CachedInterpreter::HLEFunctionOperands
|
||||
{
|
||||
Core::System& system;
|
||||
u32 current_pc;
|
||||
u32 hook_index;
|
||||
};
|
||||
|
||||
struct CachedInterpreter::WriteBrokenBlockNPCOperands
|
||||
{
|
||||
u32 current_pc;
|
||||
u32 : 32;
|
||||
};
|
||||
|
||||
struct CachedInterpreter::CheckHaltOperands
|
||||
{
|
||||
PowerPC::PowerPCManager& power_pc;
|
||||
u32 current_pc;
|
||||
u32 downcount;
|
||||
};
|
||||
|
||||
struct CachedInterpreter::CheckIdleOperands
|
||||
{
|
||||
CoreTiming::CoreTimingManager& core_timing;
|
||||
u32 idle_pc;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2024 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterBlockCache.h"
|
||||
|
||||
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
// Forwards the owning JIT to the base block cache; this class adds no construction work.
CachedInterpreterBlockCache::CachedInterpreterBlockCache(JitBase& jit) : JitBaseBlockCache(jit)
{
}
|
||||
|
||||
void CachedInterpreterBlockCache::Init()
|
||||
{
|
||||
JitBaseBlockCache::Init();
|
||||
ClearRangesToFree();
|
||||
}
|
||||
|
||||
// Destroys the block through the base class and queues its code range [near_begin, near_end)
// so the owner can return that memory to its free list later.
void CachedInterpreterBlockCache::DestroyBlock(JitBlock& block)
{
  JitBaseBlockCache::DestroyBlock(block);

  // An empty range occupies no code memory, so there is nothing to queue.
  const bool has_code = block.near_begin != block.near_end;
  if (!has_code)
    return;

  m_ranges_to_free_on_next_codegen.emplace_back(block.near_begin, block.near_end);
}
|
||||
|
||||
void CachedInterpreterBlockCache::ClearRangesToFree()
|
||||
{
|
||||
m_ranges_to_free_on_next_codegen.clear();
|
||||
}
|
||||
|
||||
void CachedInterpreterBlockCache::WriteLinkBlock(const JitBlock::LinkData& source,
|
||||
const JitBlock* dest)
|
||||
{
|
||||
}
|
||||
|
||||
void CachedInterpreterBlockCache::WriteDestroyBlock(const JitBlock& block)
|
||||
{
|
||||
CachedInterpreterEmitter emitter(block.normalEntry, block.near_end);
|
||||
emitter.Write(CachedInterpreterEmitter::PoisonCallback);
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2024 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||
|
||||
class JitBase;
|
||||
|
||||
class CachedInterpreterBlockCache final : public JitBaseBlockCache
|
||||
{
|
||||
public:
|
||||
explicit CachedInterpreterBlockCache(JitBase& jit);
|
||||
|
||||
void Init() override;
|
||||
|
||||
void DestroyBlock(JitBlock& block) override;
|
||||
|
||||
void ClearRangesToFree();
|
||||
|
||||
const std::vector<std::pair<u8*, u8*>>& GetRangesToFree() const
|
||||
{
|
||||
return m_ranges_to_free_on_next_codegen;
|
||||
};
|
||||
|
||||
private:
|
||||
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
|
||||
void WriteDestroyBlock(const JitBlock& block) override;
|
||||
|
||||
std::vector<std::pair<u8*, u8*>> m_ranges_to_free_on_next_codegen;
|
||||
};
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright 2024 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "Core/PowerPC/CachedInterpreter/CachedInterpreterEmitter.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
|
||||
// Appends one callback pointer followed by `size` bytes of operand data at the current code
// pointer and advances the code pointer past both.
//
// callback: function pointer stored first.
// operands: bytes copied immediately after the callback; may be nullptr when size is 0.
// size:     number of operand bytes.
//
// If the write would reach or pass the end of the buffer, nothing is written and the
// write-failed flag is set instead.
void CachedInterpreterEmitter::Write(AnyCallback callback, const void* operands, std::size_t size)
{
  DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(m_code) % alignof(AnyCallback) == 0);

  // Compare sizes rather than forming `m_code + needed`, which could point past the end of the
  // buffer (or offset a null pointer in a default-constructed emitter). As before, a write that
  // would exactly fill the remaining space is rejected too.
  const std::ptrdiff_t remaining = m_code_end - m_code;
  const std::size_t needed = sizeof(callback) + size;
  if (remaining <= 0 || needed >= static_cast<std::size_t>(remaining))
  {
    m_write_failed = true;
    return;
  }

  std::memcpy(m_code, &callback, sizeof(callback));
  m_code += sizeof(callback);

  // std::memcpy requires valid pointers even for a zero-length copy, and the operand-less
  // overload passes nullptr here, so skip the copy when there are no operands.
  if (size != 0)
  {
    std::memcpy(m_code, operands, size);
    m_code += size;
  }
}
|
||||
|
||||
// Callback written over unused or destroyed code. Reaching it is a bug: it raises an assertion
// and returns 0. Both arguments are ignored.
s32 CachedInterpreterEmitter::PoisonCallback(PowerPC::PowerPCState& /*ppc_state*/,
                                             const void* /*operands*/)
{
  ASSERT_MSG(DYNA_REC, false,
             "The Cached Interpreter reached a poisoned callback. This should never happen!");

  constexpr s32 exit_block = 0;
  return exit_block;
}
|
||||
|
||||
void CachedInterpreterCodeBlock::PoisonMemory()
|
||||
{
|
||||
DEBUG_ASSERT(reinterpret_cast<std::uintptr_t>(region) % alignof(AnyCallback) == 0);
|
||||
DEBUG_ASSERT(region_size % sizeof(AnyCallback) == 0);
|
||||
std::fill(reinterpret_cast<AnyCallback*>(region),
|
||||
reinterpret_cast<AnyCallback*>(region + region_size), AnyCallbackCast(PoisonCallback));
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2024 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "Common/CodeBlock.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
namespace PowerPC
|
||||
{
|
||||
struct PowerPCState;
|
||||
}
|
||||
|
||||
class CachedInterpreterEmitter
|
||||
{
|
||||
protected:
|
||||
// The return value of most callbacks is the distance in memory to the next callback.
|
||||
// If a callback returns 0, the block will be exited. The return value is signed to
|
||||
// support block-linking. 32-bit return values seem to perform better than 64-bit ones.
|
||||
template <class Operands>
|
||||
using Callback = s32 (*)(PowerPC::PowerPCState& ppc_state, const Operands& operands);
|
||||
using AnyCallback = s32 (*)(PowerPC::PowerPCState& ppc_state, const void* operands);
|
||||
|
||||
template <class Operands>
|
||||
static AnyCallback AnyCallbackCast(Callback<Operands> callback)
|
||||
{
|
||||
return reinterpret_cast<AnyCallback>(callback);
|
||||
}
|
||||
static consteval AnyCallback AnyCallbackCast(AnyCallback callback) { return callback; }
|
||||
|
||||
public:
|
||||
CachedInterpreterEmitter() = default;
|
||||
explicit CachedInterpreterEmitter(u8* begin, u8* end) : m_code(begin), m_code_end(end) {}
|
||||
|
||||
template <class Operands>
|
||||
void Write(Callback<Operands> callback, const Operands& operands)
|
||||
{
|
||||
// I would use std::is_trivial_v, but almost every operands struct uses
|
||||
// references instead of pointers to make the callback functions nicer.
|
||||
static_assert(
|
||||
std::is_trivially_copyable_v<Operands> && std::is_trivially_destructible_v<Operands> &&
|
||||
alignof(Operands) <= alignof(AnyCallback) && sizeof(Operands) % alignof(AnyCallback) == 0);
|
||||
Write(AnyCallbackCast(callback), &operands, sizeof(Operands));
|
||||
}
|
||||
void Write(AnyCallback callback) { Write(callback, nullptr, 0); }
|
||||
|
||||
const u8* GetCodePtr() const { return m_code; }
|
||||
u8* GetWritableCodePtr() { return m_code; }
|
||||
const u8* GetCodeEnd() const { return m_code_end; };
|
||||
u8* GetWritableCodeEnd() { return m_code_end; };
|
||||
// Should be checked after a block of code has been generated to see if the code has been
|
||||
// successfully written to memory. Do not call the generated code when this returns true!
|
||||
bool HasWriteFailed() const { return m_write_failed; }
|
||||
|
||||
void SetCodePtr(u8* begin, u8* end)
|
||||
{
|
||||
m_code = begin;
|
||||
m_code_end = end;
|
||||
m_write_failed = false;
|
||||
};
|
||||
|
||||
static s32 PoisonCallback(PowerPC::PowerPCState& ppc_state, const void* operands);
|
||||
|
||||
private:
|
||||
void Write(AnyCallback callback, const void* operands, std::size_t size);
|
||||
|
||||
// Pointer to memory where code will be emitted to.
|
||||
u8* m_code = nullptr;
|
||||
// Pointer past the end of the memory region we're allowed to emit to.
|
||||
// Writes that would reach this memory are refused and will set the m_write_failed flag instead.
|
||||
u8* m_code_end = nullptr;
|
||||
// Set to true when a write request happens that would write past m_code_end.
|
||||
// Must be cleared with SetCodePtr() afterwards.
|
||||
bool m_write_failed = false;
|
||||
};
|
||||
|
||||
// Code block whose emitter is CachedInterpreterEmitter. The second template argument
// (`executable`) is false, so the region is not allocated as executable memory.
class CachedInterpreterCodeBlock : public Common::CodeBlock<CachedInterpreterEmitter, false>
{
private:
  // Fills the region with PoisonCallback entries.
  void PoisonMemory() override;
};
|
|
@ -1,14 +0,0 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "Core/PowerPC/CachedInterpreter/InterpreterBlockCache.h"
|
||||
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
BlockCache::BlockCache(JitBase& jit) : JitBaseBlockCache{jit}
|
||||
{
|
||||
}
|
||||
|
||||
// Deliberately a no-op: this block cache performs no work when asked to link blocks.
void BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest)
{
  // Nothing to do.
}
|
|
@ -1,17 +0,0 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Core/PowerPC/JitCommon/JitCache.h"
|
||||
|
||||
class JitBase;
|
||||
|
||||
class BlockCache final : public JitBaseBlockCache
|
||||
{
|
||||
public:
|
||||
explicit BlockCache(JitBase& jit);
|
||||
|
||||
private:
|
||||
void WriteLinkBlock(const JitBlock::LinkData& source, const JitBlock* dest) override;
|
||||
};
|
|
@ -428,7 +428,8 @@
|
|||
<ClInclude Include="Core\PatchEngine.h" />
|
||||
<ClInclude Include="Core\PowerPC\BreakPoints.h" />
|
||||
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.h" />
|
||||
<ClInclude Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.h" />
|
||||
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.h" />
|
||||
<ClInclude Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.h" />
|
||||
<ClInclude Include="Core\PowerPC\ConditionRegister.h" />
|
||||
<ClInclude Include="Core\PowerPC\CPUCoreBase.h" />
|
||||
<ClInclude Include="Core\PowerPC\Expression.h" />
|
||||
|
@ -1089,7 +1090,8 @@
|
|||
<ClCompile Include="Core\PatchEngine.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\BreakPoints.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreter.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\CachedInterpreter\InterpreterBlockCache.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterBlockCache.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\CachedInterpreter\CachedInterpreterEmitter.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\ConditionRegister.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\Expression.cpp" />
|
||||
<ClCompile Include="Core\PowerPC\GDBStub.cpp" />
|
||||
|
|
Loading…
Reference in New Issue