Jit: Add feature flag for performance monitor
By making the JIT cache check if the current state of MMCR0 and MMRC1 matches the state they had at the time the JIT block was compiled, we solve a correctness issue (marked in a comment as a speed hack). Not known to affect any games.
This commit is contained in:
parent
ca7e05bbc4
commit
62787085e1
|
@ -761,6 +761,7 @@ static void WriteRegister()
|
|||
break;
|
||||
case 131:
|
||||
ppc_state.spr[SPR_MMCR0] = re32hex(bufptr);
|
||||
PowerPC::MMCRUpdated(ppc_state);
|
||||
break;
|
||||
case 132:
|
||||
ppc_state.spr[SPR_PMC1] = re32hex(bufptr);
|
||||
|
@ -773,6 +774,7 @@ static void WriteRegister()
|
|||
break;
|
||||
case 135:
|
||||
ppc_state.spr[SPR_MMCR1] = re32hex(bufptr);
|
||||
PowerPC::MMCRUpdated(ppc_state);
|
||||
break;
|
||||
case 136:
|
||||
ppc_state.spr[SPR_PMC3] = re32hex(bufptr);
|
||||
|
|
|
@ -930,6 +930,7 @@ enum CPUEmuFeatureFlags : u32
|
|||
{
|
||||
FEATURE_FLAG_MSR_DR = 1 << 0,
|
||||
FEATURE_FLAG_MSR_IR = 1 << 1,
|
||||
FEATURE_FLAG_PERFMON = 1 << 2,
|
||||
};
|
||||
|
||||
constexpr s32 SignExt16(s16 x)
|
||||
|
|
|
@ -491,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst)
|
|||
}
|
||||
break;
|
||||
|
||||
case SPR_MMCR0:
|
||||
case SPR_MMCR1:
|
||||
MMCRUpdated(ppc_state);
|
||||
break;
|
||||
|
||||
case SPR_THRM1:
|
||||
case SPR_THRM2:
|
||||
case SPR_THRM3:
|
||||
|
|
|
@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
|
|||
gpr.Flush();
|
||||
fpr.Flush();
|
||||
|
||||
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
|
||||
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
|
||||
|
@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
|
|||
gpr.Reset(js.op->regsOut);
|
||||
fpr.Reset(js.op->GetFregsOut());
|
||||
|
||||
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
if (js.isLastInstruction)
|
||||
{
|
||||
|
@ -445,7 +445,6 @@ bool Jit64::Cleanup()
|
|||
did_something = true;
|
||||
}
|
||||
|
||||
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
|
||||
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||
|
|
|
@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
|||
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
|
||||
|
||||
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
// also flush the program counter
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
|||
fpr.ResetRegisters(js.op->GetFregsOut());
|
||||
gpr.ResetCRRegisters(js.op->crOut);
|
||||
|
||||
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
||||
if (js.op->canEndBlock)
|
||||
{
|
||||
if (js.isLastInstruction)
|
||||
{
|
||||
|
@ -276,7 +276,6 @@ void JitArm64::Cleanup()
|
|||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
|
||||
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
|
||||
{
|
||||
ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
|
||||
|
|
|
@ -130,8 +130,8 @@ class JitBaseBlockCache
|
|||
{
|
||||
public:
|
||||
// The size of the fast map is determined like this:
|
||||
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (2 feature flag bits)
|
||||
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000;
|
||||
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits)
|
||||
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000;
|
||||
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
|
||||
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;
|
||||
|
||||
|
|
|
@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func)
|
|||
func->flags = flags;
|
||||
}
|
||||
|
||||
static bool IsMtspr(UGeckoInstruction inst)
|
||||
{
|
||||
return inst.OPCD == 31 && inst.SUBOP10 == 467;
|
||||
}
|
||||
|
||||
static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst)
|
||||
{
|
||||
const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||
return index == SPR_MMCR0 || index == SPR_MMCR1;
|
||||
}
|
||||
|
||||
static bool InstructionCanEndBlock(const CodeOp& op)
|
||||
{
|
||||
return (op.opinfo->flags & FL_ENDBLOCK) &&
|
||||
(!IsMtspr(op.inst) || IsSprInstructionUsingMmcr(op.inst));
|
||||
}
|
||||
|
||||
bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
|
||||
{
|
||||
const GekkoOPInfo* a_info = a.opinfo;
|
||||
|
@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
|
|||
// [1] https://bugs.dolphin-emu.org/issues/5864#note-7
|
||||
if (a.canCauseException || b.canCauseException)
|
||||
return false;
|
||||
if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
|
||||
if (a.canEndBlock || b.canEndBlock)
|
||||
return false;
|
||||
if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
|
||||
if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
|
||||
return false;
|
||||
if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
|
||||
return false;
|
||||
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
|
||||
return false;
|
||||
|
@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
|
|||
|
||||
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
|
||||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
|
||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;
|
||||
code->canEndBlock = InstructionCanEndBlock(*code);
|
||||
|
||||
code->canCauseException = first_fpu_instruction ||
|
||||
(opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
|
||||
|
@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
|
|||
{
|
||||
// Just pick the next instruction
|
||||
address += 4;
|
||||
if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early
|
||||
if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early
|
||||
{
|
||||
found_exit = true;
|
||||
break;
|
||||
|
|
|
@ -374,7 +374,7 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
|
|||
{210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION},
|
||||
{242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION},
|
||||
{339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION},
|
||||
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION},
|
||||
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION},
|
||||
{371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION},
|
||||
{512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA},
|
||||
{595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION},
|
||||
|
|
|
@ -137,7 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p)
|
|||
}
|
||||
|
||||
RoundingModeUpdated(m_ppc_state);
|
||||
MSRUpdated(m_ppc_state);
|
||||
RecalculateAllFeatureFlags(m_ppc_state);
|
||||
|
||||
auto& mmu = m_system.GetMMU();
|
||||
mmu.IBATUpdated();
|
||||
|
@ -209,7 +209,7 @@ void PowerPCManager::ResetRegisters()
|
|||
SystemTimers::DecrementerSet();
|
||||
|
||||
RoundingModeUpdated(m_ppc_state);
|
||||
MSRUpdated(m_ppc_state);
|
||||
RecalculateAllFeatureFlags(m_ppc_state);
|
||||
}
|
||||
|
||||
void PowerPCManager::InitializeCPUCore(CPUCore cpu_core)
|
||||
|
@ -710,7 +710,27 @@ void MSRUpdated(PowerPCState& ppc_state)
|
|||
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
|
||||
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
|
||||
|
||||
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>((ppc_state.msr.Hex >> 4) & 0x3);
|
||||
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
|
||||
(ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3));
|
||||
}
|
||||
|
||||
void MMCRUpdated(PowerPCState& ppc_state)
|
||||
{
|
||||
const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1];
|
||||
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
|
||||
(ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON) | (perfmon ? FEATURE_FLAG_PERFMON : 0));
|
||||
}
|
||||
|
||||
void RecalculateAllFeatureFlags(PowerPCState& ppc_state)
|
||||
{
|
||||
static_assert(UReg_MSR{}.DR.StartBit() == 4);
|
||||
static_assert(UReg_MSR{}.IR.StartBit() == 5);
|
||||
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
|
||||
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
|
||||
|
||||
const bool perfmon = ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1];
|
||||
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(((ppc_state.msr.Hex >> 4) & 0x3) |
|
||||
(perfmon ? FEATURE_FLAG_PERFMON : 0));
|
||||
}
|
||||
|
||||
void CheckExceptionsFromJIT(PowerPCManager& power_pc)
|
||||
|
|
|
@ -349,5 +349,7 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc);
|
|||
|
||||
void RoundingModeUpdated(PowerPCState& ppc_state);
|
||||
void MSRUpdated(PowerPCState& ppc_state);
|
||||
void MMCRUpdated(PowerPCState& ppc_state);
|
||||
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);
|
||||
|
||||
} // namespace PowerPC
|
||||
|
|
Loading…
Reference in New Issue