Jit: Add feature flag for performance monitor

By making the JIT cache check whether the current state of MMCR0 and MMCR1
matches the state they had at the time the JIT block was compiled, we
solve a correctness issue (marked in a comment as a speed hack).

Not known to affect any games.
This commit is contained in:
JosJuice 2023-06-23 17:06:39 +02:00
parent ca7e05bbc4
commit 62787085e1
10 changed files with 63 additions and 16 deletions

View File

@ -761,6 +761,7 @@ static void WriteRegister()
break;
case 131:
ppc_state.spr[SPR_MMCR0] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break;
case 132:
ppc_state.spr[SPR_PMC1] = re32hex(bufptr);
@ -773,6 +774,7 @@ static void WriteRegister()
break;
case 135:
ppc_state.spr[SPR_MMCR1] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break;
case 136:
ppc_state.spr[SPR_PMC3] = re32hex(bufptr);

View File

@ -930,6 +930,7 @@ enum CPUEmuFeatureFlags : u32
{
FEATURE_FLAG_MSR_DR = 1 << 0,
FEATURE_FLAG_MSR_IR = 1 << 1,
FEATURE_FLAG_PERFMON = 1 << 2,
};
constexpr s32 SignExt16(s16 x)

View File

@ -491,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst)
}
break;
case SPR_MMCR0:
case SPR_MMCR1:
MMCRUpdated(ppc_state);
break;
case SPR_THRM1:
case SPR_THRM2:
case SPR_THRM3:

View File

@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Reset(js.op->regsOut);
fpr.Reset(js.op->GetFregsOut());
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
if (js.isLastInstruction)
{
@ -445,7 +445,6 @@ bool Jit64::Cleanup()
did_something = true;
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
{
ABI_PushRegistersAndAdjustStack({}, 0);

View File

@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
// also flush the program counter
ARM64Reg WA = gpr.GetReg();
@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
fpr.ResetRegisters(js.op->GetFregsOut());
gpr.ResetCRRegisters(js.op->crOut);
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
if (js.isLastInstruction)
{
@ -276,7 +276,6 @@ void JitArm64::Cleanup()
SetJumpTarget(exit);
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
{
ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,

View File

@ -130,8 +130,8 @@ class JitBaseBlockCache
{
public:
// The size of the fast map is determined like this:
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (2 feature flag bits)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000;
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;

View File

@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func)
func->flags = flags;
}
// Reports whether the instruction is mtspr, i.e. primary opcode 31 with
// extended opcode 467.
static bool IsMtspr(UGeckoInstruction inst)
{
  if (inst.OPCD != 31)
    return false;

  return inst.SUBOP10 == 467;
}
// Rebuilds the SPR index from the instruction's SPRU field (shifted up by 5)
// and the low 5 bits of its SPRL field, then reports whether that index is
// MMCR0 or MMCR1.
static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst)
{
  const u32 upper_part = inst.SPRU << 5;
  const u32 lower_part = inst.SPRL & 0x1F;

  switch (upper_part | lower_part)
  {
  case SPR_MMCR0:
  case SPR_MMCR1:
    return true;
  default:
    return false;
  }
}
// Decides whether an op may terminate a block. The op must carry FL_ENDBLOCK;
// in addition, an mtspr only qualifies when it targets MMCR0 or MMCR1.
static bool InstructionCanEndBlock(const CodeOp& op)
{
  // Without the flag the instruction never ends a block.
  if ((op.opinfo->flags & FL_ENDBLOCK) == 0)
    return false;

  // Any flagged instruction other than mtspr ends the block unconditionally.
  if (!IsMtspr(op.inst))
    return true;

  // For mtspr, only writes to the MMCR registers count.
  return IsSprInstructionUsingMmcr(op.inst);
}
bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
{
const GekkoOPInfo* a_info = a.opinfo;
@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
// [1] https://bugs.dolphin-emu.org/issues/5864#note-7
if (a.canCauseException || b.canCauseException)
return false;
if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (a.canEndBlock || b.canEndBlock)
return false;
if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false;
@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;
code->canEndBlock = InstructionCanEndBlock(*code);
code->canCauseException = first_fpu_instruction ||
(opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
{
// Just pick the next instruction
address += 4;
if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early
if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early
{
found_exit = true;
break;

View File

@ -374,7 +374,7 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
{210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION},
{242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION},
{339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION},
{371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION},
{512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA},
{595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION},

View File

@ -137,7 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p)
}
RoundingModeUpdated(m_ppc_state);
MSRUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
auto& mmu = m_system.GetMMU();
mmu.IBATUpdated();
@ -209,7 +209,7 @@ void PowerPCManager::ResetRegisters()
SystemTimers::DecrementerSet();
RoundingModeUpdated(m_ppc_state);
MSRUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
}
void PowerPCManager::InitializeCPUCore(CPUCore cpu_core)
@ -710,7 +710,27 @@ void MSRUpdated(PowerPCState& ppc_state)
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>((ppc_state.msr.Hex >> 4) & 0x3);
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
(ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3));
}
// Refreshes FEATURE_FLAG_PERFMON in ppc_state.feature_flags: the bit is set
// when MMCR0 or MMCR1 is nonzero and cleared otherwise. All other feature
// flag bits keep their current value.
void MMCRUpdated(PowerPCState& ppc_state)
{
  // Start from the existing flags with the perfmon bit cleared.
  auto updated_flags = ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON;

  if (ppc_state.spr[SPR_MMCR0] != 0 || ppc_state.spr[SPR_MMCR1] != 0)
    updated_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(updated_flags);
}
// Rebuilds ppc_state.feature_flags from scratch. MSR bits 4 and 5 (DR and IR)
// become flag bits 0 and 1, and FEATURE_FLAG_PERFMON is added when MMCR0 or
// MMCR1 is nonzero. Whatever was previously stored in feature_flags is
// discarded.
void RecalculateAllFeatureFlags(PowerPCState& ppc_state)
{
  // The shift-and-mask below relies on this exact bit layout.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  auto rebuilt_flags = (ppc_state.msr.Hex >> 4) & 0x3;

  if (ppc_state.spr[SPR_MMCR0] != 0 || ppc_state.spr[SPR_MMCR1] != 0)
    rebuilt_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(rebuilt_flags);
}
void CheckExceptionsFromJIT(PowerPCManager& power_pc)

View File

@ -349,5 +349,7 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc);
void RoundingModeUpdated(PowerPCState& ppc_state);
void MSRUpdated(PowerPCState& ppc_state);
void MMCRUpdated(PowerPCState& ppc_state);
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);
} // namespace PowerPC