Merge pull request #11988 from JosJuice/jit-feature-flags

Jit: Replace "msrBits" with "featureFlags" and use for performance monitor
This commit is contained in:
Mai 2023-12-05 03:13:50 -05:00 committed by GitHub
commit a65246ec3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 291 additions and 140 deletions

View File

@ -460,6 +460,9 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename)
SetupBAT(system, /*is_wii*/ false); SetupBAT(system, /*is_wii*/ false);
ppc_state.pc = 0x81200150; ppc_state.pc = 0x81200150;
PowerPC::MSRUpdated(ppc_state);
return true; return true;
} }

View File

@ -74,6 +74,7 @@ void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state)
ppc_state.msr.DR = 1; ppc_state.msr.DR = 1;
ppc_state.msr.IR = 1; ppc_state.msr.IR = 1;
ppc_state.msr.FP = 1; ppc_state.msr.FP = 1;
PowerPC::MSRUpdated(ppc_state);
} }
void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii) void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii)

View File

@ -651,6 +651,8 @@ void FifoPlayer::LoadMemory()
ppc_state.spr[SPR_DBAT1U] = 0xc0001fff; ppc_state.spr[SPR_DBAT1U] = 0xc0001fff;
ppc_state.spr[SPR_DBAT1L] = 0x0000002a; ppc_state.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::MSRUpdated(ppc_state);
auto& mmu = system.GetMMU(); auto& mmu = system.GetMMU();
mmu.DBATUpdated(); mmu.DBATUpdated();
mmu.IBATUpdated(); mmu.IBATUpdated();

View File

@ -84,10 +84,15 @@ bool Load()
} }
auto& power_pc = system.GetPowerPC(); auto& power_pc = system.GetPowerPC();
const PowerPC::CoreMode core_mode = power_pc.GetMode(); const PowerPC::CoreMode core_mode = power_pc.GetMode();
power_pc.SetMode(PowerPC::CoreMode::Interpreter); power_pc.SetMode(PowerPC::CoreMode::Interpreter);
power_pc.GetPPCState().msr.Hex = 0;
power_pc.GetPPCState().pc = 0x3400; PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState();
ppc_state.msr.Hex = 0;
ppc_state.pc = 0x3400;
PowerPC::MSRUpdated(ppc_state);
NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC."); NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC.");
// IOS writes 0 to 0x30f8 before bootstrapping the PPC. Once started, the IPL eventually writes // IOS writes 0 to 0x30f8 before bootstrapping the PPC. Once started, the IPL eventually writes

View File

@ -661,6 +661,7 @@ static void WriteRegister()
break; break;
case 65: case 65:
ppc_state.msr.Hex = re32hex(bufptr); ppc_state.msr.Hex = re32hex(bufptr);
PowerPC::MSRUpdated(ppc_state);
break; break;
case 66: case 66:
ppc_state.cr.Set(re32hex(bufptr)); ppc_state.cr.Set(re32hex(bufptr));
@ -760,6 +761,7 @@ static void WriteRegister()
break; break;
case 131: case 131:
ppc_state.spr[SPR_MMCR0] = re32hex(bufptr); ppc_state.spr[SPR_MMCR0] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break; break;
case 132: case 132:
ppc_state.spr[SPR_PMC1] = re32hex(bufptr); ppc_state.spr[SPR_PMC1] = re32hex(bufptr);
@ -772,6 +774,7 @@ static void WriteRegister()
break; break;
case 135: case 135:
ppc_state.spr[SPR_MMCR1] = re32hex(bufptr); ppc_state.spr[SPR_MMCR1] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break; break;
case 136: case 136:
ppc_state.spr[SPR_PMC3] = re32hex(bufptr); ppc_state.spr[SPR_PMC3] = re32hex(bufptr);

View File

@ -926,6 +926,13 @@ enum
EXCEPTION_FAKE_MEMCHECK_HIT = 0x00000200, EXCEPTION_FAKE_MEMCHECK_HIT = 0x00000200,
}; };
// Bit flags stored in PowerPCState::feature_flags. The JITs read them at
// compile time to specialize generated code, and the dispatcher compares them
// (together with the PC) when looking up a compiled block.
enum CPUEmuFeatureFlags : u32
{
  // Bit 0: copy of MSR.DR (MSR bit 4 shifted down by 4).
  FEATURE_FLAG_MSR_DR = 0x1,
  // Bit 1: copy of MSR.IR (MSR bit 5 shifted down by 4).
  FEATURE_FLAG_MSR_IR = 0x2,
  // Bit 2: when set, the JITs' Cleanup() emits a call to
  // PowerPC::UpdatePerformanceMonitor.
  FEATURE_FLAG_PERFMON = 0x4,
};
constexpr s32 SignExt16(s16 x) constexpr s32 SignExt16(s16 x)
{ {
return (s32)x; return (s32)x;

View File

@ -134,6 +134,9 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst)
// else // else
// set NPC to saved offset and resume // set NPC to saved offset and resume
ppc_state.npc = SRR0(ppc_state); ppc_state.npc = SRR0(ppc_state);
PowerPC::MSRUpdated(ppc_state);
interpreter.m_end_block = true; interpreter.m_end_block = true;
} }

View File

@ -181,6 +181,8 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst)
ppc_state.msr.Hex = ppc_state.gpr[inst.RS]; ppc_state.msr.Hex = ppc_state.gpr[inst.RS];
PowerPC::MSRUpdated(ppc_state);
// FE0/FE1 may have been set // FE0/FE1 may have been set
CheckFPExceptions(ppc_state); CheckFPExceptions(ppc_state);
@ -489,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst)
} }
break; break;
case SPR_MMCR0:
case SPR_MMCR1:
MMCRUpdated(ppc_state);
break;
case SPR_THRM1: case SPR_THRM1:
case SPR_THRM2: case SPR_THRM2:
case SPR_THRM3: case SPR_THRM3:

View File

@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
if (js.op->opinfo->flags & FL_ENDBLOCK) if (js.op->canEndBlock)
{ {
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4)); MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Reset(js.op->regsOut); gpr.Reset(js.op->regsOut);
fpr.Reset(js.op->GetFregsOut()); fpr.Reset(js.op->GetFregsOut());
if (js.op->opinfo->flags & FL_ENDBLOCK) if (js.op->canEndBlock)
{ {
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
@ -445,8 +445,7 @@ bool Jit64::Cleanup()
did_something = true; did_something = true;
} }
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
{ {
ABI_PushRegistersAndAdjustStack({}, 0); ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionCCCP(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, ABI_CallFunctionCCCP(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
@ -483,8 +482,7 @@ void Jit64::FakeBLCall(u32 after)
// We may need to fake the BLR stack on inlined CALL instructions. // We may need to fake the BLR stack on inlined CALL instructions.
// Else we can't return to this location any more. // Else we can't return to this location any more.
MOV(64, R(RSCRATCH2), MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2); PUSH(RSCRATCH2);
FixupBranch skip_exit = CALL(); FixupBranch skip_exit = CALL();
POP(RSCRATCH2); POP(RSCRATCH2);
@ -497,8 +495,11 @@ void Jit64::EmitUpdateMembase()
MOV(64, R(RMEM), PPCSTATE(mem_ptr)); MOV(64, R(RMEM), PPCSTATE(mem_ptr));
} }
void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg) void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{ {
ASSERT(!msr.IsSimpleReg(scratch_reg));
// Update mem_ptr
auto& memory = m_system.GetMemory(); auto& memory = m_system.GetMemory();
if (msr.IsImm()) if (msr.IsImm())
{ {
@ -513,6 +514,26 @@ void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg)
CMOVcc(64, RMEM, R(scratch_reg), CC_Z); CMOVcc(64, RMEM, R(scratch_reg), CC_Z);
} }
MOV(64, PPCSTATE(mem_ptr), R(RMEM)); MOV(64, PPCSTATE(mem_ptr), R(RMEM));
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
if (msr.IsImm())
{
MOV(32, PPCSTATE(feature_flags), Imm32(other_feature_flags | ((msr.Imm32() >> 4) & 0x3)));
}
else
{
MOV(32, R(scratch_reg), msr);
SHR(32, R(scratch_reg), Imm8(4));
AND(32, R(scratch_reg), Imm32(0x3));
if (other_feature_flags != 0)
OR(32, R(scratch_reg), Imm32(other_feature_flags));
MOV(32, PPCSTATE(feature_flags), R(scratch_reg));
}
} }
void Jit64::WriteExit(u32 destination, bool bl, u32 after) void Jit64::WriteExit(u32 destination, bool bl, u32 after)
@ -524,8 +545,7 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after)
if (bl) if (bl)
{ {
MOV(64, R(RSCRATCH2), MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2); PUSH(RSCRATCH2);
} }
@ -582,8 +602,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
if (bl) if (bl)
{ {
MOV(64, R(RSCRATCH2), MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2); PUSH(RSCRATCH2);
} }
@ -611,10 +630,9 @@ void Jit64::WriteBLRExit()
bool disturbed = Cleanup(); bool disturbed = Cleanup();
if (disturbed) if (disturbed)
MOV(32, R(RSCRATCH), PPCSTATE(pc)); MOV(32, R(RSCRATCH), PPCSTATE(pc));
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; if (m_ppc_state.feature_flags != 0)
if (msr_bits != 0)
{ {
MOV(32, R(RSCRATCH2), Imm32(msr_bits)); MOV(32, R(RSCRATCH2), Imm32(m_ppc_state.feature_flags));
SHL(64, R(RSCRATCH2), Imm8(32)); SHL(64, R(RSCRATCH2), Imm8(32));
OR(64, R(RSCRATCH), R(RSCRATCH2)); OR(64, R(RSCRATCH), R(RSCRATCH2));
} }

View File

@ -88,7 +88,7 @@ public:
// Utilities for use by opcodes // Utilities for use by opcodes
void EmitUpdateMembase(); void EmitUpdateMembase();
void EmitStoreMembase(const Gen::OpArg& msr, Gen::X64Reg scratch_reg); void MSRUpdated(const Gen::OpArg& msr, Gen::X64Reg scratch_reg);
void FakeBLCall(u32 after); void FakeBLCall(u32 after);
void WriteExit(u32 destination, bool bl = false, u32 after = 0); void WriteExit(u32 destination, bool bl = false, u32 after = 0);
void JustWriteExit(u32 destination, bool bl, u32 after); void JustWriteExit(u32 destination, bool bl, u32 after);

View File

@ -19,6 +19,7 @@
#include "Core/System.h" #include "Core/System.h"
using namespace Gen; using namespace Gen;
Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit) Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
{ {
} }
@ -118,19 +119,17 @@ void Jit64AsmRoutineManager::Generate()
{ {
if (m_jit.GetBlockCache()->GetEntryPoints()) if (m_jit.GetBlockCache()->GetEntryPoints())
{ {
MOV(32, R(RSCRATCH2), PPCSTATE(msr)); MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK)); SHL(64, R(RSCRATCH2), Imm8(32));
SHL(64, R(RSCRATCH2), Imm8(28));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc)); MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH_EXTRA), R(RSCRATCH2)); OR(64, R(RSCRATCH_EXTRA), R(RSCRATCH2));
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetEntryPoints()); u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetEntryPoints());
MOV(64, R(RSCRATCH2), Imm64(icache)); MOV(64, R(RSCRATCH2), Imm64(icache));
// The entry points map is indexed by ((msrBits << 26) | (address >> 2)). // The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
// The map contains 8 byte 64-bit pointers and that means we need to shift // The map contains 8-byte pointers and that means we need to shift feature_flags
// msr left by 29 bits and address left by 1 bit to get the correct offset // left by 33 bits and pc left by 1 bit to get the correct offset in the map.
// in the map.
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_2, 0)); MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_2, 0));
} }
else else
@ -160,17 +159,17 @@ void Jit64AsmRoutineManager::Generate()
if (!m_jit.GetBlockCache()->GetEntryPoints()) if (!m_jit.GetBlockCache()->GetEntryPoints())
{ {
// Check block.msrBits. // Check block.feature_flags.
MOV(32, R(RSCRATCH2), PPCSTATE(msr)); MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
// Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC. // Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC.
SHL(64, R(RSCRATCH_EXTRA), Imm8(32)); SHL(64, R(RSCRATCH_EXTRA), Imm8(32));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
static_assert(offsetof(JitBlockData, msrBits) + 4 == static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress)); offsetof(JitBlockData, effectiveAddress));
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits)))); CMP(64, R(RSCRATCH2),
MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, feature_flags))));
state_mismatch = J_CC(CC_NE); state_mismatch = J_CC(CC_NE);
// Success; branch to the block we found. // Success; branch to the block we found.

View File

@ -47,6 +47,7 @@ void Jit64::rfi(UGeckoInstruction inst)
gpr.Flush(); gpr.Flush();
fpr.Flush(); fpr.Flush();
// See Interpreter rfi for details // See Interpreter rfi for details
const u32 mask = 0x87C0FFFF; const u32 mask = 0x87C0FFFF;
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
@ -56,7 +57,9 @@ void Jit64::rfi(UGeckoInstruction inst)
AND(32, R(RSCRATCH), Imm32(mask & clearMSR13)); AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
OR(32, PPCSTATE(msr), R(RSCRATCH)); OR(32, PPCSTATE(msr), R(RSCRATCH));
EmitStoreMembase(R(RSCRATCH), RSCRATCH2); // Call MSRUpdated to update feature_flags. Only the bits that come from SRR1
// are relevant for this, so it's fine to pass in RSCRATCH in place of msr.
MSRUpdated(R(RSCRATCH), RSCRATCH2);
// NPC = SRR0; // NPC = SRR0;
MOV(32, R(RSCRATCH), PPCSTATE_SRR0); MOV(32, R(RSCRATCH), PPCSTATE_SRR0);

View File

@ -320,7 +320,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
FixupBranch bat_lookup_failed; FixupBranch bat_lookup_failed;
MOV(32, R(effective_address), R(addr)); MOV(32, R(effective_address), R(addr));
const u8* loop_start = GetCodePtr(); const u8* loop_start = GetCodePtr();
if (m_ppc_state.msr.IR) if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
{ {
// Translate effective address to physical address. // Translate effective address to physical address.
bat_lookup_failed = BATAddressLookup(addr, tmp, m_jit.m_mmu.GetIBATTable().data()); bat_lookup_failed = BATAddressLookup(addr, tmp, m_jit.m_mmu.GetIBATTable().data());
@ -349,7 +349,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(invalidate_needed); SetJumpTarget(invalidate_needed);
if (m_ppc_state.msr.IR) if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
SetJumpTarget(bat_lookup_failed); SetJumpTarget(bat_lookup_failed);
BitSet32 registersInUse = CallerSavedRegistersInUse(); BitSet32 registersInUse = CallerSavedRegistersInUse();
@ -421,7 +421,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
end_dcbz_hack = J_CC(CC_L); end_dcbz_hack = J_CC(CC_L);
} }
bool emit_fast_path = m_ppc_state.msr.DR && m_jit.jo.fastmem_arena; bool emit_fast_path = (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR) && m_jit.jo.fastmem_arena;
if (emit_fast_path) if (emit_fast_path)
{ {

View File

@ -23,7 +23,7 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on. // For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!m_ppc_state.msr.DR); FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
s32 offset = inst.SIMM_12; s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4; bool indexed = inst.OPCD == 4;
@ -112,7 +112,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on. // For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!m_ppc_state.msr.DR); FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
s32 offset = inst.SIMM_12; s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4; bool indexed = inst.OPCD == 4;

View File

@ -439,7 +439,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
RegCache::Realize(Rs); RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs); MOV(32, PPCSTATE(msr), Rs);
EmitStoreMembase(Rs, RSCRATCH2); MSRUpdated(Rs, RSCRATCH2);
} }
gpr.Flush(); gpr.Flush();

View File

@ -371,7 +371,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
} }
FixupBranch exit; FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; const bool dr_set =
(flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR);
const bool fast_check_address = const bool fast_check_address =
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address) if (fast_check_address)
@ -544,7 +545,8 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
} }
FixupBranch exit; FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR; const bool dr_set =
(flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR);
const bool fast_check_address = const bool fast_check_address =
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache; !force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address) if (fast_check_address)

View File

@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
if (js.op->opinfo->flags & FL_ENDBLOCK) if (js.op->canEndBlock)
{ {
// also flush the program counter // also flush the program counter
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
fpr.ResetRegisters(js.op->GetFregsOut()); fpr.ResetRegisters(js.op->GetFregsOut());
gpr.ResetCRRegisters(js.op->crOut); gpr.ResetCRRegisters(js.op->crOut);
if (js.op->opinfo->flags & FL_ENDBLOCK) if (js.op->canEndBlock)
{ {
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
@ -276,8 +276,7 @@ void JitArm64::Cleanup()
SetJumpTarget(exit); SetJumpTarget(exit);
} }
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time. if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
{ {
ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst, ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
js.numFloatingPointInst, &m_ppc_state); js.numFloatingPointInst, &m_ppc_state);
@ -348,27 +347,61 @@ void JitArm64::EmitUpdateMembase()
LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
} }
void JitArm64::EmitStoreMembase(u32 msr) void JitArm64::MSRUpdated(u32 msr)
{ {
// Update mem_ptr
auto& memory = m_system.GetMemory(); auto& memory = m_system.GetMemory();
MOVP2R(MEM_REG, MOVP2R(MEM_REG,
UReg_MSR(msr).DR ? UReg_MSR(msr).DR ?
(jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()) : (jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()) :
(jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase())); (jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()));
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
const u32 feature_flags = other_feature_flags | ((msr >> 4) & 0x3);
if (feature_flags == 0)
{
STR(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(feature_flags));
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, feature_flags);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
gpr.Unlock(WA);
}
} }
void JitArm64::EmitStoreMembase(const ARM64Reg& msr) void JitArm64::MSRUpdated(ARM64Reg msr)
{ {
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
// Update mem_ptr
auto& memory = m_system.GetMemory(); auto& memory = m_system.GetMemory();
ARM64Reg WD = gpr.GetReg();
ARM64Reg XD = EncodeRegTo64(WD);
MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XD, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), 32)); TST(msr, LogicalImm(1 << (31 - 27), 32));
CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ); CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
gpr.Unlock(WD);
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
UBFX(WA, msr, 4, 2);
if (other_feature_flags != 0)
ORR(WA, WA, LogicalImm(32, other_feature_flags));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
gpr.Unlock(WA);
} }
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return, void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
@ -383,20 +416,20 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
const u8* host_address_after_return; const u8* host_address_after_return;
if (LK) if (LK)
{ {
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1; ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{ {
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
} }
else if (msr_bits == 0) else if (feature_flags == 0)
{ {
reg_to_push = EncodeRegTo64(exit_address_after_return_reg); reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
} }
else else
{ {
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1); ARM64Reg::X1);
} }
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2; constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
@ -487,20 +520,20 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
} }
else else
{ {
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1; ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{ {
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return); MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
} }
else if (msr_bits == 0) else if (feature_flags == 0)
{ {
reg_to_push = EncodeRegTo64(exit_address_after_return_reg); reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
} }
else else
{ {
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1); ARM64Reg::X1);
} }
constexpr s32 adr_offset = sizeof(u32) * 3; constexpr s32 adr_offset = sizeof(u32) * 3;
@ -558,17 +591,17 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
// function has been called! // function has been called!
gpr.Lock(ARM64Reg::W30); gpr.Lock(ARM64Reg::W30);
} }
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack // Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg after_reg = ARM64Reg::INVALID_REG; ARM64Reg after_reg = ARM64Reg::INVALID_REG;
ARM64Reg reg_to_push; ARM64Reg reg_to_push;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG) if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{ {
after_reg = gpr.GetReg(); after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg); reg_to_push = EncodeRegTo64(after_reg);
MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return); MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return);
} }
else if (msr_bits == 0) else if (feature_flags == 0)
{ {
reg_to_push = EncodeRegTo64(exit_address_after_return_reg); reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
} }
@ -576,7 +609,8 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
{ {
after_reg = gpr.GetReg(); after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg); reg_to_push = EncodeRegTo64(after_reg);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push); ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
reg_to_push);
} }
ARM64Reg code_reg = gpr.GetReg(); ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3; constexpr s32 adr_offset = sizeof(u32) * 3;
@ -640,16 +674,16 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
Cleanup(); Cleanup();
EndTimeProfile(js.curBlock); EndTimeProfile(js.curBlock);
// Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC. // Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16); LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK; const u64 feature_flags = m_ppc_state.feature_flags;
if (msr_bits == 0) if (feature_flags == 0)
{ {
CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC)); CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC));
} }
else else
{ {
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0); ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), feature_flags << 32, ARM64Reg::X0);
CMP(ARM64Reg::X1, ARM64Reg::X0); CMP(ARM64Reg::X1, ARM64Reg::X0);
} }
FixupBranch no_match = B(CC_NEQ); FixupBranch no_match = B(CC_NEQ);

View File

@ -310,8 +310,8 @@ protected:
void EndTimeProfile(JitBlock* b); void EndTimeProfile(JitBlock* b);
void EmitUpdateMembase(); void EmitUpdateMembase();
void EmitStoreMembase(u32 msr); void MSRUpdated(u32 msr);
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr); void MSRUpdated(Arm64Gen::ARM64Reg msr);
// Exits // Exits
void void

View File

@ -64,11 +64,11 @@ void JitArm64::rfi(UGeckoInstruction inst)
ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1 ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA
gpr.Unlock(WB, WC);
EmitStoreMembase(WA); MSRUpdated(WA);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0)); LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0));
gpr.Unlock(WB, WC);
WriteExceptionExit(WA); WriteExceptionExit(WA);
gpr.Unlock(WA); gpr.Unlock(WA);

View File

@ -727,7 +727,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
// Translate effective address to physical address. // Translate effective address to physical address.
const u8* loop_start = GetCodePtr(); const u8* loop_start = GetCodePtr();
FixupBranch bat_lookup_failed; FixupBranch bat_lookup_failed;
if (m_ppc_state.msr.IR) if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
{ {
bat_lookup_failed = bat_lookup_failed =
BATAddressLookup(physical_addr, effective_addr, WA, m_mmu.GetIBATTable().data()); BATAddressLookup(physical_addr, effective_addr, WA, m_mmu.GetIBATTable().data());
@ -756,7 +756,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
SwitchToFarCode(); SwitchToFarCode();
SetJumpTarget(invalidate_needed); SetJumpTarget(invalidate_needed);
if (m_ppc_state.msr.IR) if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
SetJumpTarget(bat_lookup_failed); SetJumpTarget(bat_lookup_failed);
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();

View File

@ -23,7 +23,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
// If fastmem is enabled, the asm routines assume address translation is on. // If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR); FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
// X30 is LR // X30 is LR
// X0 is the address // X0 is the address
@ -151,7 +152,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff); JITDISABLE(bJITLoadStorePairedOff);
// If fastmem is enabled, the asm routines assume address translation is on. // If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR); FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
// X30 is LR // X30 is LR
// X0 contains the scale // X0 contains the scale

View File

@ -94,12 +94,12 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
const bool imm_value = gpr.IsImm(inst.RS); const bool imm_value = gpr.IsImm(inst.RS);
if (imm_value) if (imm_value)
EmitStoreMembase(gpr.GetImm(inst.RS)); MSRUpdated(gpr.GetImm(inst.RS));
STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr)); STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));
if (!imm_value) if (!imm_value)
EmitStoreMembase(gpr.R(inst.RS)); MSRUpdated(gpr.R(inst.RS));
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);

View File

@ -100,15 +100,20 @@ void JitArm64::GenerateAsm()
if (GetBlockCache()->GetEntryPoints()) if (GetBlockCache()->GetEntryPoints())
{ {
// Check if there is a block // Check if there is a block
ARM64Reg pc_and_msr = ARM64Reg::X8; ARM64Reg feature_flags = ARM64Reg::W8;
ARM64Reg cache_base = ARM64Reg::X9; ARM64Reg pc_and_feature_flags = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10; ARM64Reg cache_base = ARM64Reg::X10;
LDR(IndexType::Unsigned, EncodeRegTo32(pc_and_msr), PPC_REG, PPCSTATE_OFF(msr)); ARM64Reg block = ARM64Reg::X11;
LDR(IndexType::Unsigned, feature_flags, PPC_REG, PPCSTATE_OFF(feature_flags));
MOVP2R(cache_base, GetBlockCache()->GetEntryPoints()); MOVP2R(cache_base, GetBlockCache()->GetEntryPoints());
// The entry points map is indexed by ((msrBits << 26) | (address >> 2)). // The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
UBFIZ(pc_and_msr, pc_and_msr, 26, 6); // The map contains 8-byte pointers and that means we need to shift feature_flags
BFXIL(pc_and_msr, EncodeRegTo64(DISPATCHER_PC), 2, 30); // left by 33 bits and pc left by 1 bit to get the correct offset in the map.
LDR(block, cache_base, ArithOption(pc_and_msr, true)); LSL(pc_and_feature_flags, EncodeRegTo64(DISPATCHER_PC), 1);
BFI(pc_and_feature_flags, EncodeRegTo64(feature_flags), 33, 31);
LDR(block, cache_base, pc_and_feature_flags);
FixupBranch not_found = CBZ(block); FixupBranch not_found = CBZ(block);
BR(block); BR(block);
SetJumpTarget(not_found); SetJumpTarget(not_found);
@ -119,8 +124,8 @@ void JitArm64::GenerateAsm()
ARM64Reg cache_base = ARM64Reg::X9; ARM64Reg cache_base = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10; ARM64Reg block = ARM64Reg::X10;
ARM64Reg pc = ARM64Reg::W11; ARM64Reg pc = ARM64Reg::W11;
ARM64Reg msr = ARM64Reg::W12; ARM64Reg feature_flags = ARM64Reg::W12;
ARM64Reg msr2 = ARM64Reg::W13; ARM64Reg feature_flags_2 = ARM64Reg::W13;
ARM64Reg entry = ARM64Reg::X14; ARM64Reg entry = ARM64Reg::X14;
// iCache[(address >> 2) & iCache_Mask]; // iCache[(address >> 2) & iCache_Mask];
@ -130,25 +135,24 @@ void JitArm64::GenerateAsm()
LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true)); LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true));
FixupBranch not_found = CBZ(block); FixupBranch not_found = CBZ(block);
// b.effectiveAddress != addr || b.msrBits != msr // b.effectiveAddress != addr || b.feature_flags != feature_flags
static_assert(offsetof(JitBlockData, msrBits) + 4 == static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress)); offsetof(JitBlockData, effectiveAddress));
LDP(IndexType::Signed, msr, pc, block, offsetof(JitBlockData, msrBits)); LDP(IndexType::Signed, feature_flags, pc, block, offsetof(JitBlockData, feature_flags));
LDR(IndexType::Unsigned, msr2, PPC_REG, PPCSTATE_OFF(msr)); LDR(IndexType::Unsigned, feature_flags_2, PPC_REG, PPCSTATE_OFF(feature_flags));
CMP(pc, DISPATCHER_PC); CMP(pc, DISPATCHER_PC);
FixupBranch pc_mismatch = B(CC_NEQ); FixupBranch pc_mismatch = B(CC_NEQ);
LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry)); LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry));
AND(msr2, msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32)); CMP(feature_flags, feature_flags_2);
CMP(msr, msr2); FixupBranch feature_flags_mismatch = B(CC_NEQ);
FixupBranch msr_mismatch = B(CC_NEQ);
// return blocks[block_num].normalEntry; // return blocks[block_num].normalEntry;
BR(entry); BR(entry);
SetJumpTarget(not_found); SetJumpTarget(not_found);
SetJumpTarget(pc_mismatch); SetJumpTarget(pc_mismatch);
SetJumpTarget(msr_mismatch); SetJumpTarget(feature_flags_mismatch);
} }
} }

View File

@ -110,7 +110,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
JitBlock& b = block_map.emplace(physical_address, JitBlock())->second; JitBlock& b = block_map.emplace(physical_address, JitBlock())->second;
b.effectiveAddress = em_address; b.effectiveAddress = em_address;
b.physicalAddress = physical_address; b.physicalAddress = physical_address;
b.msrBits = m_jit.m_ppc_state.msr.Hex & JIT_CACHE_MSR_MASK; b.feature_flags = m_jit.m_ppc_state.feature_flags;
b.linkData.clear(); b.linkData.clear();
b.fast_block_map_index = 0; b.fast_block_map_index = 0;
return &b; return &b;
@ -119,7 +119,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link, void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
const std::set<u32>& physical_addresses) const std::set<u32>& physical_addresses)
{ {
size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.msrBits); size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.feature_flags);
if (m_entry_points_ptr) if (m_entry_points_ptr)
m_entry_points_ptr[index] = block.normalEntry; m_entry_points_ptr[index] = block.normalEntry;
else else
@ -159,10 +159,10 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
} }
} }
JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr) JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, CPUEmuFeatureFlags feature_flags)
{ {
u32 translated_addr = addr; u32 translated_addr = addr;
if (UReg_MSR(msr).IR) if (feature_flags & FEATURE_FLAG_MSR_IR)
{ {
auto translated = m_jit.m_mmu.JitCache_TranslateAddress(addr); auto translated = m_jit.m_mmu.JitCache_TranslateAddress(addr);
if (!translated.valid) if (!translated.valid)
@ -176,7 +176,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
for (; iter.first != iter.second; iter.first++) for (; iter.first != iter.second; iter.first++)
{ {
JitBlock& b = iter.first->second; JitBlock& b = iter.first->second;
if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK)) if (b.effectiveAddress == addr && b.feature_flags == feature_flags)
return &b; return &b;
} }
@ -189,15 +189,14 @@ const u8* JitBaseBlockCache::Dispatch()
if (m_entry_points_ptr) if (m_entry_points_ptr)
{ {
u8* entry_point = u8* entry_point =
m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)]; m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];
if (entry_point) if (entry_point)
{ {
return entry_point; return entry_point;
} }
else else
{ {
JitBlock* block = JitBlock* block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);
MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK);
if (!block) if (!block)
return nullptr; return nullptr;
@ -207,12 +206,12 @@ const u8* JitBaseBlockCache::Dispatch()
} }
JitBlock* block = JitBlock* block =
m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)]; m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];
if (!block || block->effectiveAddress != ppc_state.pc || if (!block || block->effectiveAddress != ppc_state.pc ||
block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK)) block->feature_flags != ppc_state.feature_flags)
{ {
block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK); block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);
} }
if (!block) if (!block)
@ -374,7 +373,7 @@ void JitBaseBlockCache::LinkBlockExits(JitBlock& block)
{ {
if (!e.linkStatus) if (!e.linkStatus)
{ {
JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits); JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.feature_flags);
if (destinationBlock) if (destinationBlock)
{ {
WriteLinkBlock(e, destinationBlock); WriteLinkBlock(e, destinationBlock);
@ -393,7 +392,7 @@ void JitBaseBlockCache::LinkBlock(JitBlock& block)
for (JitBlock* b2 : it->second) for (JitBlock* b2 : it->second)
{ {
if (block.msrBits == b2->msrBits) if (block.feature_flags == b2->feature_flags)
LinkBlockExits(*b2); LinkBlockExits(*b2);
} }
} }
@ -412,7 +411,7 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
return; return;
for (JitBlock* sourceBlock : it->second) for (JitBlock* sourceBlock : it->second)
{ {
if (sourceBlock->msrBits != block.msrBits) if (sourceBlock->feature_flags != block.feature_flags)
continue; continue;
for (auto& e : sourceBlock->linkData) for (auto& e : sourceBlock->linkData)
@ -460,9 +459,9 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block)
WriteDestroyBlock(block); WriteDestroyBlock(block);
} }
JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr) JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, CPUEmuFeatureFlags feature_flags)
{ {
JitBlock* block = GetBlockFromStartAddress(addr, msr); JitBlock* block = GetBlockFromStartAddress(addr, feature_flags);
if (!block) if (!block)
return nullptr; return nullptr;
@ -484,7 +483,7 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
} }
// And create a new one // And create a new one
size_t index = FastLookupIndexForAddress(addr, msr); size_t index = FastLookupIndexForAddress(addr, feature_flags);
if (m_entry_points_ptr) if (m_entry_points_ptr)
m_entry_points_ptr[index] = block->normalEntry; m_entry_points_ptr[index] = block->normalEntry;
else else
@ -494,11 +493,11 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
return block; return block;
} }
size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 msr) size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 feature_flags)
{ {
if (m_entry_points_ptr) if (m_entry_points_ptr)
{ {
return ((msr & JIT_CACHE_MSR_MASK) << 26) | (address >> 2); return (feature_flags << 30) | (address >> 2);
} }
else else
{ {

View File

@ -17,6 +17,7 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"
class JitBase; class JitBase;
@ -33,8 +34,8 @@ struct JitBlockData
// The normal entry point for the block, returned by Dispatch(). // The normal entry point for the block, returned by Dispatch().
u8* normalEntry; u8* normalEntry;
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK. // The features that this block was compiled with support for.
u32 msrBits; CPUEmuFeatureFlags feature_flags;
// The effective address (PC) for the beginning of the block. // The effective address (PC) for the beginning of the block.
u32 effectiveAddress; u32 effectiveAddress;
// The physical address of the code represented by this block. // The physical address of the code represented by this block.
@ -48,8 +49,8 @@ struct JitBlockData
// The number of PPC instructions represented by this block. Mostly // The number of PPC instructions represented by this block. Mostly
// useful for logging. // useful for logging.
u32 originalSize; u32 originalSize;
// This tracks the position if this block within the fast block cache. // This tracks the position of this block within the fast block cache.
// We allow each block to have only one map entry. // We only allow each block to have one map entry.
size_t fast_block_map_index; size_t fast_block_map_index;
}; };
static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a standard layout"); static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a standard layout");
@ -128,13 +129,9 @@ public:
class JitBaseBlockCache class JitBaseBlockCache
{ {
public: public:
// Mask for the MSR bits which determine whether a compiled block // The size of the fast map is determined like this:
// is valid (MSR.IR and MSR.DR, the address translation bits). // ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits)
static constexpr u32 JIT_CACHE_MSR_MASK = 0x30; static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000;
// The value for the map is determined like this:
// ((4 GB guest memory space) / (4 bytes per address) * sizeof(JitBlock*)) * (4 for 2 bits of msr)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1; static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;
@ -157,7 +154,7 @@ public:
// Look for the block in the slow but accurate way. // Look for the block in the slow but accurate way.
// This function shall be used if FastLookupIndexForAddress() failed. // This function shall be used if FastLookupIndexForAddress() failed.
// This might return nullptr if there is no such block. // This might return nullptr if there is no such block.
JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr); JitBlock* GetBlockFromStartAddress(u32 em_address, CPUEmuFeatureFlags feature_flags);
// Get the normal entry for the block associated with the current program // Get the normal entry for the block associated with the current program
// counter. This will JIT code if necessary. (This is the reference // counter. This will JIT code if necessary. (This is the reference
@ -185,7 +182,7 @@ private:
void UnlinkBlock(const JitBlock& block); void UnlinkBlock(const JitBlock& block);
void InvalidateICacheInternal(u32 physical_address, u32 address, u32 length, bool forced); void InvalidateICacheInternal(u32 physical_address, u32 address, u32 length, bool forced);
JitBlock* MoveBlockIntoFastCache(u32 em_address, u32 msr); JitBlock* MoveBlockIntoFastCache(u32 em_address, CPUEmuFeatureFlags feature_flags);
// Fast but risky block lookup based on fast_block_map. // Fast but risky block lookup based on fast_block_map.
size_t FastLookupIndexForAddress(u32 address, u32 msr); size_t FastLookupIndexForAddress(u32 address, u32 msr);

View File

@ -187,12 +187,14 @@ JitInterface::GetHostCode(u32 address) const
} }
auto& ppc_state = m_system.GetPPCState(); auto& ppc_state = m_system.GetPPCState();
JitBlock* block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.msr.Hex); JitBlock* block =
m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.feature_flags);
if (!block) if (!block)
{ {
for (int i = 0; i < 500; i++) for (int i = 0; i < 500; i++)
{ {
block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i, ppc_state.msr.Hex); block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i,
ppc_state.feature_flags);
if (block) if (block)
break; break;
} }

View File

@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func)
func->flags = flags; func->flags = flags;
} }
// Returns true when the instruction word has primary opcode 31 and extended
// opcode 467, which is the encoding the opcode-31 table labels "mtspr".
static bool IsMtspr(UGeckoInstruction inst)
{
  if (inst.OPCD != 31)
    return false;

  return inst.SUBOP10 == 467;
}
// Returns true when the SPR number encoded in the instruction is MMCR0 or MMCR1.
// Only the SPR fields are inspected; the caller decides which opcode this is.
static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst)
{
  // The SPR number is stored as two fields: SPRU supplies the upper part and
  // the low five bits of SPRL supply the lower part.
  const u32 spr_upper = inst.SPRU << 5;
  const u32 spr_lower = inst.SPRL & 0x1F;
  const u32 spr_number = spr_upper | spr_lower;

  return spr_number == SPR_MMCR0 || spr_number == SPR_MMCR1;
}
// Decides whether an op should be treated as a block terminator.
// An op qualifies when its opcode info carries FL_ENDBLOCK, except that an
// mtspr only qualifies when it targets MMCR0 or MMCR1.
// NOTE(review): presumably writes to other SPRs do not affect how following
// code must be compiled - confirm against the JIT's mtspr handling.
static bool InstructionCanEndBlock(const CodeOp& op)
{
  if (!(op.opinfo->flags & FL_ENDBLOCK))
    return false;

  // Anything flagged FL_ENDBLOCK that is not an mtspr always ends the block.
  if (!IsMtspr(op.inst))
    return true;

  return IsSprInstructionUsingMmcr(op.inst);
}
bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
{ {
const GekkoOPInfo* a_info = a.opinfo; const GekkoOPInfo* a_info = a.opinfo;
@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
// [1] https://bugs.dolphin-emu.org/issues/5864#note-7 // [1] https://bugs.dolphin-emu.org/issues/5864#note-7
if (a.canCauseException || b.canCauseException) if (a.canCauseException || b.canCauseException)
return false; return false;
if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) if (a.canEndBlock || b.canEndBlock)
return false; return false;
if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE)) if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false; return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA))) if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false; return false;
@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0; code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0; code->canEndBlock = InstructionCanEndBlock(*code);
code->canCauseException = first_fpu_instruction || code->canCauseException = first_fpu_instruction ||
(opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 || (opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
{ {
// Just pick the next instruction // Just pick the next instruction
address += 4; address += 4;
if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early
{ {
found_exit = true; found_exit = true;
break; break;

View File

@ -374,7 +374,7 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
{210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION}, {210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION},
{242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION}, {242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION},
{339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION}, {339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION}, {467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION},
{371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION}, {371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION},
{512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA}, {512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA},
{595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION}, {595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION},

View File

@ -137,6 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p)
} }
RoundingModeUpdated(m_ppc_state); RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
auto& mmu = m_system.GetMMU(); auto& mmu = m_system.GetMMU();
mmu.IBATUpdated(); mmu.IBATUpdated();
@ -194,8 +195,6 @@ void PowerPCManager::ResetRegisters()
} }
m_ppc_state.SetXER({}); m_ppc_state.SetXER({});
RoundingModeUpdated(m_ppc_state);
auto& mmu = m_system.GetMMU(); auto& mmu = m_system.GetMMU();
mmu.DBATUpdated(); mmu.DBATUpdated();
mmu.IBATUpdated(); mmu.IBATUpdated();
@ -208,6 +207,9 @@ void PowerPCManager::ResetRegisters()
m_ppc_state.msr.Hex = 0; m_ppc_state.msr.Hex = 0;
m_ppc_state.spr[SPR_DEC] = 0xFFFFFFFF; m_ppc_state.spr[SPR_DEC] = 0xFFFFFFFF;
SystemTimers::DecrementerSet(); SystemTimers::DecrementerSet();
RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
} }
void PowerPCManager::InitializeCPUCore(CPUCore cpu_core) void PowerPCManager::InitializeCPUCore(CPUCore cpu_core)
@ -581,15 +583,15 @@ void PowerPCManager::CheckExceptions()
DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT"); DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT");
m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT; m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT;
} }
// EXTERNAL INTERRUPT
else else
{ {
// EXTERNAL INTERRUPT
CheckExternalExceptions(); CheckExternalExceptions();
return; return;
} }
m_system.GetJitInterface().UpdateMembase(); m_system.GetJitInterface().UpdateMembase();
MSRUpdated(m_ppc_state);
} }
void PowerPCManager::CheckExternalExceptions() void PowerPCManager::CheckExternalExceptions()
@ -642,6 +644,7 @@ void PowerPCManager::CheckExternalExceptions()
ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}", ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}",
exceptions); exceptions);
} }
MSRUpdated(m_ppc_state);
} }
m_system.GetJitInterface().UpdateMembase(); m_system.GetJitInterface().UpdateMembase();
@ -700,6 +703,36 @@ void RoundingModeUpdated(PowerPCState& ppc_state)
Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI); Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI);
} }
// Refreshes the MSR-derived bits (DR and IR) of ppc_state.feature_flags from the
// current value of ppc_state.msr. The PERFMON bit is carried over unchanged.
void MSRUpdated(PowerPCState& ppc_state)
{
  // The shift-and-mask below only works because MSR.DR and MSR.IR sit at bits 4
  // and 5 and map onto feature flag bits 0 and 1 in the same order.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  const u32 kept_perfmon_bit = ppc_state.feature_flags & FEATURE_FLAG_PERFMON;
  const u32 translation_bits = (ppc_state.msr.Hex >> 4) & 0x3;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(kept_perfmon_bit | translation_bits);
}
// Refreshes the PERFMON bit of ppc_state.feature_flags: it is set when either
// MMCR0 or MMCR1 is non-zero and cleared otherwise. All other bits are kept.
void MMCRUpdated(PowerPCState& ppc_state)
{
  // Start from the current flags with PERFMON cleared, then set it again if needed.
  u32 updated_flags = ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON;

  if (ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1])
    updated_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(updated_flags);
}
// Rebuilds ppc_state.feature_flags from scratch: the DR/IR bits come from
// ppc_state.msr and the PERFMON bit is set when MMCR0 or MMCR1 is non-zero.
// Nothing from the previous feature_flags value is preserved.
void RecalculateAllFeatureFlags(PowerPCState& ppc_state)
{
  // The shift-and-mask below only works because MSR.DR and MSR.IR sit at bits 4
  // and 5 and map onto feature flag bits 0 and 1 in the same order.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  u32 rebuilt_flags = (ppc_state.msr.Hex >> 4) & 0x3;

  if (ppc_state.spr[SPR_MMCR0] || ppc_state.spr[SPR_MMCR1])
    rebuilt_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(rebuilt_flags);
}
void CheckExceptionsFromJIT(PowerPCManager& power_pc) void CheckExceptionsFromJIT(PowerPCManager& power_pc)
{ {
power_pc.CheckExceptions(); power_pc.CheckExceptions();

View File

@ -141,6 +141,8 @@ struct PowerPCState
UReg_MSR msr; // machine state register UReg_MSR msr; // machine state register
UReg_FPSCR fpscr; // floating point flags/status bits UReg_FPSCR fpscr; // floating point flags/status bits
CPUEmuFeatureFlags feature_flags;
// Exception management. // Exception management.
u32 Exceptions = 0; u32 Exceptions = 0;
@ -346,5 +348,8 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc);
#define TU(ppc_state) (ppc_state).spr[SPR_TU] #define TU(ppc_state) (ppc_state).spr[SPR_TU]
void RoundingModeUpdated(PowerPCState& ppc_state); void RoundingModeUpdated(PowerPCState& ppc_state);
void MSRUpdated(PowerPCState& ppc_state);
void MMCRUpdated(PowerPCState& ppc_state);
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);
} // namespace PowerPC } // namespace PowerPC

View File

@ -448,7 +448,10 @@ void RegisterWidget::PopulateTable()
// MSR // MSR
AddRegister( AddRegister(
23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; }, 23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; },
[this](u64 value) { m_system.GetPPCState().msr.Hex = value; }); [this](u64 value) {
m_system.GetPPCState().msr.Hex = value;
PowerPC::MSRUpdated(m_system.GetPPCState());
});
// SRR 0-1 // SRR 0-1
AddRegister( AddRegister(