Merge pull request #11988 from JosJuice/jit-feature-flags

Jit: Replace "msrBits" with "featureFlags" and use for performance monitor
This commit is contained in:
Mai 2023-12-05 03:13:50 -05:00 committed by GitHub
commit a65246ec3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 291 additions and 140 deletions

View File

@ -460,6 +460,9 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename)
SetupBAT(system, /*is_wii*/ false);
ppc_state.pc = 0x81200150;
PowerPC::MSRUpdated(ppc_state);
return true;
}

View File

@ -74,6 +74,7 @@ void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state)
ppc_state.msr.DR = 1;
ppc_state.msr.IR = 1;
ppc_state.msr.FP = 1;
PowerPC::MSRUpdated(ppc_state);
}
void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii)

View File

@ -651,6 +651,8 @@ void FifoPlayer::LoadMemory()
ppc_state.spr[SPR_DBAT1U] = 0xc0001fff;
ppc_state.spr[SPR_DBAT1L] = 0x0000002a;
PowerPC::MSRUpdated(ppc_state);
auto& mmu = system.GetMMU();
mmu.DBATUpdated();
mmu.IBATUpdated();

View File

@ -84,10 +84,15 @@ bool Load()
}
auto& power_pc = system.GetPowerPC();
const PowerPC::CoreMode core_mode = power_pc.GetMode();
power_pc.SetMode(PowerPC::CoreMode::Interpreter);
power_pc.GetPPCState().msr.Hex = 0;
power_pc.GetPPCState().pc = 0x3400;
PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState();
ppc_state.msr.Hex = 0;
ppc_state.pc = 0x3400;
PowerPC::MSRUpdated(ppc_state);
NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC.");
// IOS writes 0 to 0x30f8 before bootstrapping the PPC. Once started, the IPL eventually writes

View File

@ -661,6 +661,7 @@ static void WriteRegister()
break;
case 65:
ppc_state.msr.Hex = re32hex(bufptr);
PowerPC::MSRUpdated(ppc_state);
break;
case 66:
ppc_state.cr.Set(re32hex(bufptr));
@ -760,6 +761,7 @@ static void WriteRegister()
break;
case 131:
ppc_state.spr[SPR_MMCR0] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break;
case 132:
ppc_state.spr[SPR_PMC1] = re32hex(bufptr);
@ -772,6 +774,7 @@ static void WriteRegister()
break;
case 135:
ppc_state.spr[SPR_MMCR1] = re32hex(bufptr);
PowerPC::MMCRUpdated(ppc_state);
break;
case 136:
ppc_state.spr[SPR_PMC3] = re32hex(bufptr);

View File

@ -926,6 +926,13 @@ enum
EXCEPTION_FAKE_MEMCHECK_HIT = 0x00000200,
};
// Bit flags describing which CPU emulation features are active. The JITs read
// these from the PowerPC state's feature_flags field at compile time, and each
// JIT block records the flags it was compiled with.
enum CPUEmuFeatureFlags : u32
{
  // Mirrors MSR.DR. The JITs derive it with (msr >> 4) & 0x3, so it must stay
  // at bit 0.
  FEATURE_FLAG_MSR_DR = 0x1,
  // Mirrors MSR.IR. Must stay at bit 1 for the same reason.
  FEATURE_FLAG_MSR_IR = 0x2,
  // Gates the JIT-emitted calls to PowerPC::UpdatePerformanceMonitor.
  FEATURE_FLAG_PERFMON = 0x4,
};
constexpr s32 SignExt16(s16 x)
{
return (s32)x;

View File

@ -134,6 +134,9 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst)
// else
// set NPC to saved offset and resume
ppc_state.npc = SRR0(ppc_state);
PowerPC::MSRUpdated(ppc_state);
interpreter.m_end_block = true;
}

View File

@ -181,6 +181,8 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst)
ppc_state.msr.Hex = ppc_state.gpr[inst.RS];
PowerPC::MSRUpdated(ppc_state);
// FE0/FE1 may have been set
CheckFPExceptions(ppc_state);
@ -489,6 +491,11 @@ void Interpreter::mtspr(Interpreter& interpreter, UGeckoInstruction inst)
}
break;
case SPR_MMCR0:
case SPR_MMCR1:
MMCRUpdated(ppc_state);
break;
case SPR_THRM1:
case SPR_THRM2:
case SPR_THRM3:

View File

@ -337,7 +337,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
MOV(32, PPCSTATE(npc), Imm32(js.compilerPC + 4));
@ -353,7 +353,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Reset(js.op->regsOut);
fpr.Reset(js.op->GetFregsOut());
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
if (js.isLastInstruction)
{
@ -445,8 +445,7 @@ bool Jit64::Cleanup()
did_something = true;
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionCCCP(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
@ -483,8 +482,7 @@ void Jit64::FakeBLCall(u32 after)
// We may need to fake the BLR stack on inlined CALL instructions.
// Else we can't return to this location any more.
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
PUSH(RSCRATCH2);
FixupBranch skip_exit = CALL();
POP(RSCRATCH2);
@ -497,8 +495,11 @@ void Jit64::EmitUpdateMembase()
MOV(64, R(RMEM), PPCSTATE(mem_ptr));
}
void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg)
void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{
ASSERT(!msr.IsSimpleReg(scratch_reg));
// Update mem_ptr
auto& memory = m_system.GetMemory();
if (msr.IsImm())
{
@ -513,6 +514,26 @@ void Jit64::EmitStoreMembase(const OpArg& msr, X64Reg scratch_reg)
CMOVcc(64, RMEM, R(scratch_reg), CC_Z);
}
MOV(64, PPCSTATE(mem_ptr), R(RMEM));
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
if (msr.IsImm())
{
MOV(32, PPCSTATE(feature_flags), Imm32(other_feature_flags | ((msr.Imm32() >> 4) & 0x3)));
}
else
{
MOV(32, R(scratch_reg), msr);
SHR(32, R(scratch_reg), Imm8(4));
AND(32, R(scratch_reg), Imm32(0x3));
if (other_feature_flags != 0)
OR(32, R(scratch_reg), Imm32(other_feature_flags));
MOV(32, PPCSTATE(feature_flags), R(scratch_reg));
}
}
void Jit64::WriteExit(u32 destination, bool bl, u32 after)
@ -524,8 +545,7 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after)
if (bl)
{
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
PUSH(RSCRATCH2);
}
@ -582,8 +602,7 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
if (bl)
{
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
MOV(64, R(RSCRATCH2), Imm64(u64(m_ppc_state.feature_flags) << 32 | after));
PUSH(RSCRATCH2);
}
@ -611,10 +630,9 @@ void Jit64::WriteBLRExit()
bool disturbed = Cleanup();
if (disturbed)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits != 0)
if (m_ppc_state.feature_flags != 0)
{
MOV(32, R(RSCRATCH2), Imm32(msr_bits));
MOV(32, R(RSCRATCH2), Imm32(m_ppc_state.feature_flags));
SHL(64, R(RSCRATCH2), Imm8(32));
OR(64, R(RSCRATCH), R(RSCRATCH2));
}

View File

@ -88,7 +88,7 @@ public:
// Utilities for use by opcodes
void EmitUpdateMembase();
void EmitStoreMembase(const Gen::OpArg& msr, Gen::X64Reg scratch_reg);
void MSRUpdated(const Gen::OpArg& msr, Gen::X64Reg scratch_reg);
void FakeBLCall(u32 after);
void WriteExit(u32 destination, bool bl = false, u32 after = 0);
void JustWriteExit(u32 destination, bool bl, u32 after);

View File

@ -19,6 +19,7 @@
#include "Core/System.h"
using namespace Gen;
// Constructs the routine manager for the given Jit64. The only work done here
// is forwarding `jit` to the CommonAsmRoutines base class.
Jit64AsmRoutineManager::Jit64AsmRoutineManager(Jit64& jit) : CommonAsmRoutines(jit)
{
}
@ -118,19 +119,17 @@ void Jit64AsmRoutineManager::Generate()
{
if (m_jit.GetBlockCache()->GetEntryPoints())
{
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
SHL(64, R(RSCRATCH2), Imm8(28));
MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
SHL(64, R(RSCRATCH2), Imm8(32));
MOV(32, R(RSCRATCH_EXTRA), PPCSTATE(pc));
OR(64, R(RSCRATCH_EXTRA), R(RSCRATCH2));
u64 icache = reinterpret_cast<u64>(m_jit.GetBlockCache()->GetEntryPoints());
MOV(64, R(RSCRATCH2), Imm64(icache));
// The entry points map is indexed by ((msrBits << 26) | (address >> 2)).
// The map contains 8 byte 64-bit pointers and that means we need to shift
// msr left by 29 bits and address left by 1 bit to get the correct offset
// in the map.
// The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
// The map contains 8-byte pointers and that means we need to shift feature_flags
// left by 33 bits and pc left by 1 bit to get the correct offset in the map.
MOV(64, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH_EXTRA, SCALE_2, 0));
}
else
@ -160,17 +159,17 @@ void Jit64AsmRoutineManager::Generate()
if (!m_jit.GetBlockCache()->GetEntryPoints())
{
// Check block.msrBits.
MOV(32, R(RSCRATCH2), PPCSTATE(msr));
AND(32, R(RSCRATCH2), Imm32(JitBaseBlockCache::JIT_CACHE_MSR_MASK));
// Check block.feature_flags.
MOV(32, R(RSCRATCH2), PPCSTATE(feature_flags));
// Also check the block.effectiveAddress. RSCRATCH_EXTRA still has the PC.
SHL(64, R(RSCRATCH_EXTRA), Imm8(32));
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));
static_assert(offsetof(JitBlockData, msrBits) + 4 ==
static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress));
CMP(64, R(RSCRATCH2), MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, msrBits))));
CMP(64, R(RSCRATCH2),
MDisp(RSCRATCH, static_cast<s32>(offsetof(JitBlockData, feature_flags))));
state_mismatch = J_CC(CC_NE);
// Success; branch to the block we found.

View File

@ -47,6 +47,7 @@ void Jit64::rfi(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
// See Interpreter rfi for details
const u32 mask = 0x87C0FFFF;
const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13]
@ -56,7 +57,9 @@ void Jit64::rfi(UGeckoInstruction inst)
AND(32, R(RSCRATCH), Imm32(mask & clearMSR13));
OR(32, PPCSTATE(msr), R(RSCRATCH));
EmitStoreMembase(R(RSCRATCH), RSCRATCH2);
// Call MSRUpdated to update feature_flags. Only the bits that come from SRR1
// are relevant for this, so it's fine to pass in RSCRATCH in place of msr.
MSRUpdated(R(RSCRATCH), RSCRATCH2);
// NPC = SRR0;
MOV(32, R(RSCRATCH), PPCSTATE_SRR0);

View File

@ -320,7 +320,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
FixupBranch bat_lookup_failed;
MOV(32, R(effective_address), R(addr));
const u8* loop_start = GetCodePtr();
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
{
// Translate effective address to physical address.
bat_lookup_failed = BATAddressLookup(addr, tmp, m_jit.m_mmu.GetIBATTable().data());
@ -349,7 +349,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(invalidate_needed);
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
SetJumpTarget(bat_lookup_failed);
BitSet32 registersInUse = CallerSavedRegistersInUse();
@ -421,7 +421,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
end_dcbz_hack = J_CC(CC_L);
}
bool emit_fast_path = m_ppc_state.msr.DR && m_jit.jo.fastmem_arena;
bool emit_fast_path = (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR) && m_jit.jo.fastmem_arena;
if (emit_fast_path)
{

View File

@ -23,7 +23,7 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!m_ppc_state.msr.DR);
FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;
@ -112,7 +112,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
// For performance, the AsmCommon routines assume address translation is on.
FALLBACK_IF(!m_ppc_state.msr.DR);
FALLBACK_IF(!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
s32 offset = inst.SIMM_12;
bool indexed = inst.OPCD == 4;

View File

@ -439,7 +439,7 @@ void Jit64::mtmsr(UGeckoInstruction inst)
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
EmitStoreMembase(Rs, RSCRATCH2);
MSRUpdated(Rs, RSCRATCH2);
}
gpr.Flush();

View File

@ -371,7 +371,8 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
}
FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
const bool dr_set =
(flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR);
const bool fast_check_address =
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address)
@ -544,7 +545,8 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
}
FixupBranch exit;
const bool dr_set = (flags & SAFE_LOADSTORE_DR_ON) || m_jit.m_ppc_state.msr.DR;
const bool dr_set =
(flags & SAFE_LOADSTORE_DR_ON) || (m_jit.m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR);
const bool fast_check_address =
!force_slow_access && dr_set && m_jit.jo.fastmem_arena && !m_jit.m_ppc_state.m_enable_dcache;
if (fast_check_address)

View File

@ -187,7 +187,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
// also flush the program counter
ARM64Reg WA = gpr.GetReg();
@ -207,7 +207,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
fpr.ResetRegisters(js.op->GetFregsOut());
gpr.ResetCRRegisters(js.op->crOut);
if (js.op->opinfo->flags & FL_ENDBLOCK)
if (js.op->canEndBlock)
{
if (js.isLastInstruction)
{
@ -276,8 +276,7 @@ void JitArm64::Cleanup()
SetJumpTarget(exit);
}
// SPEED HACK: MMCR0/MMCR1 should be checked at run-time, not at compile time.
if (MMCR0(m_ppc_state).Hex || MMCR1(m_ppc_state).Hex)
if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
{
ABI_CallFunction(&PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
js.numFloatingPointInst, &m_ppc_state);
@ -348,27 +347,61 @@ void JitArm64::EmitUpdateMembase()
LDR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
}
void JitArm64::EmitStoreMembase(u32 msr)
void JitArm64::MSRUpdated(u32 msr)
{
// Update mem_ptr
auto& memory = m_system.GetMemory();
MOVP2R(MEM_REG,
UReg_MSR(msr).DR ?
(jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()) :
(jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()));
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
const u32 feature_flags = other_feature_flags | ((msr >> 4) & 0x3);
if (feature_flags == 0)
{
STR(IndexType::Unsigned, ARM64Reg::WZR, PPC_REG, PPCSTATE_OFF(feature_flags));
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, feature_flags);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
gpr.Unlock(WA);
}
}
void JitArm64::EmitStoreMembase(const ARM64Reg& msr)
void JitArm64::MSRUpdated(ARM64Reg msr)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
// Update mem_ptr
auto& memory = m_system.GetMemory();
ARM64Reg WD = gpr.GetReg();
ARM64Reg XD = EncodeRegTo64(WD);
MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XD, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), 32));
CSEL(MEM_REG, MEM_REG, XD, CCFlags::CC_NEQ);
CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
gpr.Unlock(WD);
// Update feature_flags
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
const u32 other_feature_flags = m_ppc_state.feature_flags & ~0x3;
UBFX(WA, msr, 4, 2);
if (other_feature_flags != 0)
ORR(WA, WA, LogicalImm(32, other_feature_flags));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
gpr.Unlock(WA);
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
@ -383,20 +416,20 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
const u8* host_address_after_return;
if (LK)
{
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
@ -487,20 +520,20 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
}
else
{
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
MOVI2R(ARM64Reg::X1, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = sizeof(u32) * 3;
@ -558,17 +591,17 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
// function has been called!
gpr.Lock(ARM64Reg::W30);
}
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
// Push {ARM_PC (64-bit); PPC_PC (32-bit); feature_flags (32-bit)} on the stack
ARM64Reg after_reg = ARM64Reg::INVALID_REG;
ARM64Reg reg_to_push;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
const u64 feature_flags = m_ppc_state.feature_flags;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return);
MOVI2R(reg_to_push, feature_flags << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
else if (feature_flags == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
@ -576,7 +609,8 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), feature_flags << 32,
reg_to_push);
}
ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3;
@ -640,16 +674,16 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
Cleanup();
EndTimeProfile(js.curBlock);
// Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC.
// Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits == 0)
const u64 feature_flags = m_ppc_state.feature_flags;
if (feature_flags == 0)
{
CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC));
}
else
{
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0);
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), feature_flags << 32, ARM64Reg::X0);
CMP(ARM64Reg::X1, ARM64Reg::X0);
}
FixupBranch no_match = B(CC_NEQ);

View File

@ -310,8 +310,8 @@ protected:
void EndTimeProfile(JitBlock* b);
void EmitUpdateMembase();
void EmitStoreMembase(u32 msr);
void EmitStoreMembase(const Arm64Gen::ARM64Reg& msr);
void MSRUpdated(u32 msr);
void MSRUpdated(Arm64Gen::ARM64Reg msr);
// Exits
void

View File

@ -64,11 +64,11 @@ void JitArm64::rfi(UGeckoInstruction inst)
ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA
gpr.Unlock(WB, WC);
EmitStoreMembase(WA);
MSRUpdated(WA);
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_SRR0));
gpr.Unlock(WB, WC);
WriteExceptionExit(WA);
gpr.Unlock(WA);

View File

@ -727,7 +727,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
// Translate effective address to physical address.
const u8* loop_start = GetCodePtr();
FixupBranch bat_lookup_failed;
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
{
bat_lookup_failed =
BATAddressLookup(physical_addr, effective_addr, WA, m_mmu.GetIBATTable().data());
@ -756,7 +756,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(invalidate_needed);
if (m_ppc_state.msr.IR)
if (m_ppc_state.feature_flags & FEATURE_FLAG_MSR_IR)
SetJumpTarget(bat_lookup_failed);
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();

View File

@ -23,7 +23,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
// X30 is LR
// X0 is the address
@ -151,7 +152,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
JITDISABLE(bJITLoadStorePairedOff);
// If fastmem is enabled, the asm routines assume address translation is on.
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem && !m_ppc_state.msr.DR);
FALLBACK_IF(!js.assumeNoPairedQuantize && jo.fastmem &&
!(m_ppc_state.feature_flags & FEATURE_FLAG_MSR_DR));
// X30 is LR
// X0 contains the scale

View File

@ -94,12 +94,12 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
const bool imm_value = gpr.IsImm(inst.RS);
if (imm_value)
EmitStoreMembase(gpr.GetImm(inst.RS));
MSRUpdated(gpr.GetImm(inst.RS));
STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF(msr));
if (!imm_value)
EmitStoreMembase(gpr.R(inst.RS));
MSRUpdated(gpr.R(inst.RS));
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);

View File

@ -100,15 +100,20 @@ void JitArm64::GenerateAsm()
if (GetBlockCache()->GetEntryPoints())
{
// Check if there is a block
ARM64Reg pc_and_msr = ARM64Reg::X8;
ARM64Reg cache_base = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10;
LDR(IndexType::Unsigned, EncodeRegTo32(pc_and_msr), PPC_REG, PPCSTATE_OFF(msr));
ARM64Reg feature_flags = ARM64Reg::W8;
ARM64Reg pc_and_feature_flags = ARM64Reg::X9;
ARM64Reg cache_base = ARM64Reg::X10;
ARM64Reg block = ARM64Reg::X11;
LDR(IndexType::Unsigned, feature_flags, PPC_REG, PPCSTATE_OFF(feature_flags));
MOVP2R(cache_base, GetBlockCache()->GetEntryPoints());
// The entry points map is indexed by ((msrBits << 26) | (address >> 2)).
UBFIZ(pc_and_msr, pc_and_msr, 26, 6);
BFXIL(pc_and_msr, EncodeRegTo64(DISPATCHER_PC), 2, 30);
LDR(block, cache_base, ArithOption(pc_and_msr, true));
// The entry points map is indexed by ((feature_flags << 30) | (pc >> 2)).
// The map contains 8-byte pointers and that means we need to shift feature_flags
// left by 33 bits and pc left by 1 bit to get the correct offset in the map.
LSL(pc_and_feature_flags, EncodeRegTo64(DISPATCHER_PC), 1);
BFI(pc_and_feature_flags, EncodeRegTo64(feature_flags), 33, 31);
LDR(block, cache_base, pc_and_feature_flags);
FixupBranch not_found = CBZ(block);
BR(block);
SetJumpTarget(not_found);
@ -119,8 +124,8 @@ void JitArm64::GenerateAsm()
ARM64Reg cache_base = ARM64Reg::X9;
ARM64Reg block = ARM64Reg::X10;
ARM64Reg pc = ARM64Reg::W11;
ARM64Reg msr = ARM64Reg::W12;
ARM64Reg msr2 = ARM64Reg::W13;
ARM64Reg feature_flags = ARM64Reg::W12;
ARM64Reg feature_flags_2 = ARM64Reg::W13;
ARM64Reg entry = ARM64Reg::X14;
// iCache[(address >> 2) & iCache_Mask];
@ -130,25 +135,24 @@ void JitArm64::GenerateAsm()
LDR(block, cache_base, ArithOption(EncodeRegTo64(pc_masked), true));
FixupBranch not_found = CBZ(block);
// b.effectiveAddress != addr || b.msrBits != msr
static_assert(offsetof(JitBlockData, msrBits) + 4 ==
// b.effectiveAddress != addr || b.feature_flags != feature_flags
static_assert(offsetof(JitBlockData, feature_flags) + 4 ==
offsetof(JitBlockData, effectiveAddress));
LDP(IndexType::Signed, msr, pc, block, offsetof(JitBlockData, msrBits));
LDR(IndexType::Unsigned, msr2, PPC_REG, PPCSTATE_OFF(msr));
LDP(IndexType::Signed, feature_flags, pc, block, offsetof(JitBlockData, feature_flags));
LDR(IndexType::Unsigned, feature_flags_2, PPC_REG, PPCSTATE_OFF(feature_flags));
CMP(pc, DISPATCHER_PC);
FixupBranch pc_mismatch = B(CC_NEQ);
LDR(IndexType::Unsigned, entry, block, offsetof(JitBlockData, normalEntry));
AND(msr2, msr2, LogicalImm(JitBaseBlockCache::JIT_CACHE_MSR_MASK, 32));
CMP(msr, msr2);
FixupBranch msr_mismatch = B(CC_NEQ);
CMP(feature_flags, feature_flags_2);
FixupBranch feature_flags_mismatch = B(CC_NEQ);
// return blocks[block_num].normalEntry;
BR(entry);
SetJumpTarget(not_found);
SetJumpTarget(pc_mismatch);
SetJumpTarget(msr_mismatch);
SetJumpTarget(feature_flags_mismatch);
}
}

View File

@ -110,7 +110,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
JitBlock& b = block_map.emplace(physical_address, JitBlock())->second;
b.effectiveAddress = em_address;
b.physicalAddress = physical_address;
b.msrBits = m_jit.m_ppc_state.msr.Hex & JIT_CACHE_MSR_MASK;
b.feature_flags = m_jit.m_ppc_state.feature_flags;
b.linkData.clear();
b.fast_block_map_index = 0;
return &b;
@ -119,7 +119,7 @@ JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
const std::set<u32>& physical_addresses)
{
size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.msrBits);
size_t index = FastLookupIndexForAddress(block.effectiveAddress, block.feature_flags);
if (m_entry_points_ptr)
m_entry_points_ptr[index] = block.normalEntry;
else
@ -159,10 +159,10 @@ void JitBaseBlockCache::FinalizeBlock(JitBlock& block, bool block_link,
}
}
JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, CPUEmuFeatureFlags feature_flags)
{
u32 translated_addr = addr;
if (UReg_MSR(msr).IR)
if (feature_flags & FEATURE_FLAG_MSR_IR)
{
auto translated = m_jit.m_mmu.JitCache_TranslateAddress(addr);
if (!translated.valid)
@ -176,7 +176,7 @@ JitBlock* JitBaseBlockCache::GetBlockFromStartAddress(u32 addr, u32 msr)
for (; iter.first != iter.second; iter.first++)
{
JitBlock& b = iter.first->second;
if (b.effectiveAddress == addr && b.msrBits == (msr & JIT_CACHE_MSR_MASK))
if (b.effectiveAddress == addr && b.feature_flags == feature_flags)
return &b;
}
@ -189,15 +189,14 @@ const u8* JitBaseBlockCache::Dispatch()
if (m_entry_points_ptr)
{
u8* entry_point =
m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)];
m_entry_points_ptr[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];
if (entry_point)
{
return entry_point;
}
else
{
JitBlock* block =
MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK);
JitBlock* block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);
if (!block)
return nullptr;
@ -207,12 +206,12 @@ const u8* JitBaseBlockCache::Dispatch()
}
JitBlock* block =
m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.msr.Hex)];
m_fast_block_map_fallback[FastLookupIndexForAddress(ppc_state.pc, ppc_state.feature_flags)];
if (!block || block->effectiveAddress != ppc_state.pc ||
block->msrBits != (ppc_state.msr.Hex & JIT_CACHE_MSR_MASK))
block->feature_flags != ppc_state.feature_flags)
{
block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.msr.Hex & JIT_CACHE_MSR_MASK);
block = MoveBlockIntoFastCache(ppc_state.pc, ppc_state.feature_flags);
}
if (!block)
@ -374,7 +373,7 @@ void JitBaseBlockCache::LinkBlockExits(JitBlock& block)
{
if (!e.linkStatus)
{
JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.msrBits);
JitBlock* destinationBlock = GetBlockFromStartAddress(e.exitAddress, block.feature_flags);
if (destinationBlock)
{
WriteLinkBlock(e, destinationBlock);
@ -393,7 +392,7 @@ void JitBaseBlockCache::LinkBlock(JitBlock& block)
for (JitBlock* b2 : it->second)
{
if (block.msrBits == b2->msrBits)
if (block.feature_flags == b2->feature_flags)
LinkBlockExits(*b2);
}
}
@ -412,7 +411,7 @@ void JitBaseBlockCache::UnlinkBlock(const JitBlock& block)
return;
for (JitBlock* sourceBlock : it->second)
{
if (sourceBlock->msrBits != block.msrBits)
if (sourceBlock->feature_flags != block.feature_flags)
continue;
for (auto& e : sourceBlock->linkData)
@ -460,9 +459,9 @@ void JitBaseBlockCache::DestroyBlock(JitBlock& block)
WriteDestroyBlock(block);
}
JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, CPUEmuFeatureFlags feature_flags)
{
JitBlock* block = GetBlockFromStartAddress(addr, msr);
JitBlock* block = GetBlockFromStartAddress(addr, feature_flags);
if (!block)
return nullptr;
@ -484,7 +483,7 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
}
// And create a new one
size_t index = FastLookupIndexForAddress(addr, msr);
size_t index = FastLookupIndexForAddress(addr, feature_flags);
if (m_entry_points_ptr)
m_entry_points_ptr[index] = block->normalEntry;
else
@ -494,11 +493,11 @@ JitBlock* JitBaseBlockCache::MoveBlockIntoFastCache(u32 addr, u32 msr)
return block;
}
size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 msr)
size_t JitBaseBlockCache::FastLookupIndexForAddress(u32 address, u32 feature_flags)
{
if (m_entry_points_ptr)
{
return ((msr & JIT_CACHE_MSR_MASK) << 26) | (address >> 2);
return (feature_flags << 30) | (address >> 2);
}
else
{

View File

@ -17,6 +17,7 @@
#include "Common/CommonTypes.h"
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/Gekko.h"
class JitBase;
@ -33,8 +34,8 @@ struct JitBlockData
// The normal entry point for the block, returned by Dispatch().
u8* normalEntry;
// The MSR bits expected for this block to be valid; see JIT_CACHE_MSR_MASK.
u32 msrBits;
// The features that this block was compiled with support for.
CPUEmuFeatureFlags feature_flags;
// The effective address (PC) for the beginning of the block.
u32 effectiveAddress;
// The physical address of the code represented by this block.
@ -48,8 +49,8 @@ struct JitBlockData
// The number of PPC instructions represented by this block. Mostly
// useful for logging.
u32 originalSize;
// This tracks the position if this block within the fast block cache.
// We allow each block to have only one map entry.
// This tracks the position of this block within the fast block cache.
// We only allow each block to have one map entry.
size_t fast_block_map_index;
};
static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a standard layout");
@ -128,13 +129,9 @@ public:
class JitBaseBlockCache
{
public:
// Mask for the MSR bits which determine whether a compiled block
// is valid (MSR.IR and MSR.DR, the address translation bits).
static constexpr u32 JIT_CACHE_MSR_MASK = 0x30;
// The value for the map is determined like this:
// ((4 GB guest memory space) / (4 bytes per address) * sizeof(JitBlock*)) * (4 for 2 bits of msr)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x8'0000'0000;
// The size of the fast map is determined like this:
// ((4 GiB guest memory space) / (4-byte alignment) * sizeof(JitBlock*)) << (3 feature flag bits)
static constexpr u64 FAST_BLOCK_MAP_SIZE = 0x10'0000'0000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_ELEMENTS = 0x10000;
static constexpr u32 FAST_BLOCK_MAP_FALLBACK_MASK = FAST_BLOCK_MAP_FALLBACK_ELEMENTS - 1;
@ -157,7 +154,7 @@ public:
// Look for the block in the slow but accurate way.
// This function shall be used if FastLookupIndexForAddress() failed.
// This might return nullptr if there is no such block.
JitBlock* GetBlockFromStartAddress(u32 em_address, u32 msr);
JitBlock* GetBlockFromStartAddress(u32 em_address, CPUEmuFeatureFlags feature_flags);
// Get the normal entry for the block associated with the current program
// counter. This will JIT code if necessary. (This is the reference
@ -185,7 +182,7 @@ private:
void UnlinkBlock(const JitBlock& block);
void InvalidateICacheInternal(u32 physical_address, u32 address, u32 length, bool forced);
JitBlock* MoveBlockIntoFastCache(u32 em_address, u32 msr);
JitBlock* MoveBlockIntoFastCache(u32 em_address, CPUEmuFeatureFlags feature_flags);
// Fast but risky block lookup based on fast_block_map.
size_t FastLookupIndexForAddress(u32 address, u32 msr);

View File

@ -187,12 +187,14 @@ JitInterface::GetHostCode(u32 address) const
}
auto& ppc_state = m_system.GetPPCState();
JitBlock* block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.msr.Hex);
JitBlock* block =
m_jit->GetBlockCache()->GetBlockFromStartAddress(address, ppc_state.feature_flags);
if (!block)
{
for (int i = 0; i < 500; i++)
{
block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i, ppc_state.msr.Hex);
block = m_jit->GetBlockCache()->GetBlockFromStartAddress(address - 4 * i,
ppc_state.feature_flags);
if (block)
break;
}

View File

@ -202,6 +202,23 @@ static void AnalyzeFunction2(Common::Symbol* func)
func->flags = flags;
}
// Reports whether `inst` has primary opcode 31 and extended opcode 467,
// which is the encoding the opcode-31 table labels "mtspr".
static bool IsMtspr(UGeckoInstruction inst)
{
  if (inst.OPCD != 31)
    return false;

  return inst.SUBOP10 == 467;
}
// Reports whether the SPR number encoded in `inst` is MMCR0 or MMCR1.
// The SPR number is stored split across two instruction fields: SPRU supplies
// the upper part (shifted left by 5) and the low five bits of SPRL supply the
// lower part.
static bool IsSprInstructionUsingMmcr(UGeckoInstruction inst)
{
  const u32 spr_number = (inst.SPRU << 5) | (inst.SPRL & 0x1F);

  if (spr_number == SPR_MMCR0)
    return true;

  return spr_number == SPR_MMCR1;
}
// Decides whether `op` should be treated as able to end a block.
// An instruction qualifies when its opcode info carries FL_ENDBLOCK, with one
// exception: an mtspr only qualifies if it targets MMCR0 or MMCR1.
static bool InstructionCanEndBlock(const CodeOp& op)
{
  if (!(op.opinfo->flags & FL_ENDBLOCK))
    return false;

  // Flagged as block-ending; for mtspr the answer depends on the target SPR.
  if (IsMtspr(op.inst))
    return IsSprInstructionUsingMmcr(op.inst);

  return true;
}
bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
{
const GekkoOPInfo* a_info = a.opinfo;
@ -222,9 +239,11 @@ bool PPCAnalyzer::CanSwapAdjacentOps(const CodeOp& a, const CodeOp& b) const
// [1] https://bugs.dolphin-emu.org/issues/5864#note-7
if (a.canCauseException || b.canCauseException)
return false;
if (a_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (a.canEndBlock || b.canEndBlock)
return false;
if (b_flags & (FL_ENDBLOCK | FL_TIMER | FL_NO_REORDER | FL_SET_OE))
if (a_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if (b_flags & (FL_TIMER | FL_NO_REORDER | FL_SET_OE))
return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false;
@ -597,7 +616,7 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock* block, CodeOp* code,
code->wantsFPRF = (opinfo->flags & FL_READ_FPRF) != 0;
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) != 0;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) != 0;
code->canEndBlock = InstructionCanEndBlock(*code);
code->canCauseException = first_fpu_instruction ||
(opinfo->flags & (FL_LOADSTORE | FL_PROGRAMEXCEPTION)) != 0 ||
@ -935,7 +954,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer,
{
// Just pick the next instruction
address += 4;
if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) // right now we stop early
if (!conditional_continue && InstructionCanEndBlock(code[i])) // right now we stop early
{
found_exit = true;
break;

View File

@ -374,7 +374,7 @@ constexpr std::array<GekkoOPTemplate, 107> s_table31{{
{210, "mtsr", OpType::System, 1, FL_IN_S | FL_PROGRAMEXCEPTION},
{242, "mtsrin", OpType::System, 1, FL_IN_SB | FL_PROGRAMEXCEPTION},
{339, "mfspr", OpType::SPR, 1, FL_OUT_D | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_PROGRAMEXCEPTION},
{467, "mtspr", OpType::SPR, 2, FL_IN_S | FL_ENDBLOCK | FL_PROGRAMEXCEPTION},
{371, "mftb", OpType::System, 1, FL_OUT_D | FL_TIMER | FL_PROGRAMEXCEPTION},
{512, "mcrxr", OpType::System, 1, FL_SET_CRn | FL_READ_CA | FL_SET_CA},
{595, "mfsr", OpType::System, 3, FL_OUT_D | FL_PROGRAMEXCEPTION},

View File

@ -137,6 +137,7 @@ void PowerPCManager::DoState(PointerWrap& p)
}
RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
auto& mmu = m_system.GetMMU();
mmu.IBATUpdated();
@ -194,8 +195,6 @@ void PowerPCManager::ResetRegisters()
}
m_ppc_state.SetXER({});
RoundingModeUpdated(m_ppc_state);
auto& mmu = m_system.GetMMU();
mmu.DBATUpdated();
mmu.IBATUpdated();
@ -208,6 +207,9 @@ void PowerPCManager::ResetRegisters()
m_ppc_state.msr.Hex = 0;
m_ppc_state.spr[SPR_DEC] = 0xFFFFFFFF;
SystemTimers::DecrementerSet();
RoundingModeUpdated(m_ppc_state);
RecalculateAllFeatureFlags(m_ppc_state);
}
void PowerPCManager::InitializeCPUCore(CPUCore cpu_core)
@ -581,15 +583,15 @@ void PowerPCManager::CheckExceptions()
DEBUG_LOG_FMT(POWERPC, "EXCEPTION_ALIGNMENT");
m_ppc_state.Exceptions &= ~EXCEPTION_ALIGNMENT;
}
// EXTERNAL INTERRUPT
else
{
// EXTERNAL INTERRUPT
CheckExternalExceptions();
return;
}
m_system.GetJitInterface().UpdateMembase();
MSRUpdated(m_ppc_state);
}
void PowerPCManager::CheckExternalExceptions()
@ -642,6 +644,7 @@ void PowerPCManager::CheckExternalExceptions()
ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}",
exceptions);
}
MSRUpdated(m_ppc_state);
}
m_system.GetJitInterface().UpdateMembase();
@ -700,6 +703,36 @@ void RoundingModeUpdated(PowerPCState& ppc_state)
Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI);
}
// Refreshes the MSR-derived bits of ppc_state.feature_flags from ppc_state.msr.
// The current FEATURE_FLAG_PERFMON bit is carried over unchanged.
void MSRUpdated(PowerPCState& ppc_state)
{
  // The shift-and-mask below is only valid while MSR.DR/MSR.IR start at bits
  // 4/5 and the matching feature flags occupy bits 0/1.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  const u32 kept_perfmon_bit = ppc_state.feature_flags & FEATURE_FLAG_PERFMON;
  const u32 msr_dr_ir_bits = (ppc_state.msr.Hex >> 4) & 0x3;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(kept_perfmon_bit | msr_dr_ir_bits);
}
// Refreshes FEATURE_FLAG_PERFMON in ppc_state.feature_flags: the flag is set
// when either MMCR0 or MMCR1 is non-zero and cleared otherwise. All other
// feature flag bits are left as they were.
void MMCRUpdated(PowerPCState& ppc_state)
{
  const bool any_mmcr_set = ppc_state.spr[SPR_MMCR0] != 0 || ppc_state.spr[SPR_MMCR1] != 0;

  u32 updated_flags = ppc_state.feature_flags & ~FEATURE_FLAG_PERFMON;
  if (any_mmcr_set)
    updated_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(updated_flags);
}
// Rebuilds ppc_state.feature_flags from scratch using the MSR and MMCR0/MMCR1
// values in ppc_state. The previous contents of feature_flags are not read.
void RecalculateAllFeatureFlags(PowerPCState& ppc_state)
{
  // The shift-and-mask below is only valid while MSR.DR/MSR.IR start at bits
  // 4/5 and the matching feature flags occupy bits 0/1.
  static_assert(UReg_MSR{}.DR.StartBit() == 4);
  static_assert(UReg_MSR{}.IR.StartBit() == 5);
  static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
  static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);

  u32 rebuilt_flags = (ppc_state.msr.Hex >> 4) & 0x3;

  // The perfmon flag is on whenever either monitor control register is non-zero.
  if (ppc_state.spr[SPR_MMCR0] != 0 || ppc_state.spr[SPR_MMCR1] != 0)
    rebuilt_flags |= FEATURE_FLAG_PERFMON;

  ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(rebuilt_flags);
}
void CheckExceptionsFromJIT(PowerPCManager& power_pc)
{
power_pc.CheckExceptions();

View File

@ -141,6 +141,8 @@ struct PowerPCState
UReg_MSR msr; // machine state register
UReg_FPSCR fpscr; // floating point flags/status bits
CPUEmuFeatureFlags feature_flags;
// Exception management.
u32 Exceptions = 0;
@ -346,5 +348,8 @@ void CheckBreakPointsFromJIT(PowerPCManager& power_pc);
#define TU(ppc_state) (ppc_state).spr[SPR_TU]
void RoundingModeUpdated(PowerPCState& ppc_state);
void MSRUpdated(PowerPCState& ppc_state);
void MMCRUpdated(PowerPCState& ppc_state);
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);
} // namespace PowerPC

View File

@ -448,7 +448,10 @@ void RegisterWidget::PopulateTable()
// MSR
AddRegister(
23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; },
[this](u64 value) { m_system.GetPPCState().msr.Hex = value; });
[this](u64 value) {
m_system.GetPPCState().msr.Hex = value;
PowerPC::MSRUpdated(m_system.GetPPCState());
});
// SRR 0-1
AddRegister(