Merge pull request #12141 from JosJuice/jit-blr-msr

Jit: Check MSR state in BLR optimization
This commit is contained in:
Mai 2023-11-28 22:35:35 +01:00 committed by GitHub
commit d095bddbe7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 69 additions and 27 deletions

View File

@ -483,7 +483,8 @@ void Jit64::FakeBLCall(u32 after)
// We may need to fake the BLR stack on inlined CALL instructions.
// Else we can't return to this location any more.
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
FixupBranch skip_exit = CALL();
POP(RSCRATCH2);
@ -523,7 +524,8 @@ void Jit64::WriteExit(u32 destination, bool bl, u32 after)
if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
}
@ -580,7 +582,8 @@ void Jit64::WriteExitDestInRSCRATCH(bool bl, u32 after)
if (bl)
{
MOV(32, R(RSCRATCH2), Imm32(after));
MOV(64, R(RSCRATCH2),
Imm64(u64(m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK) << 32 | after));
PUSH(RSCRATCH2);
}
@ -608,6 +611,13 @@ void Jit64::WriteBLRExit()
bool disturbed = Cleanup();
if (disturbed)
MOV(32, R(RSCRATCH), PPCSTATE(pc));
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits != 0)
{
MOV(32, R(RSCRATCH2), Imm32(msr_bits));
SHL(64, R(RSCRATCH2), Imm8(32));
OR(64, R(RSCRATCH), R(RSCRATCH2));
}
MOV(32, R(RSCRATCH2), Imm32(js.downcountAmount));
CMP(64, R(RSCRATCH), MDisp(RSP, 8));
J_CC(CC_NE, asm_routines.dispatcher_mispredicted_blr);

View File

@ -445,10 +445,6 @@ void Jit64::mtmsr(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
// Our jit cache also stores some MSR bits, as they have changed, we either
// have to validate them in the BLR/RET check, or just flush the stack here.
asm_routines.ResetStack(*this);
// If some exceptions are pending and EE are now enabled, force checking
// external exceptions when going out of mtmsr in order to execute delayed
// interrupts as soon as possible.

View File

@ -382,12 +382,21 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
const u8* host_address_after_return;
if (LK)
{
// Push {ARM_PC; PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ARM64Reg::X1);
}
constexpr s32 adr_offset = JitArm64BlockCache::BLOCK_LINK_SIZE + sizeof(u32) * 2;
host_address_after_return = GetCodePtr() + adr_offset;
@ -477,14 +486,22 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
}
else
{
// Push {ARM_PC, PPC_PC} on the stack
ARM64Reg reg_to_push = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg reg_to_push = ARM64Reg::X1;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
MOVI2R(ARM64Reg::X1, exit_address_after_return);
reg_to_push = ARM64Reg::X1;
MOVI2R(ARM64Reg::X1, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
ORRI2R(ARM64Reg::X1, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32,
ARM64Reg::X1);
}
MOVI2R(ARM64Reg::X1, exit_address_after_return);
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(ARM64Reg::X0, adr_offset);
@ -540,19 +557,33 @@ void JitArm64::FakeLKExit(u32 exit_address_after_return, ARM64Reg exit_address_a
// function has been called!
gpr.Lock(ARM64Reg::W30);
}
ARM64Reg after_reg = exit_address_after_return_reg;
// Push {ARM_PC (64-bit); PPC_PC (32-bit); MSR_BITS (32-bit)} on the stack
ARM64Reg after_reg = ARM64Reg::INVALID_REG;
ARM64Reg reg_to_push;
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (exit_address_after_return_reg == ARM64Reg::INVALID_REG)
{
after_reg = gpr.GetReg();
MOVI2R(after_reg, exit_address_after_return);
reg_to_push = EncodeRegTo64(after_reg);
MOVI2R(reg_to_push, msr_bits << 32 | exit_address_after_return);
}
else if (msr_bits == 0)
{
reg_to_push = EncodeRegTo64(exit_address_after_return_reg);
}
else
{
after_reg = gpr.GetReg();
reg_to_push = EncodeRegTo64(after_reg);
ORRI2R(reg_to_push, EncodeRegTo64(exit_address_after_return_reg), msr_bits << 32, reg_to_push);
}
ARM64Reg code_reg = gpr.GetReg();
constexpr s32 adr_offset = sizeof(u32) * 3;
const u8* host_address_after_return = GetCodePtr() + adr_offset;
ADR(EncodeRegTo64(code_reg), adr_offset);
STP(IndexType::Pre, EncodeRegTo64(code_reg), EncodeRegTo64(after_reg), ARM64Reg::SP, -16);
STP(IndexType::Pre, EncodeRegTo64(code_reg), reg_to_push, ARM64Reg::SP, -16);
gpr.Unlock(code_reg);
if (after_reg != exit_address_after_return_reg)
if (after_reg != ARM64Reg::INVALID_REG)
gpr.Unlock(after_reg);
FixupBranch skip_exit = BL();
@ -608,9 +639,18 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
Cleanup();
EndTimeProfile(js.curBlock);
// Check if {ARM_PC, PPC_PC} matches the current state.
// Check if {PPC_PC, MSR_BITS} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
CMP(ARM64Reg::W1, DISPATCHER_PC);
const u64 msr_bits = m_ppc_state.msr.Hex & JitBaseBlockCache::JIT_CACHE_MSR_MASK;
if (msr_bits == 0)
{
CMP(ARM64Reg::X1, EncodeRegTo64(DISPATCHER_PC));
}
else
{
ORRI2R(ARM64Reg::X0, EncodeRegTo64(DISPATCHER_PC), msr_bits << 32, ARM64Reg::X0);
CMP(ARM64Reg::X1, ARM64Reg::X0);
}
FixupBranch no_match = B(CC_NEQ);
DoDownCount(); // overwrites X0 + X1

View File

@ -104,10 +104,6 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
// Our jit cache also stores some MSR bits, as they have changed, we either
// have to validate them in the BLR/RET check, or just flush the stack here.
ResetStack();
WriteExceptionExit(js.compilerPC + 4, true);
}

View File

@ -50,7 +50,7 @@ void JitArm64::GenerateAsm()
STR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(stored_stack_pointer));
// Push {nullptr; -1} as invalid destination on the stack.
MOVI2R(ARM64Reg::X0, 0xFFFFFFFF);
MOVI2R(ARM64Reg::X0, 0xFFFF'FFFF'FFFF'FFFF);
STP(IndexType::Pre, ARM64Reg::ZR, ARM64Reg::X0, ARM64Reg::SP, -16);
// The PC will be loaded into DISPATCHER_PC after the call to CoreTiming::Advance().