From 2989ccff79d98a366363c0c33a2c68357dbf6f84 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 8 Jun 2014 01:39:44 -0500 Subject: [PATCH] Support conditional register cache flushing on ARMv7. Similar to PR #451, but for ARM as well. --- .../Core/PowerPC/JitArm32/JitArm_Branch.cpp | 33 +++++++++++---- .../Core/PowerPC/JitArm32/JitArm_Integer.cpp | 11 +++-- .../PowerPC/JitArm32/JitArm_LoadStore.cpp | 6 ++- .../Core/PowerPC/JitArm32/JitFPRCache.cpp | 41 +++++++++++++++---- .../Core/Core/PowerPC/JitArm32/JitFPRCache.h | 2 +- .../Core/PowerPC/JitArm32/JitRegCache.cpp | 40 +++++++++++++++--- .../Core/Core/PowerPC/JitArm32/JitRegCache.h | 11 +++-- 7 files changed, 113 insertions(+), 31 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp index b1b310b5ad..404d90c6e9 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp @@ -150,9 +150,6 @@ void JitArm::bcx(UGeckoInstruction inst) JITDISABLE(bJITBranchOff) // USES_CR - gpr.Flush(); - fpr.Flush(); - ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); FixupBranch pCTRDontBranch; @@ -195,6 +192,9 @@ void JitArm::bcx(UGeckoInstruction inst) destination = SignExt16(inst.BD << 2); else destination = js.compilerPC + SignExt16(inst.BD << 2); + + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); WriteExit(destination); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) @@ -203,16 +203,17 @@ void JitArm::bcx(UGeckoInstruction inst) SetJumpTarget( pCTRDontBranch ); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + gpr.Flush(); + fpr.Flush(); WriteExit(js.compilerPC + 4); + } } void JitArm::bcctrx(UGeckoInstruction inst) { INSTRUCTION_START JITDISABLE(bJITBranchOff) - gpr.Flush(); - fpr.Flush(); - // bcctrx doesn't decrement and/or test CTR _dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); @@ -221,6 +222,9 @@ void JitArm::bcctrx(UGeckoInstruction inst) // BO_2 == 1z1zz -> b always //NPC = CTR & 0xfffffffc; + gpr.Flush(); + fpr.Flush(); + ARMReg rA = gpr.GetReg(); if (inst.LK_3) @@ -262,12 +266,19 @@ void JitArm::bcctrx(UGeckoInstruction inst) //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); + WriteExitDestInR(rA); SetJumpTarget(b); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + gpr.Flush(); + fpr.Flush(); WriteExit(js.compilerPC + 4); + } } } void JitArm::bclrx(UGeckoInstruction inst) @@ -275,9 +286,6 @@ void JitArm::bclrx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff) - gpr.Flush(); - fpr.Flush(); - ARMReg rA = gpr.GetReg(); ARMReg rB = gpr.GetReg(); FixupBranch pCTRDontBranch; @@ -327,6 +335,9 @@ void JitArm::bclrx(UGeckoInstruction inst) //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR + + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); WriteExitDestInR(rA); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) @@ -335,5 +346,9 @@ void JitArm::bclrx(UGeckoInstruction inst) SetJumpTarget( pCTRDontBranch ); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + gpr.Flush(); + fpr.Flush(); WriteExit(js.compilerPC + 4); + } } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 4b690b966c..6e2cef93d8 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -954,9 +954,6 @@ void JitArm::twx(UGeckoInstruction inst) s32 a = inst.RA; - gpr.Flush(); - fpr.Flush(); - ARMReg RA = gpr.GetReg(); ARMReg RB = gpr.GetReg(); MOV(RA, inst.TO); @@ -1003,6 +1000,9 @@ void JitArm::twx(UGeckoInstruction inst) SetJumpTarget(take4); SetJumpTarget(take5); + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); + LDR(RA, R9, PPCSTATE_OFF(Exceptions)); MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized ORR(RA, RA, RB); @@ -1016,7 +1016,12 @@ void JitArm::twx(UGeckoInstruction inst) SetJumpTarget(exit5); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + gpr.Flush(); + fpr.Flush(); + WriteExit(js.compilerPC + 4); + } gpr.Unlock(RA, RB); } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp index 66e6a062f2..e48126f383 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -446,12 +446,14 @@ void JitArm::lXX(UGeckoInstruction inst) Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { ARMReg RD = gpr.R(d); - gpr.Flush(); - fpr.Flush(); // if it's still 0, we can wait until the next event TST(RD, RD); FixupBranch noIdle = B_CC(CC_NEQ); + + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); + rA = gpr.GetReg(); MOVI2R(rA, (u32)&PowerPC::OnIdle); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp index fcd544a771..9f5d1bfc7d 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp @@ -32,6 +32,27 @@ void ArmFPRCache::Init(ARMXEmitter *emitter) void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats) { + // Make sure the state is wiped on Start + // There is a potential for the state remaining dirty from the previous block + // This is due to conditional branches not clearing the register cache state + + for (u8 a = 0; a < 32; ++a) + { + if (_regs[a][0].GetType() != REG_NOTLOADED) + { + u32 regindex = _regs[a][0].GetRegIndex(); + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + _regs[a][0].Flush(); + } + if (_regs[a][1].GetType() != REG_NOTLOADED) + { + u32 regindex = _regs[a][1].GetRegIndex(); + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + _regs[a][1].Flush(); + } + } } ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count) @@ -162,7 +183,7 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad) return GetPPCReg(preg, true, preLoad); } -void ArmFPRCache::Flush() +void ArmFPRCache::Flush(FlushMode mode) { for (u8 a = 0; a < 32; ++a) { @@ -172,9 +193,12 @@ void ArmFPRCache::Flush() u32 regindex = _regs[a][0].GetRegIndex(); emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][0].Flush(); + if (mode == FLUSH_ALL) + { + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + _regs[a][0].Flush(); + } } if (_regs[a][1].GetType() != REG_NOTLOADED) { @@ -182,9 +206,12 @@ void ArmFPRCache::Flush() u32 regindex = _regs[a][1].GetRegIndex(); emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][1].Flush(); + if (mode == FLUSH_ALL) + { + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + _regs[a][1].Flush(); + } } } } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h index 1e2f1acfa7..719f55a0e2 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h +++ b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h @@ -43,7 +43,7 @@ public: ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. void Unlock(ARMReg V0); - void Flush(); + void Flush(FlushMode mode = FLUSH_ALL); ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register ARMReg R1(u32 preg, bool preLoad = true); }; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp index 4ba41eab23..ab51781adf 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp @@ -30,6 +30,19 @@ void ArmRegCache::Init(ARMXEmitter *emitter) } void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats) { + // Make sure the state is wiped on Start + // There is a potential for the state remaining dirty from the previous block + // This is due to conditional branches not clearing the register cache state + for (u8 a = 0; a < 32; ++a) + { + if (regs[a].GetType() == REG_REG) + { + u32 regindex = regs[a].GetRegIndex(); + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + } + regs[a].Flush(); + } } ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count) @@ -192,21 +205,36 @@ void ArmRegCache::SetImmediate(u32 preg, u32 imm) regs[preg].LoadToImm(imm); } -void ArmRegCache::Flush() +void ArmRegCache::Flush(FlushMode mode) { for (u8 a = 0; a < 32; ++a) { if (regs[a].GetType() == REG_IMM) - BindToRegister(a); + { + if (mode == FLUSH_ALL) + { + // This changes the type over to a REG_REG and gets caught below. + BindToRegister(a); + } + else + { + ARMReg tmp = GetReg(); + emit->MOVI2R(tmp, regs[a].GetImm()); + emit->STR(tmp, R9, PPCSTATE_OFF(gpr) + a * 4); + Unlock(tmp); + } + } if (regs[a].GetType() == REG_REG) { u32 regindex = regs[a].GetRegIndex(); emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; + if (mode == FLUSH_ALL) + { + ArmCRegs[regindex].PPCReg = 33; + ArmCRegs[regindex].LastLoad = 0; + regs[a].Flush(); + } } - - regs[a].Flush(); } } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h index e1097e9a63..e57c3d9230 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h +++ b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h @@ -29,6 +29,12 @@ enum RegType REG_AWAY, // Bound to a register, but not preloaded }; +enum FlushMode +{ + FLUSH_ALL = 0, + FLUSH_MAINTAIN_STATE, +}; + class OpArg { private: @@ -116,9 +122,8 @@ public: void Start(PPCAnalyst::BlockRegStats &stats); ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. - void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = - INVALID_REG); - void Flush(); + void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = INVALID_REG); + void Flush(FlushMode mode = FLUSH_ALL); ARMReg R(u32 preg); // Returns a cached register bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; } u32 GetImm(u32 preg) { return regs[preg].GetImm(); }