Support conditional register cache flushing on ARMv7.

Similar to PR #451, but for ARM as well.
This commit is contained in:
Ryan Houdek 2014-06-08 01:39:44 -05:00
parent a1374dd4ba
commit 2989ccff79
7 changed files with 113 additions and 31 deletions

View File

@ -150,9 +150,6 @@ void JitArm::bcx(UGeckoInstruction inst)
JITDISABLE(bJITBranchOff) JITDISABLE(bJITBranchOff)
// USES_CR // USES_CR
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg(); ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
@ -195,6 +192,9 @@ void JitArm::bcx(UGeckoInstruction inst)
destination = SignExt16(inst.BD << 2); destination = SignExt16(inst.BD << 2);
else else
destination = js.compilerPC + SignExt16(inst.BD << 2); destination = js.compilerPC + SignExt16(inst.BD << 2);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExit(destination); WriteExit(destination);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
@ -203,16 +203,17 @@ void JitArm::bcx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch ); SetJumpTarget( pCTRDontBranch );
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
}
} }
void JitArm::bcctrx(UGeckoInstruction inst) void JitArm::bcctrx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITBranchOff) JITDISABLE(bJITBranchOff)
gpr.Flush();
fpr.Flush();
// bcctrx doesn't decrement and/or test CTR // bcctrx doesn't decrement and/or test CTR
_dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); _dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!");
@ -221,6 +222,9 @@ void JitArm::bcctrx(UGeckoInstruction inst)
// BO_2 == 1z1zz -> b always // BO_2 == 1z1zz -> b always
//NPC = CTR & 0xfffffffc; //NPC = CTR & 0xfffffffc;
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
if (inst.LK_3) if (inst.LK_3)
@ -262,12 +266,19 @@ void JitArm::bcctrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
} }
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA); WriteExitDestInR(rA);
SetJumpTarget(b); SetJumpTarget(b);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
}
} }
} }
void JitArm::bclrx(UGeckoInstruction inst) void JitArm::bclrx(UGeckoInstruction inst)
@ -275,9 +286,6 @@ void JitArm::bclrx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITBranchOff) JITDISABLE(bJITBranchOff)
gpr.Flush();
fpr.Flush();
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg(); ARMReg rB = gpr.GetReg();
FixupBranch pCTRDontBranch; FixupBranch pCTRDontBranch;
@ -327,6 +335,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
//ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4);
} }
gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
WriteExitDestInR(rA); WriteExitDestInR(rA);
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
@ -335,5 +346,9 @@ void JitArm::bclrx(UGeckoInstruction inst)
SetJumpTarget( pCTRDontBranch ); SetJumpTarget( pCTRDontBranch );
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
}
} }

View File

@ -954,9 +954,6 @@ void JitArm::twx(UGeckoInstruction inst)
s32 a = inst.RA; s32 a = inst.RA;
gpr.Flush();
fpr.Flush();
ARMReg RA = gpr.GetReg(); ARMReg RA = gpr.GetReg();
ARMReg RB = gpr.GetReg(); ARMReg RB = gpr.GetReg();
MOV(RA, inst.TO); MOV(RA, inst.TO);
@ -1003,6 +1000,9 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(take4); SetJumpTarget(take4);
SetJumpTarget(take5); SetJumpTarget(take5);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
LDR(RA, R9, PPCSTATE_OFF(Exceptions)); LDR(RA, R9, PPCSTATE_OFF(Exceptions));
MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized
ORR(RA, RA, RB); ORR(RA, RA, RB);
@ -1016,7 +1016,12 @@ void JitArm::twx(UGeckoInstruction inst)
SetJumpTarget(exit5); SetJumpTarget(exit5);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
}
gpr.Unlock(RA, RB); gpr.Unlock(RA, RB);
} }

View File

@ -446,12 +446,14 @@ void JitArm::lXX(UGeckoInstruction inst)
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{ {
ARMReg RD = gpr.R(d); ARMReg RD = gpr.R(d);
gpr.Flush();
fpr.Flush();
// if it's still 0, we can wait until the next event // if it's still 0, we can wait until the next event
TST(RD, RD); TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ); FixupBranch noIdle = B_CC(CC_NEQ);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
rA = gpr.GetReg(); rA = gpr.GetReg();
MOVI2R(rA, (u32)&PowerPC::OnIdle); MOVI2R(rA, (u32)&PowerPC::OnIdle);

View File

@ -32,6 +32,27 @@ void ArmFPRCache::Init(ARMXEmitter *emitter)
// Resets the FPR cache bookkeeping at the start of a block.
// The stats parameter is not read by the body shown here.
void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats) void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats)
{
// Make sure the state is wiped on Start.
// The state can still be dirty from the previous block, because conditional
// branches do not clear the register cache state.
// No store is emitted in this function; only the bookkeeping is reset.
for (u8 a = 0; a < 32; ++a)
{
// First slot of entry a.
// NOTE(review): slots [0] and [1] presumably correspond to the R0()/R1() accessors - confirm.
if (_regs[a][0].GetType() != REG_NOTLOADED)
{
u32 regindex = _regs[a][0].GetRegIndex();
// Detach the host register entry from this PPC register.
// NOTE(review): 33 looks like the "no PPC register assigned" sentinel - confirm.
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
}
// Second slot of entry a, reset the same way as the first.
if (_regs[a][1].GetType() != REG_NOTLOADED)
{
u32 regindex = _regs[a][1].GetRegIndex();
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
}
} }
ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count) ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count)
@ -162,7 +183,7 @@ ARMReg ArmFPRCache::R1(u32 preg, bool preLoad)
return GetPPCReg(preg, true, preLoad); return GetPPCReg(preg, true, preLoad);
} }
void ArmFPRCache::Flush() void ArmFPRCache::Flush(FlushMode mode)
{ {
for (u8 a = 0; a < 32; ++a) for (u8 a = 0; a < 32; ++a)
{ {
@ -172,9 +193,12 @@ void ArmFPRCache::Flush()
u32 regindex = _regs[a][0].GetRegIndex(); u32 regindex = _regs[a][0].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[regindex].PPCReg = 33; if (mode == FLUSH_ALL)
ArmCRegs[regindex].LastLoad = 0; {
_regs[a][0].Flush(); ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][0].Flush();
}
} }
if (_regs[a][1].GetType() != REG_NOTLOADED) if (_regs[a][1].GetType() != REG_NOTLOADED)
{ {
@ -182,9 +206,12 @@ void ArmFPRCache::Flush()
u32 regindex = _regs[a][1].GetRegIndex(); u32 regindex = _regs[a][1].GetRegIndex();
emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); emit->VSTR(ArmCRegs[regindex].Reg, R9, offset);
ArmCRegs[regindex].PPCReg = 33; if (mode == FLUSH_ALL)
ArmCRegs[regindex].LastLoad = 0; {
_regs[a][1].Flush(); ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
_regs[a][1].Flush();
}
} }
} }
} }

View File

@ -43,7 +43,7 @@ public:
ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg V0); void Unlock(ARMReg V0);
void Flush(); void Flush(FlushMode mode = FLUSH_ALL);
ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register
ARMReg R1(u32 preg, bool preLoad = true); ARMReg R1(u32 preg, bool preLoad = true);
}; };

View File

@ -30,6 +30,19 @@ void ArmRegCache::Init(ARMXEmitter *emitter)
} }
// Resets the GPR cache bookkeeping at the start of a block.
// The stats parameter is not read by the body shown here.
void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats) void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
// Make sure the state is wiped on Start.
// The state can still be dirty from the previous block, because conditional
// branches do not clear the register cache state.
// No store is emitted in this function; only the bookkeeping is reset.
for (u8 a = 0; a < 32; ++a)
{
// Only entries of type REG_REG have a host register entry to release.
if (regs[a].GetType() == REG_REG)
{
u32 regindex = regs[a].GetRegIndex();
// NOTE(review): 33 looks like the "no PPC register assigned" sentinel - confirm.
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
}
// Every entry is flushed, whatever its type (so REG_IMM entries are reset too).
regs[a].Flush();
}
} }
ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count) ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
@ -192,21 +205,36 @@ void ArmRegCache::SetImmediate(u32 preg, u32 imm)
regs[preg].LoadToImm(imm); regs[preg].LoadToImm(imm);
} }
void ArmRegCache::Flush() void ArmRegCache::Flush(FlushMode mode)
{ {
for (u8 a = 0; a < 32; ++a) for (u8 a = 0; a < 32; ++a)
{ {
if (regs[a].GetType() == REG_IMM) if (regs[a].GetType() == REG_IMM)
BindToRegister(a); {
if (mode == FLUSH_ALL)
{
// This changes the type over to a REG_REG and gets caught below.
BindToRegister(a);
}
else
{
ARMReg tmp = GetReg();
emit->MOVI2R(tmp, regs[a].GetImm());
emit->STR(tmp, R9, PPCSTATE_OFF(gpr) + a * 4);
Unlock(tmp);
}
}
if (regs[a].GetType() == REG_REG) if (regs[a].GetType() == REG_REG)
{ {
u32 regindex = regs[a].GetRegIndex(); u32 regindex = regs[a].GetRegIndex();
emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4); emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4);
ArmCRegs[regindex].PPCReg = 33; if (mode == FLUSH_ALL)
ArmCRegs[regindex].LastLoad = 0; {
ArmCRegs[regindex].PPCReg = 33;
ArmCRegs[regindex].LastLoad = 0;
regs[a].Flush();
}
} }
regs[a].Flush();
} }
} }

View File

@ -29,6 +29,12 @@ enum RegType
REG_AWAY, // Bound to a register, but not preloaded REG_AWAY, // Bound to a register, but not preloaded
}; };
// Selects what a register cache Flush() does in addition to emitting the stores.
enum FlushMode
{
// Store the cached values and reset the cache bookkeeping.
// This is the default argument of Flush().
FLUSH_ALL = 0,
// Store the cached values but leave the cache bookkeeping untouched.
// Used before exits that are emitted on only one side of a conditional branch.
FLUSH_MAINTAIN_STATE,
};
class OpArg class OpArg
{ {
private: private:
@ -116,9 +122,8 @@ public:
void Start(PPCAnalyst::BlockRegStats &stats); void Start(PPCAnalyst::BlockRegStats &stats);
ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use.
void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = void Unlock(ARMReg R0, ARMReg R1 = INVALID_REG, ARMReg R2 = INVALID_REG, ARMReg R3 = INVALID_REG);
INVALID_REG); void Flush(FlushMode mode = FLUSH_ALL);
void Flush();
ARMReg R(u32 preg); // Returns a cached register ARMReg R(u32 preg); // Returns a cached register
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; } bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
u32 GetImm(u32 preg) { return regs[preg].GetImm(); } u32 GetImm(u32 preg) { return regs[preg].GetImm(); }