Adds support to the ARMv7 register cache for skipping the load of a destination register that is about to be overwritten.

This extends the register cache's BindToRegister function with a doLoad argument, just like the x86 register cache's BindToRegister.
The speedup is minor for the integer instructions updated in this commit.
This commit is contained in:
Ryan Houdek 2014-10-21 15:35:31 +00:00
parent a9f0bd72d2
commit 0253c35d3a
4 changed files with 108 additions and 21 deletions

View File

@ -143,6 +143,7 @@ void JitArm::subfic(UGeckoInstruction inst)
else else
{ {
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
gpr.BindToRegister(d, false);
MOVI2R(gpr.R(d), imm); MOVI2R(gpr.R(d), imm);
LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); LDRB(tmp, R9, PPCSTATE_OFF(xer_ca));
BIC(tmp, tmp, 1); BIC(tmp, tmp, 1);
@ -445,6 +446,7 @@ void JitArm::arith(UGeckoInstruction inst)
{ {
case 7: case 7:
{ {
gpr.BindToRegister(d, d == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
@ -456,6 +458,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 12: case 12:
case 13: case 13:
{ {
gpr.BindToRegister(d, d == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
@ -468,6 +471,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 15: // Arg2 is always Imm case 15: // Arg2 is always Imm
if (!isImm[0]) if (!isImm[0])
{ {
gpr.BindToRegister(d, d == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
@ -484,6 +488,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 25: case 25:
{ {
dest = a; dest = a;
gpr.BindToRegister(a, s == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RS = gpr.R(s); RS = gpr.R(s);
RA = gpr.R(a); RA = gpr.R(a);
@ -496,6 +501,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 27: case 27:
{ {
dest = a; dest = a;
gpr.BindToRegister(a, s == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RS = gpr.R(s); RS = gpr.R(s);
RA = gpr.R(a); RA = gpr.R(a);
@ -509,6 +515,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 29: case 29:
{ {
dest = a; dest = a;
gpr.BindToRegister(a, s == a);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
RS = gpr.R(s); RS = gpr.R(s);
RA = gpr.R(a); RA = gpr.R(a);
@ -522,6 +529,7 @@ void JitArm::arith(UGeckoInstruction inst)
{ {
case 24: case 24:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -529,12 +537,14 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 28: case 28:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
ANDS(RA, RS, RB); ANDS(RA, RS, RB);
break; break;
case 40: // subfx case 40: // subfx
gpr.BindToRegister(d, d == s || d == b);
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
RB = gpr.R(b); RB = gpr.R(b);
@ -542,6 +552,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 60: case 60:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -549,6 +560,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 124: case 124:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -557,6 +569,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 747: case 747:
case 235: case 235:
gpr.BindToRegister(d, d == a || d == b);
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
RB = gpr.R(b); RB = gpr.R(b);
@ -564,6 +577,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 284: case 284:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -572,6 +586,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 316: case 316:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -581,6 +596,7 @@ void JitArm::arith(UGeckoInstruction inst)
{ {
dest = a; dest = a;
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -591,6 +607,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 444: case 444:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -598,6 +615,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 476: case 476:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -606,6 +624,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 536: case 536:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -613,6 +632,7 @@ void JitArm::arith(UGeckoInstruction inst)
break; break;
case 792: case 792:
dest = a; dest = a;
gpr.BindToRegister(a, a == s || a == b);
RA = gpr.R(a); RA = gpr.R(a);
RS = gpr.R(s); RS = gpr.R(s);
RB = gpr.R(b); RB = gpr.R(b);
@ -621,6 +641,7 @@ void JitArm::arith(UGeckoInstruction inst)
case 10: // addcx case 10: // addcx
case 266: case 266:
case 778: // both addx case 778: // both addx
gpr.BindToRegister(d, d == a || d == b);
RD = gpr.R(d); RD = gpr.R(d);
RA = gpr.R(a); RA = gpr.R(a);
RB = gpr.R(b); RB = gpr.R(b);
@ -666,6 +687,7 @@ void JitArm::cntlzwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
u32 a = inst.RA, s = inst.RS; u32 a = inst.RA, s = inst.RS;
gpr.BindToRegister(a, a == s);
ARMReg RA = gpr.R(a); ARMReg RA = gpr.R(a);
ARMReg RS = gpr.R(s); ARMReg RS = gpr.R(s);
CLZ(RA, RS); CLZ(RA, RS);
@ -705,6 +727,8 @@ void JitArm::extshx(UGeckoInstruction inst)
return; return;
} }
gpr.BindToRegister(a, a == s);
ARMReg rA = gpr.R(a); ARMReg rA = gpr.R(a);
ARMReg rS = gpr.R(s); ARMReg rS = gpr.R(s);
SXTH(rA, rS); SXTH(rA, rS);
@ -726,6 +750,8 @@ void JitArm::extsbx(UGeckoInstruction inst)
return; return;
} }
gpr.BindToRegister(a, a == s);
ARMReg rA = gpr.R(a); ARMReg rA = gpr.R(a);
ARMReg rS = gpr.R(s); ARMReg rS = gpr.R(s);
SXTB(rA, rS); SXTB(rA, rS);
@ -768,6 +794,7 @@ void JitArm::negx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
gpr.BindToRegister(inst.RD, inst.RD == inst.RA);
ARMReg RA = gpr.R(inst.RA); ARMReg RA = gpr.R(inst.RA);
ARMReg RD = gpr.R(inst.RD); ARMReg RD = gpr.R(inst.RD);
@ -787,6 +814,7 @@ void JitArm::rlwimix(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
u32 mask = Helper_Mask(inst.MB,inst.ME); u32 mask = Helper_Mask(inst.MB,inst.ME);
gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
ARMReg RA = gpr.R(inst.RA); ARMReg RA = gpr.R(inst.RA);
ARMReg RS = gpr.R(inst.RS); ARMReg RS = gpr.R(inst.RS);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
@ -809,6 +837,7 @@ void JitArm::rlwinmx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
u32 mask = Helper_Mask(inst.MB,inst.ME); u32 mask = Helper_Mask(inst.MB,inst.ME);
gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
ARMReg RA = gpr.R(inst.RA); ARMReg RA = gpr.R(inst.RA);
ARMReg RS = gpr.R(inst.RS); ARMReg RS = gpr.R(inst.RS);
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
@ -829,6 +858,7 @@ void JitArm::rlwnmx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
u32 mask = Helper_Mask(inst.MB,inst.ME); u32 mask = Helper_Mask(inst.MB,inst.ME);
gpr.BindToRegister(inst.RA, inst.RA == inst.RS || inst.RA == inst.RB);
ARMReg RA = gpr.R(inst.RA); ARMReg RA = gpr.R(inst.RA);
ARMReg RS = gpr.R(inst.RS); ARMReg RS = gpr.R(inst.RS);
ARMReg RB = gpr.R(inst.RB); ARMReg RB = gpr.R(inst.RB);
@ -856,6 +886,7 @@ void JitArm::srawix(UGeckoInstruction inst)
int s = inst.RS; int s = inst.RS;
int amount = inst.SH; int amount = inst.SH;
gpr.BindToRegister(a, a == s);
if (amount != 0) if (amount != 0)
{ {
ARMReg RA = gpr.R(a); ARMReg RA = gpr.R(a);

View File

@ -132,6 +132,7 @@ void JitArm::mfspr(UGeckoInstruction inst)
{ {
case SPR_XER: case SPR_XER:
{ {
gpr.BindToRegister(inst.RD, false);
ARMReg RD = gpr.R(inst.RD); ARMReg RD = gpr.R(inst.RD);
ARMReg tmp = gpr.GetReg(); ARMReg tmp = gpr.GetReg();
LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl)); LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl));
@ -150,6 +151,7 @@ void JitArm::mfspr(UGeckoInstruction inst)
case SPR_TU: case SPR_TU:
FALLBACK_IF(true); FALLBACK_IF(true);
default: default:
gpr.BindToRegister(inst.RD, false);
ARMReg RD = gpr.R(inst.RD); ARMReg RD = gpr.R(inst.RD);
LDR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); LDR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4);
break; break;
@ -169,6 +171,7 @@ void JitArm::mfsr(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR])); LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR]));
} }
@ -191,6 +194,7 @@ void JitArm::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(msr)); LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(msr));
} }

View File

@ -143,7 +143,7 @@ bool ArmRegCache::FindFreeRegister(u32 &regindex)
ARMReg ArmRegCache::R(u32 preg) ARMReg ArmRegCache::R(u32 preg)
{ {
if (regs[preg].GetType() == REG_IMM) if (regs[preg].GetType() == REG_IMM)
return BindToRegister(preg); return BindToRegister(preg, true, true);
u32 lastRegIndex = GetLeastUsedRegister(true); u32 lastRegIndex = GetLeastUsedRegister(true);
@ -181,31 +181,76 @@ ARMReg ArmRegCache::R(u32 preg)
return ArmCRegs[lastRegIndex].Reg; return ArmCRegs[lastRegIndex].Reg;
} }
ARMReg ArmRegCache::BindToRegister(u32 preg) void ArmRegCache::BindToRegister(u32 preg, bool doLoad)
{
BindToRegister(preg, doLoad, false);
}
ARMReg ArmRegCache::BindToRegister(u32 preg, bool doLoad, bool kill_imm)
{ {
_assert_msg_(DYNA_REC, regs[preg].GetType() == REG_IMM, "Can't BindToRegister with a REG");
u32 lastRegIndex = GetLeastUsedRegister(false); u32 lastRegIndex = GetLeastUsedRegister(false);
u32 freeRegIndex; u32 freeRegIndex;
if (FindFreeRegister(freeRegIndex)) bool found_free = FindFreeRegister(freeRegIndex);
if (regs[preg].GetType() == REG_IMM)
{ {
emit->MOVI2R(ArmCRegs[freeRegIndex].Reg, regs[preg].GetImm()); if (!kill_imm)
ArmCRegs[freeRegIndex].PPCReg = preg; return INVALID_REG;
ArmCRegs[freeRegIndex].LastLoad = 0; if (found_free)
regs[preg].LoadToReg(freeRegIndex); {
return ArmCRegs[freeRegIndex].Reg; if (doLoad)
emit->MOVI2R(ArmCRegs[freeRegIndex].Reg, regs[preg].GetImm());
ArmCRegs[freeRegIndex].PPCReg = preg;
ArmCRegs[freeRegIndex].LastLoad = 0;
regs[preg].LoadToReg(freeRegIndex);
return ArmCRegs[freeRegIndex].Reg;
}
else
{
emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4);
if (doLoad)
emit->MOVI2R(ArmCRegs[lastRegIndex].Reg, regs[preg].GetImm());
regs[ArmCRegs[lastRegIndex].PPCReg].Flush();
ArmCRegs[lastRegIndex].PPCReg = preg;
ArmCRegs[lastRegIndex].LastLoad = 0;
regs[preg].LoadToReg(lastRegIndex);
return ArmCRegs[lastRegIndex].Reg;
}
}
else if (regs[preg].GetType() == REG_NOTLOADED)
{
if (found_free)
{
if (doLoad)
emit->LDR(ArmCRegs[freeRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
ArmCRegs[freeRegIndex].PPCReg = preg;
ArmCRegs[freeRegIndex].LastLoad = 0;
regs[preg].LoadToReg(freeRegIndex);
return ArmCRegs[freeRegIndex].Reg;
}
else
{
emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4);
if (doLoad)
emit->LDR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4);
regs[ArmCRegs[lastRegIndex].PPCReg].Flush();
ArmCRegs[lastRegIndex].PPCReg = preg;
ArmCRegs[lastRegIndex].LastLoad = 0;
regs[preg].LoadToReg(lastRegIndex);
return ArmCRegs[lastRegIndex].Reg;
}
} }
else else
{ {
emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4); u8 a = regs[preg].GetRegIndex();
emit->MOVI2R(ArmCRegs[lastRegIndex].Reg, regs[preg].GetImm()); return ArmCRegs[a].Reg;
regs[ArmCRegs[lastRegIndex].PPCReg].Flush();
ArmCRegs[lastRegIndex].PPCReg = preg;
ArmCRegs[lastRegIndex].LastLoad = 0;
regs[preg].LoadToReg(lastRegIndex);
return ArmCRegs[lastRegIndex].Reg;
} }
} }
@ -230,7 +275,7 @@ void ArmRegCache::Flush(FlushMode mode)
if (mode == FLUSH_ALL) if (mode == FLUSH_ALL)
{ {
// This changes the type over to a REG_REG and gets caught below. // This changes the type over to a REG_REG and gets caught below.
BindToRegister(a); BindToRegister(a, true, true);
} }
else else
{ {

View File

@ -110,6 +110,10 @@ private:
u32 GetLeastUsedRegister(bool increment); u32 GetLeastUsedRegister(bool increment);
bool FindFreeRegister(u32 &regindex); bool FindFreeRegister(u32 &regindex);
// Private function can kill immediates
ArmGen::ARMReg BindToRegister(u32 preg, bool doLoad, bool kill_imm);
protected: protected:
ArmGen::ARMXEmitter *emit; ArmGen::ARMXEmitter *emit;
@ -127,5 +131,8 @@ public:
bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; } bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; }
u32 GetImm(u32 preg) { return regs[preg].GetImm(); } u32 GetImm(u32 preg) { return regs[preg].GetImm(); }
void SetImmediate(u32 preg, u32 imm); void SetImmediate(u32 preg, u32 imm);
ArmGen::ARMReg BindToRegister(u32 preg);
// Public function doesn't kill immediates
// In reality when you call R(u32) it'll bind an immediate there
void BindToRegister(u32 preg, bool doLoad = true);
}; };