Merge pull request #2216 from Sonicadvance1/aarch64_dirty_dirty

[AArch64] Implement dirty register tracking.
This commit is contained in:
Ryan Houdek 2015-03-22 11:32:43 -05:00
commit 5dbfebcd30
8 changed files with 126 additions and 53 deletions

View File

@ -22,8 +22,10 @@ void JitArm64::fabsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -38,10 +40,12 @@ void JitArm64::faddsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -53,9 +57,12 @@ void JitArm64::faddx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FADD(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
@ -92,6 +99,7 @@ void JitArm64::fmaddx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -111,8 +119,11 @@ void JitArm64::fmrx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.INS(64, VD, 0, VB, 0);
}
@ -145,6 +156,7 @@ void JitArm64::fmsubx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -164,10 +176,12 @@ void JitArm64::fmulsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -179,9 +193,12 @@ void JitArm64::fmulx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VC = fpr.R(inst.FC);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
@ -196,8 +213,11 @@ void JitArm64::fnabsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FABS(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -213,8 +233,11 @@ void JitArm64::fnegx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FNEG(EncodeRegToDouble(V0), EncodeRegToDouble(VB));
@ -252,6 +275,7 @@ void JitArm64::fnmaddx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -295,6 +319,7 @@ void JitArm64::fnmsubx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
@ -315,11 +340,14 @@ void JitArm64::fselx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VD = fpr.R(d);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg V0 = fpr.GetReg();
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VC = fpr.R(inst.FC);
m_float_emit.FCMPE(EncodeRegToDouble(VA));
m_float_emit.FCSEL(EncodeRegToDouble(V0), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE);
@ -334,10 +362,12 @@ void JitArm64::fsubsx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
@ -349,9 +379,12 @@ void JitArm64::fsubx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
FALLBACK_IF(inst.Rc);
ARM64Reg VA = fpr.R(inst.FA);
ARM64Reg VB = fpr.R(inst.FB);
ARM64Reg VD = fpr.R(inst.FD);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, true);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FSUB(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VB));

View File

@ -284,6 +284,7 @@ void JitArm64::addx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a || d == b);
ADD(gpr.R(d), gpr.R(a), gpr.R(b));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
@ -297,8 +298,6 @@ void JitArm64::extsXx(UGeckoInstruction inst)
int a = inst.RA, s = inst.RS;
int size = inst.SUBOP10 == 922 ? 16 : 8;
gpr.BindToRegister(a, a == s);
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, (u32)(s32)(size == 16 ? (s16)gpr.GetImm(s) : (s8)gpr.GetImm(s)));
@ -307,11 +306,11 @@ void JitArm64::extsXx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(a, a == s);
SBFM(gpr.R(a), gpr.R(s), 0, size - 1);
if (inst.Rc)
ComputeRC(gpr.R(a), 0);
}
}
void JitArm64::cntlzwx(UGeckoInstruction inst)
@ -321,8 +320,6 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
gpr.BindToRegister(a, a == s);
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
@ -331,6 +328,7 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(a, a == s);
CLZ(gpr.R(a), gpr.R(s));
if (inst.Rc)
ComputeRC(gpr.R(a), 0);
@ -346,8 +344,6 @@ void JitArm64::negx(UGeckoInstruction inst)
FALLBACK_IF(inst.OE);
gpr.BindToRegister(d, d == a);
if (gpr.IsImm(a))
{
gpr.SetImmediate(d, ~((u32)gpr.GetImm(a)) + 1);
@ -356,6 +352,7 @@ void JitArm64::negx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), WSP, gpr.R(a), ArithOption(gpr.R(a), ST_LSL, 0));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
@ -487,26 +484,27 @@ void JitArm64::rlwinmx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
u32 a = inst.RA, s = inst.RS;
u32 mask = Helper_Mask(inst.MB, inst.ME);
if (gpr.IsImm(inst.RS))
{
gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask);
gpr.SetImmediate(a, _rotl(gpr.GetImm(s), inst.SH) & mask);
if (inst.Rc)
ComputeRC(gpr.GetImm(inst.RA), 0);
ComputeRC(gpr.GetImm(a), 0);
return;
}
gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
gpr.BindToRegister(a, a == s);
ARM64Reg WA = gpr.GetReg();
ArithOption Shift(gpr.R(inst.RS), ST_ROR, 32 - inst.SH);
ArithOption Shift(gpr.R(s), ST_ROR, 32 - inst.SH);
MOVI2R(WA, mask);
AND(gpr.R(inst.RA), WA, gpr.R(inst.RS), Shift);
AND(gpr.R(a), WA, gpr.R(s), Shift);
gpr.Unlock(WA);
if (inst.Rc)
ComputeRC(gpr.R(inst.RA), 0);
ComputeRC(gpr.R(a), 0);
}
void JitArm64::srawix(UGeckoInstruction inst)
@ -685,6 +683,7 @@ void JitArm64::subfx(UGeckoInstruction inst)
}
else
{
gpr.BindToRegister(d, d == a || d == b);
SUB(gpr.R(d), gpr.R(b), gpr.R(a));
if (inst.Rc)
ComputeRC(gpr.R(d), 0);

View File

@ -145,7 +145,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(addr, false);
MOV(gpr.R(addr), addr_reg);
}
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -493,6 +496,8 @@ void JitArm64::stX(UGeckoInstruction inst)
if (update)
{
gpr.BindToRegister(a, false);
ARM64Reg WA = gpr.GetReg();
ARM64Reg RB;
ARM64Reg RA = gpr.R(a);

View File

@ -71,6 +71,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
fpr.BindToRegister(inst.FD, false);
ARM64Reg VD = fpr.R(inst.FD);
ARM64Reg addr_reg = W0;
@ -172,7 +173,10 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
@ -360,7 +364,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOVI2R(XA, imm_addr);
if (update)
{
gpr.BindToRegister(a, false);
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();

View File

@ -57,7 +57,10 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 24, 29); // Scale
if (update)
{
gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg);
}
MOVI2R(X30, (u64)&asm_routines.pairedLoadQuantized[inst.W * 8]);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
@ -125,7 +128,10 @@ void JitArm64::psq_st(UGeckoInstruction inst)
UBFM(scale_reg, scale_reg, 8, 13); // Scale
if (update)
{
gpr.BindToRegister(inst.RA, false);
MOV(arm_addr, addr_reg);
}
m_float_emit.FCVTN(32, D0, VS);

View File

@ -95,6 +95,7 @@ void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
if (reg.GetType() == REG_REG)
{
ARM64Reg host_reg = reg.GetReg();
if (reg.IsDirty())
m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
if (!maintain_state)
@ -169,6 +170,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, reg.GetImm());
reg.LoadToReg(host_reg);
reg.SetDirty(true);
return host_reg;
}
break;
@ -178,6 +180,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
// This can also happen in cases where PPCAnalyst isn't feeding us proper register usage statistics
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
reg.SetDirty(false);
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
return host_reg;
}
@ -202,6 +205,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
reg.SetDirty(true);
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
@ -292,6 +296,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg)
{
ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg);
reg.SetDirty(false);
m_float_emit->LDR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
return host_reg;
}
@ -308,6 +313,7 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load)
{
OpArg& reg = m_guest_registers[preg];
reg.SetDirty(true);
if (reg.GetType() == REG_NOTLOADED)
{
ARM64Reg host_reg = GetReg();
@ -355,7 +361,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{
ARM64Reg host_reg = reg.GetReg();
if (reg.IsDirty())
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state)
{
UnlockRegister(host_reg);

View File

@ -85,6 +85,9 @@ public:
// Restarts the last-used counter from zero.
void ResetLastUsed()
{
	m_last_used = 0;
}
// Advances the last-used counter by one.
void IncrementLastUsed()
{
	m_last_used += 1;
}
// Records the dirty flag for this register entry.
void SetDirty(bool dirty)
{
	m_dirty = dirty;
}
// Reports the dirty flag last stored by SetDirty().
bool IsDirty()
{
	return m_dirty;
}
private:
// For REG_REG
RegType m_type; // store type
@ -94,6 +97,8 @@ private:
u32 m_value; // IMM value
u32 m_last_used;
bool m_dirty;
};
class HostReg

View File

@ -51,6 +51,7 @@ void JitArm64::mtmsr(UGeckoInstruction inst)
// Don't interpret this, if we do we get thrown out
//JITDISABLE(bJITSystemRegistersOff)
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr));
gpr.Flush(FlushMode::FLUSH_ALL);
@ -64,6 +65,7 @@ void JitArm64::mfmsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(msr));
}
@ -87,6 +89,7 @@ void JitArm64::mfsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RD, false);
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), X29, PPCSTATE_OFF(sr[inst.SR]));
}
@ -95,6 +98,7 @@ void JitArm64::mtsr(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
gpr.BindToRegister(inst.RS, true);
STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(sr[inst.SR]));
}
@ -103,13 +107,16 @@ void JitArm64::mfsrin(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 b = inst.RB, d = inst.RD;
gpr.BindToRegister(d, d == b);
ARM64Reg index = gpr.GetReg();
ARM64Reg index64 = EncodeRegTo64(index);
ARM64Reg RB = gpr.R(inst.RB);
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
LDR(INDEX_UNSIGNED, gpr.R(inst.RD), index64, PPCSTATE_OFF(sr[0]));
LDR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
}
@ -119,13 +126,16 @@ void JitArm64::mtsrin(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
u32 b = inst.RB, d = inst.RD;
gpr.BindToRegister(d, d == b);
ARM64Reg index = gpr.GetReg();
ARM64Reg index64 = EncodeRegTo64(index);
ARM64Reg RB = gpr.R(inst.RB);
ARM64Reg RB = gpr.R(b);
UBFM(index, RB, 28, 31);
ADD(index64, X29, index64, ArithOption(index64, ST_LSL, 2));
STR(INDEX_UNSIGNED, gpr.R(inst.RD), index64, PPCSTATE_OFF(sr[0]));
STR(INDEX_UNSIGNED, gpr.R(d), index64, PPCSTATE_OFF(sr[0]));
gpr.Unlock(index);
}