Merge pull request #3956 from mmastrac/mov_sum

Add MOV optimizations and MOV_sum
This commit is contained in:
Markus Wick 2016-06-27 23:53:27 +02:00 committed by GitHub
commit 93a5efa347
5 changed files with 92 additions and 88 deletions

View File

@ -1540,6 +1540,13 @@ void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2)
}
void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2)
{
// Shortcut to zero a register
if (a2.IsZero() && a1.IsSimpleReg() && !flags_locked)
{
XOR(bits, a1, a1);
return;
}
if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
ERROR_LOG(DYNA_REC, "Redundant MOV @ %p - bug in JIT?", code);
WriteNormalOp(bits, nrmMOV, a1, a2);
@ -1572,6 +1579,76 @@ void XEmitter::CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2)
}
}
void XEmitter::MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2)
{
// This stomps on flags, so ensure they aren't locked
_dbg_assert_(DYNA_REC, !flags_locked);
// Zero shortcuts (note that this can generate no code in the case where a1 == dest && a2 == zero
// or a2 == dest && a1 == zero)
if (a1.IsZero())
{
if (!a2.IsSimpleReg() || a2.GetSimpleReg() != dest)
{
MOV(bits, R(dest), a2);
}
return;
}
if (a2.IsZero())
{
if (!a1.IsSimpleReg() || a1.GetSimpleReg() != dest)
{
MOV(bits, R(dest), a1);
}
return;
}
// If dest == a1 or dest == a2 we can simplify this
if (a1.IsSimpleReg() && a1.GetSimpleReg() == dest)
{
ADD(bits, R(dest), a2);
return;
}
if (a2.IsSimpleReg() && a2.GetSimpleReg() == dest)
{
ADD(bits, R(dest), a1);
return;
}
// TODO: 32-bit optimizations may apply to other bit sizes (confirm)
if (bits == 32)
{
if (a1.IsImm() && a2.IsImm())
{
MOV(32, R(dest), Imm32(a1.Imm32() + a2.Imm32()));
return;
}
if (a1.IsSimpleReg() && a2.IsSimpleReg())
{
LEA(32, dest, MRegSum(a1.GetSimpleReg(), a2.GetSimpleReg()));
return;
}
if (a1.IsSimpleReg() && a2.IsImm())
{
LEA(32, dest, MDisp(a1.GetSimpleReg(), a2.Imm32()));
return;
}
if (a1.IsImm() && a2.IsSimpleReg())
{
LEA(32, dest, MDisp(a2.GetSimpleReg(), a1.Imm32()));
return;
}
}
// Fallback
MOV(bits, R(dest), a1);
ADD(bits, R(dest), a2);
}
void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
{
CheckFlags();

View File

@ -310,6 +310,7 @@ struct OpArg
}
// True when this operand's scale is SCALE_NONE.
// NOTE(review): presumably that means a plain register operand - confirm
// against the OpArg constructors.
bool IsSimpleReg() const
{
  return SCALE_NONE == scale;
}
// True when this operand passes IsSimpleReg() and its GetSimpleReg() is `reg`.
bool IsSimpleReg(X64Reg reg) const
{
  if (!IsSimpleReg())
    return false;
  return GetSimpleReg() == reg;
}
// True when this operand is an immediate (per IsImm()) whose stored value is 0.
bool IsZero() const
{
  if (!IsImm())
    return false;
  return offset == 0;
}
int GetImmBits() const
{
switch (scale)
@ -639,6 +640,7 @@ public:
void TEST(int bits, const OpArg& a1, const OpArg& a2);
void CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2);
void MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2);
// Are these useful at all? Consider removing.
void XCHG(int bits, const OpArg& a1, const OpArg& a2);

View File

@ -236,27 +236,9 @@ void Jit64::lXXx(UGeckoInstruction inst)
opAddress = R(RSCRATCH2);
storeAddress = true;
if (use_constant_offset)
{
if (gpr.R(a).IsSimpleReg() && offset != 0)
{
LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
else
{
MOV(32, opAddress, gpr.R(a));
if (offset != 0)
ADD(32, opAddress, Imm32((u32)offset));
}
}
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
}
else
{
MOV(32, opAddress, gpr.R(a));
ADD(32, opAddress, gpr.R(b));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
}
}
}
@ -307,16 +289,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
X64Reg tmp = gpr.GetFreeXReg();
gpr.FlushLockX(tmp);
if (inst.RA && gpr.R(inst.RA).IsSimpleReg() && gpr.R(inst.RB).IsSimpleReg())
{
LEA(32, addr, MRegSum(gpr.RX(inst.RA), gpr.RX(inst.RB)));
}
else
{
MOV(32, R(addr), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(addr), gpr.R(inst.RA));
}
MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));
// Check whether a JIT cache line needs to be invalidated.
LEA(32, value, MScaled(addr, SCALE_8, 0)); // addr << 3 (masks the first 3 bits)
@ -521,15 +494,7 @@ void Jit64::stXx(UGeckoInstruction inst)
if (update)
gpr.BindToRegister(a, true, true);
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
}
else
{
MOV(32, R(RSCRATCH2), gpr.R(a));
ADD(32, R(RSCRATCH2), gpr.R(b));
}
MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
int accessSize;
switch (inst.SUBOP10 & ~32)

View File

@ -50,14 +50,7 @@ void Jit64::lfXXX(UGeckoInstruction inst)
else
{
addr = R(RSCRATCH2);
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
else
{
MOV(32, addr, gpr.R(b));
if (a)
ADD(32, addr, gpr.R(a));
}
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
}
}
else
@ -162,14 +155,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
gpr.BindToRegister(a, true, true);
if (indexed)
{
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
else
{
MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
}
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
}
else
{
@ -209,9 +195,7 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
MOV(32, R(RSCRATCH2), gpr.R(b));
if (a)
ADD(32, R(RSCRATCH2), gpr.R(a));
MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
if (fpr.R(s).IsSimpleReg())
MOVD_xmm(R(RSCRATCH), fpr.RX(s));

View File

@ -43,21 +43,9 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
if (indexed)
LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
@ -143,21 +131,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
if (indexed)
LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
// In memcheck mode, don't update the address until the exception check
if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));