From 1e08ad0d978683e096bc8161778b71f763cc52a6 Mon Sep 17 00:00:00 2001
From: Matt Mastracci
Date: Sun, 26 Jun 2016 20:05:45 -0600
Subject: [PATCH] Add MOV optimization and MOV_sum

Replaces incarnations of the A=B+C pattern throughout the code so we can
apply optimizations consistently.
---
 Source/Core/Common/x64Emitter.cpp             | 77 +++++++++++++++++++
 Source/Core/Common/x64Emitter.h               |  2 +
 .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 43 +----------
 .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp   | 22 +-----
 .../PowerPC/Jit64/Jit_LoadStorePaired.cpp     | 36 ++-------
 5 files changed, 92 insertions(+), 88 deletions(-)

diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 8ad1d46e40..9a4ca705a0 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -1540,6 +1540,13 @@ void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2)
 }
 void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2)
 {
+  // Shortcut to zero a register
+  if (a2.IsZero() && a1.IsSimpleReg() && !flags_locked)
+  {
+    XOR(bits, a1, a1);
+    return;
+  }
+
   if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
     ERROR_LOG(DYNA_REC, "Redundant MOV @ %p - bug in JIT?", code);
   WriteNormalOp(bits, nrmMOV, a1, a2);
@@ -1572,6 +1579,76 @@ void XEmitter::CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2)
   }
 }
 
+void XEmitter::MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2)
+{
+  // This stomps on flags, so ensure they aren't locked
+  _dbg_assert_(DYNA_REC, !flags_locked);
+
+  // Zero shortcuts (note that this can generate no code in the case where a1 == dest && a2 == zero
+  // or a2 == dest && a1 == zero)
+  if (a1.IsZero())
+  {
+    if (!a2.IsSimpleReg() || a2.GetSimpleReg() != dest)
+    {
+      MOV(bits, R(dest), a2);
+    }
+    return;
+  }
+  if (a2.IsZero())
+  {
+    if (!a1.IsSimpleReg() || a1.GetSimpleReg() != dest)
+    {
+      MOV(bits, R(dest), a1);
+    }
+    return;
+  }
+
+  // If dest == a1 or dest == a2 we can simplify this
+  if (a1.IsSimpleReg() && a1.GetSimpleReg() == dest)
+  {
+    ADD(bits, R(dest), a2);
+    return;
+  }
+
+  if (a2.IsSimpleReg() && a2.GetSimpleReg() == dest)
+  {
+    ADD(bits, R(dest), a1);
+    return;
+  }
+
+  // TODO: 32-bit optimizations may apply to other bit sizes (confirm)
+  if (bits == 32)
+  {
+    if (a1.IsImm() && a2.IsImm())
+    {
+      MOV(32, R(dest), Imm32(a1.Imm32() + a2.Imm32()));
+      return;
+    }
+
+    if (a1.IsSimpleReg() && a2.IsSimpleReg())
+    {
+      LEA(32, dest, MRegSum(a1.GetSimpleReg(), a2.GetSimpleReg()));
+      return;
+    }
+
+    if (a1.IsSimpleReg() && a2.IsImm())
+    {
+      LEA(32, dest, MDisp(a1.GetSimpleReg(), a2.Imm32()));
+      return;
+    }
+
+    if (a1.IsImm() && a2.IsSimpleReg())
+    {
+      LEA(32, dest, MDisp(a2.GetSimpleReg(), a1.Imm32()));
+      return;
+    }
+  }
+
+  // Fallback
+  MOV(bits, R(dest), a1);
+  ADD(bits, R(dest), a2);
+}
+
 void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
 {
   CheckFlags();
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 73f1d69721..7e717ad62c 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -310,6 +310,7 @@ struct OpArg
   }
   bool IsSimpleReg() const { return scale == SCALE_NONE; }
   bool IsSimpleReg(X64Reg reg) const { return IsSimpleReg() && GetSimpleReg() == reg; }
+  bool IsZero() const { return IsImm() && offset == 0; }
   int GetImmBits() const
   {
     switch (scale)
@@ -639,6 +640,7 @@ public:
   void TEST(int bits, const OpArg& a1, const OpArg& a2);
   void CMP_or_TEST(int bits, const OpArg& a1, const OpArg& a2);
+  void MOV_sum(int bits, X64Reg dest, const OpArg& a1, const OpArg& a2);
 
   // Are these useful at all? Consider removing.
   void XCHG(int bits, const OpArg& a1, const OpArg& a2);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index b28297df0a..32a19b7a2b 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -236,27 +236,9 @@ void Jit64::lXXx(UGeckoInstruction inst)
         opAddress = R(RSCRATCH2);
         storeAddress = true;
         if (use_constant_offset)
-        {
-          if (gpr.R(a).IsSimpleReg() && offset != 0)
-          {
-            LEA(32, RSCRATCH2, MDisp(gpr.RX(a), offset));
-          }
-          else
-          {
-            MOV(32, opAddress, gpr.R(a));
-            if (offset != 0)
-              ADD(32, opAddress, Imm32((u32)offset));
-          }
-        }
-        else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-        {
-          LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
-        }
+          MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));
         else
-        {
-          MOV(32, opAddress, gpr.R(a));
-          ADD(32, opAddress, gpr.R(b));
-        }
+          MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
       }
     }
   }
@@ -307,16 +289,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
   X64Reg tmp = gpr.GetFreeXReg();
   gpr.FlushLockX(tmp);
 
-  if (inst.RA && gpr.R(inst.RA).IsSimpleReg() && gpr.R(inst.RB).IsSimpleReg())
-  {
-    LEA(32, addr, MRegSum(gpr.RX(inst.RA), gpr.RX(inst.RB)));
-  }
-  else
-  {
-    MOV(32, R(addr), gpr.R(inst.RB));
-    if (inst.RA)
-      ADD(32, R(addr), gpr.R(inst.RA));
-  }
+  MOV_sum(32, addr, inst.RA ? gpr.R(inst.RA) : Imm32(0), gpr.R(inst.RB));
 
   // Check whether a JIT cache line needs to be invalidated.
   LEA(32, value, MScaled(addr, SCALE_8, 0));  // addr << 3 (masks the first 3 bits)
@@ -521,15 +494,7 @@ void Jit64::stXx(UGeckoInstruction inst)
   if (update)
     gpr.BindToRegister(a, true, true);
 
-  if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-  {
-    LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
-  }
-  else
-  {
-    MOV(32, R(RSCRATCH2), gpr.R(a));
-    ADD(32, R(RSCRATCH2), gpr.R(b));
-  }
+  MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));
 
   int accessSize;
   switch (inst.SUBOP10 & ~32)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
index aba308c458..2e6f5bbf01 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
@@ -50,14 +50,7 @@ void Jit64::lfXXX(UGeckoInstruction inst)
     else
     {
       addr = R(RSCRATCH2);
-      if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-        LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
-      else
-      {
-        MOV(32, addr, gpr.R(b));
-        if (a)
-          ADD(32, addr, gpr.R(a));
-      }
+      MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
     }
   }
   else
@@ -162,14 +155,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
     gpr.BindToRegister(a, true, true);
   if (indexed)
   {
-    if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-      LEA(32, RSCRATCH2, MRegSum(gpr.RX(a), gpr.RX(b)));
-    else
-    {
-      MOV(32, R(RSCRATCH2), gpr.R(b));
-      if (a)
-        ADD(32, R(RSCRATCH2), gpr.R(a));
-    }
+    MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
   }
   else
   {
@@ -209,9 +195,7 @@ void Jit64::stfiwx(UGeckoInstruction inst)
   int a = inst.RA;
   int b = inst.RB;
 
-  MOV(32, R(RSCRATCH2), gpr.R(b));
-  if (a)
-    ADD(32, R(RSCRATCH2), gpr.R(a));
+  MOV_sum(32, RSCRATCH2, a ? gpr.R(a) : Imm32(0), gpr.R(b));
 
   if (fpr.R(s).IsSimpleReg())
     MOVD_xmm(R(RSCRATCH), fpr.RX(s));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 0784fa1f77..4e35329187 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -43,21 +43,9 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
   gpr.FlushLockX(RSCRATCH_EXTRA);
   if (update)
     gpr.BindToRegister(a, true, true);
-  if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
-  {
-    if (indexed)
-      LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
-    else
-      LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
-  }
-  else
-  {
-    MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
-    if (indexed)
-      ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
-    else if (offset)
-      ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
-  }
+
+  MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
+
   // In memcheck mode, don't update the address until the exception check
   if (update && !jo.memcheck)
     MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
@@ -143,21 +131,9 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
   gpr.FlushLockX(RSCRATCH_EXTRA);
   gpr.BindToRegister(a, true, update);
   fpr.BindToRegister(s, false, true);
-  if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
-  {
-    if (indexed)
-      LEA(32, RSCRATCH_EXTRA, MRegSum(gpr.RX(a), gpr.RX(b)));
-    else
-      LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
-  }
-  else
-  {
-    MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
-    if (indexed)
-      ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
-    else if (offset)
-      ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
-  }
+
+  MOV_sum(32, RSCRATCH_EXTRA, gpr.R(a), indexed ? gpr.R(b) : Imm32((u32)offset));
+
   // In memcheck mode, don't update the address until the exception check
   if (update && !jo.memcheck)
     MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
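
For reference, a rough sketch of the instruction selection MOV_sum performs
for the 32-bit call shapes used in the hunks above. The call forms mirror the
Jit64 sites in this patch, but the commented x86 on the right is illustrative
only and assumes the PPC operands are bound to host registers.

  // Register + register, neither equal to the destination:
  MOV_sum(32, RSCRATCH2, gpr.R(a), gpr.R(b));              // lea scratch2, [ra + rb]

  // Register + constant displacement (register != destination):
  MOV_sum(32, RSCRATCH2, gpr.R(a), Imm32((u32)offset));    // lea scratch2, [ra + offset]

  // One operand already lives in the destination register:
  MOV_sum(32, RSCRATCH2, R(RSCRATCH2), gpr.R(b));          // add scratch2, rb

  // One operand is a zero immediate (e.g. inst.RA == 0):
  MOV_sum(32, RSCRATCH2, Imm32(0), gpr.R(b));              // mov scratch2, rb (no code if b is already the destination)

  // Also covered by this patch: MOV of a zero immediate into a register now
  // takes the shorter xor form when flags aren't locked:
  MOV(32, R(RSCRATCH), Imm32(0));                          // xor scratch, scratch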