JitArm64: Avoid MOVI2R if possible.

Just use the various immediate helpers (ADDI2R, SUBI2R, ...) instead.
They have some optimizations internally.
This commit is contained in:
degasus 2016-10-26 20:47:03 +02:00
parent 838b234317
commit df250b84cc
9 changed files with 75 additions and 256 deletions

View File

@ -4145,6 +4145,24 @@ void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
}
}
// Emits ADDS of Rn and the immediate imm into Rd.
// When IsImmArithmetic() can encode imm, the immediate form of ADDS is emitted and scratch is
// left untouched. Otherwise imm is loaded into scratch with MOVI2R and the register form is
// emitted; scratch must not be INVALID_REG in that case (asserted).
void ARM64XEmitter::ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
{
  u32 encoded;
  bool shifted;
  if (!IsImmArithmetic(imm, &encoded, &shifted))
  {
    // No immediate encoding available: fall back to the register form via scratch.
    // Only the low 32 bits of imm are printed in the assert message.
    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
                 "ADDSI2R - failed to construct arithmetic immediate value from %08x, need scratch",
                 (u32)imm);
    MOVI2R(scratch, imm);
    ADDS(Rd, Rn, scratch);
    return;
  }

  ADDS(Rd, Rn, encoded, shifted);
}
void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
{
u32 val;
@ -4163,6 +4181,23 @@ void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
}
}
// Emits SUBS of Rn and the immediate imm into Rd.
// When IsImmArithmetic() can encode imm, the immediate form of SUBS is emitted and scratch is
// left untouched. Otherwise imm is loaded into scratch with MOVI2R and the register form is
// emitted; scratch must not be INVALID_REG in that case (asserted).
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
{
  u32 val;
  bool shift;
  if (IsImmArithmetic(imm, &val, &shift))
  {
    SUBS(Rd, Rn, val, shift);
  }
  else
  {
    // The message names this helper (it previously said "ANDSI2R") so a failed assert points at
    // the right emitter function. Only the low 32 bits of imm are printed.
    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
                 "SUBSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
    MOVI2R(scratch, imm);
    SUBS(Rd, Rn, scratch);
  }
}
void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
{
u32 val;
@ -4320,21 +4355,4 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch)
DUP(32, Rd, Rd, 0);
}
// Emits SUBS of Rn and the immediate imm into Rd.
// When IsImmArithmetic() can encode imm, the immediate form of SUBS is emitted and scratch is
// left untouched. Otherwise imm is loaded into scratch with MOVI2R and the register form is
// emitted; scratch must not be INVALID_REG in that case (asserted).
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
{
  u32 val;
  bool shift;
  if (IsImmArithmetic(imm, &val, &shift))
  {
    SUBS(Rd, Rn, val, shift);
  }
  else
  {
    // The message names this helper (it previously said "ANDSI2R") so a failed assert points at
    // the right emitter function. Only the low 32 bits of imm are printed.
    _assert_msg_(DYNA_REC, scratch != INVALID_REG,
                 "SUBSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
    MOVI2R(scratch, imm);
    SUBS(Rd, Rn, scratch);
  }
}
} // namespace

View File

@ -855,6 +855,7 @@ public:
// Immediate-operand helpers: each takes a 64-bit immediate and an optional scratch register
// that defaults to INVALID_REG.
// NOTE(review): the ADDSI2R and SUBSI2R definitions assert scratch != INVALID_REG when
// IsImmArithmetic() cannot encode imm; presumably the other helpers here need a scratch
// register under the same condition - confirm against their definitions.
void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);

View File

@ -183,20 +183,10 @@ void JitArm64::DoDownCount()
{
ARM64Reg WA = gpr.GetReg();
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
if (js.downcountAmount < 4096) // We can enlarge this if we used rotations
{
SUBS(WA, WA, js.downcountAmount);
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
}
else
{
ARM64Reg WB = gpr.GetReg();
MOVI2R(WB, js.downcountAmount);
SUBS(WA, WA, WB);
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
gpr.Unlock(WB);
}
gpr.Unlock(WA);
ARM64Reg WB = gpr.GetReg();
SUBSI2R(WA, WA, js.downcountAmount, WB);
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
gpr.Unlock(WA, WB);
}
// Exits

View File

@ -54,17 +54,14 @@ void JitArm64::rfi(UGeckoInstruction inst)
ARM64Reg WB = gpr.GetReg();
ARM64Reg WC = gpr.GetReg();
MOVI2R(WA, (~mask) & clearMSR13);
MOVI2R(WB, mask & clearMSR13);
LDR(INDEX_UNSIGNED, WC, PPC_REG, PPCSTATE_OFF(msr));
AND(WC, WC, WA); // rD = Masked MSR
ANDI2R(WC, WC, (~mask) & clearMSR13, WA); // rD = Masked MSR
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here
AND(WA, WA, WB); // rB contains masked SRR1 here
ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1
ANDI2R(WA, WA, mask & clearMSR13, WB); // rB contains masked SRR1 here
ORR(WA, WA, WC); // rB = Masked MSR OR masked SRR1
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr)); // STR rB in to rA

View File

@ -154,29 +154,9 @@ void JitArm64::addix(UGeckoInstruction inst)
{
gpr.BindToRegister(d, d == a);
if (imm < 4096)
{
ADD(gpr.R(d), gpr.R(a), imm);
}
else if (imm % 4096 == 0 && imm < 4096 * 4096)
{
ADD(gpr.R(d), gpr.R(a), imm / 4096, true);
}
else if (imm_neg < 4096)
{
SUB(gpr.R(d), gpr.R(a), imm_neg);
}
else if (imm_neg % 4096 == 0 && imm_neg < 4096 * 4096)
{
SUB(gpr.R(d), gpr.R(a), imm_neg / 4096, true);
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, imm);
ADD(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}
ARM64Reg WA = gpr.GetReg();
ADDI2R(gpr.R(d), gpr.R(a), imm, WA);
gpr.Unlock(WA);
}
}
else
@ -316,17 +296,9 @@ void JitArm64::addx(UGeckoInstruction inst)
int imm_reg = gpr.IsImm(a) ? a : b;
int in_reg = gpr.IsImm(a) ? b : a;
gpr.BindToRegister(d, d == in_reg);
if (gpr.GetImm(imm_reg) < 4096)
{
ADD(gpr.R(d), gpr.R(in_reg), gpr.GetImm(imm_reg));
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, gpr.GetImm(imm_reg));
ADD(gpr.R(d), gpr.R(in_reg), WA);
gpr.Unlock(WA);
}
ARM64Reg WA = gpr.GetReg();
ADDI2R(gpr.R(d), gpr.R(in_reg), gpr.GetImm(imm_reg), WA);
gpr.Unlock(WA);
if (inst.Rc)
ComputeRC(gpr.R(d), 0);
}
@ -479,15 +451,7 @@ void JitArm64::cmpi(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 4096)
{
SUB(WA, gpr.R(a), inst.SIMM_16);
}
else
{
MOVI2R(WA, inst.SIMM_16);
SUB(WA, gpr.R(a), WA);
}
SUBI2R(WA, gpr.R(a), inst.SIMM_16, WA);
ComputeRC(WA, crf);
@ -516,15 +480,7 @@ void JitArm64::cmpli(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
if (inst.UIMM < 4096)
{
SUB(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM);
}
else
{
MOVI2R(WA, inst.UIMM);
SUB(XA, EncodeRegTo64(gpr.R(a)), XA);
}
SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA);
STR(INDEX_UNSIGNED, XA, PPC_REG,
PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
@ -664,24 +620,9 @@ void JitArm64::addic(UGeckoInstruction inst)
else
{
gpr.BindToRegister(d, d == a);
if (imm < 4096)
{
ADDS(gpr.R(d), gpr.R(a), imm);
}
else if (simm > -4096 && simm < 0)
{
SUBS(gpr.R(d), gpr.R(a), std::abs(simm));
}
else
{
ARM64Reg WA = gpr.GetReg();
MOVI2R(WA, std::abs(simm));
if (simm < 0)
SUBS(gpr.R(d), gpr.R(a), WA);
else
ADDS(gpr.R(d), gpr.R(a), WA);
gpr.Unlock(WA);
}
ARM64Reg WA = gpr.GetReg();
ADDSI2R(gpr.R(d), gpr.R(a), simm, WA);
gpr.Unlock(WA);
ComputeCarry();
if (rc)
@ -850,10 +791,9 @@ void JitArm64::subfex(UGeckoInstruction inst)
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.BindToRegister(d, false);
MOVI2R(gpr.R(d), ~i + j);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADD(gpr.R(d), gpr.R(d), WA);
ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d));
gpr.Unlock(WA);
bool must_have_carry = Interpreter::Helper_Carry(~i, j);
@ -971,10 +911,9 @@ void JitArm64::addex(UGeckoInstruction inst)
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.BindToRegister(d, false);
MOVI2R(gpr.R(d), i + j);
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADD(gpr.R(d), gpr.R(d), WA);
ADDI2R(gpr.R(d), WA, i + j, gpr.R(d));
gpr.Unlock(WA);
bool must_have_carry = Interpreter::Helper_Carry(i, j);

View File

@ -58,19 +58,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
else
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, up_reg, offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, up_reg, std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, up_reg);
}
ADDI2R(addr_reg, up_reg, offset, addr_reg);
}
}
else
@ -91,28 +79,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
u32 reg_offset = gpr.GetImm(addr);
if (reg_offset < 4096)
{
ADD(addr_reg, off_reg, reg_offset);
}
else
{
MOVI2R(addr_reg, gpr.GetImm(addr));
ADD(addr_reg, addr_reg, off_reg);
}
ADDI2R(addr_reg, off_reg, reg_offset, addr_reg);
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
u32 reg_offset = gpr.GetImm(offsetReg);
if (reg_offset < 4096)
{
ADD(addr_reg, up_reg, reg_offset);
}
else
{
MOVI2R(addr_reg, gpr.GetImm(offsetReg));
ADD(addr_reg, addr_reg, up_reg);
}
ADDI2R(addr_reg, up_reg, reg_offset, addr_reg);
}
else
{
@ -202,19 +174,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, reg_dest, offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, reg_dest, std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, reg_dest);
}
ADDI2R(addr_reg, reg_dest, offset, addr_reg);
}
}
else
@ -235,28 +195,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
u32 reg_offset = gpr.GetImm(dest);
if (reg_offset < 4096)
{
ADD(addr_reg, reg_off, reg_offset);
}
else
{
MOVI2R(addr_reg, reg_offset);
ADD(addr_reg, addr_reg, reg_off);
}
ADDI2R(addr_reg, reg_off, reg_offset, addr_reg);
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
u32 reg_offset = gpr.GetImm(regOffset);
if (reg_offset < 4096)
{
ADD(addr_reg, reg_dest, reg_offset);
}
else
{
MOVI2R(addr_reg, gpr.GetImm(regOffset));
ADD(addr_reg, addr_reg, reg_dest);
}
ADDI2R(addr_reg, reg_dest, reg_offset, addr_reg);
}
else
{
@ -526,8 +470,7 @@ void JitArm64::stX(UGeckoInstruction inst)
RB = gpr.R(regOffset);
if (regOffset == -1)
{
MOVI2R(WA, offset);
ADD(RA, RA, WA);
ADDI2R(RA, RA, offset, WA);
}
else
{
@ -572,14 +515,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
SUB(WA, WA, remaining, true);
}
}
ADD(XA, XA, MEM_REG);
}
else
{
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA);
}
ADD(XA, XA, MEM_REG);
for (int i = inst.RD; i < 32; i++)
{
int remaining = 32 - i;
@ -660,14 +602,13 @@ void JitArm64::stmw(UGeckoInstruction inst)
SUB(WA, WA, remaining, true);
}
}
ADD(XA, XA, MEM_REG);
}
else
{
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
ADDI2R(XA, MEM_REG, (u32)(s32)(s16)inst.SIMM_16, XA);
}
ADD(XA, XA, MEM_REG);
for (int i = inst.RD; i < 32; i++)
{
ARM64Reg RX = gpr.R(i);
@ -782,15 +723,7 @@ void JitArm64::dcbz(UGeckoInstruction inst)
// Only one register is an immediate
ARM64Reg base = is_imm_a ? gpr.R(b) : gpr.R(a);
u32 imm_offset = is_imm_a ? gpr.GetImm(a) : gpr.GetImm(b);
if (imm_offset < 4096)
{
ADD(addr_reg, base, imm_offset);
}
else
{
MOVI2R(addr_reg, imm_offset);
ADD(addr_reg, addr_reg, base);
}
ADDI2R(addr_reg, base, imm_offset, addr_reg);
}
else
{

View File

@ -101,19 +101,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
{
if (offset_reg == -1)
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, gpr.R(a), offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, gpr.R(a), std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
@ -132,19 +120,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
}
else if (a)
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, gpr.R(a), offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, gpr.R(a), std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
@ -298,19 +274,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
{
if (offset_reg == -1)
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, gpr.R(a), offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, gpr.R(a), std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
@ -329,19 +293,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}
else if (a)
{
if (offset >= 0 && offset < 4096)
{
ADD(addr_reg, gpr.R(a), offset);
}
else if (offset < 0 && offset > -4096)
{
SUB(addr_reg, gpr.R(a), std::abs(offset));
}
else
{
MOVI2R(addr_reg, offset);
ADD(addr_reg, addr_reg, gpr.R(a));
}
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{

View File

@ -151,16 +151,7 @@ void JitArm64::twx(UGeckoInstruction inst)
if (inst.OPCD == 3) // twi
{
if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 4096)
{
// Can fit in immediate in to the instruction encoding
CMP(gpr.R(a), inst.SIMM_16);
}
else
{
MOVI2R(WA, (s32)(s16)inst.SIMM_16);
CMP(gpr.R(a), WA);
}
CMPI2R(gpr.R(a), (s32)(s16)inst.SIMM_16, WA);
}
else // tw
{

View File

@ -64,8 +64,7 @@ private:
}
else
{
m_emit->MOVI2R(W1, mask);
m_emit->AND(W1, m_src_reg, W1, ArithOption(W1, ST_LSL, 0));
m_emit->ANDI2R(W1, m_src_reg, mask, W1);
StoreFromRegister(sbits, W1);
}
}
@ -159,8 +158,7 @@ private:
else
{
LoadToRegister(sbits, true);
m_emit->MOVI2R(W0, mask);
m_emit->AND(m_dst_reg, m_dst_reg, W0, ArithOption(W0, ST_LSL, 0));
m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, W0);
if (m_sign_extend)
m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
}