JitArm64: divwx - Optimize constant dividend
When the dividend is known at compile time, we can eliminate some of the branching and precompute the result for the overflow case.
This commit is contained in:
parent
559de262a1
commit
09cdb076a3
|
@ -1373,6 +1373,10 @@ void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
|
|||
{
|
||||
EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP);
|
||||
}
|
||||
// Emits the immediate form of CMN for Rn by forwarding imm and shift unchanged
// to EncodeAddSubImmInst. The call matches CMP(Rn, imm, shift) except that the
// first argument is 0 here, where CMP passes 1.
// NOTE(review): in the A64 ISA, CMN (immediate) is the ADDS alias whose result
// is discarded; the leading 0 presumably selects the add form and the SP/WSP
// argument presumably stands for register number 31 in the destination field.
// Confirm against EncodeAddSubImmInst.
void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)
{
  // The last register argument is SP when Rn is 64-bit and WSP otherwise.
  const ARM64Reg last_reg = Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP;
  EncodeAddSubImmInst(0, true, shift, imm, Rn, last_reg);
}
|
||||
|
||||
// Data Processing (Immediate)
|
||||
void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)
|
||||
|
|
|
@ -1006,6 +1006,7 @@ public:
|
|||
void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
|
||||
void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
|
||||
void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
|
||||
void CMN(ARM64Reg Rn, u32 imm, bool shift = false);
|
||||
|
||||
// Data Processing (Immediate)
|
||||
void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = ShiftAmount::Shift0);
|
||||
|
|
|
@ -1327,6 +1327,36 @@ void JitArm64::divwx(UGeckoInstruction inst)
|
|||
if (inst.Rc)
|
||||
ComputeRC0(imm_d);
|
||||
}
|
||||
else if (gpr.IsImm(a))
|
||||
{
|
||||
const u32 dividend = gpr.GetImm(a);
|
||||
|
||||
gpr.BindToRegister(d, d == b);
|
||||
|
||||
ARM64Reg RB = gpr.R(b);
|
||||
ARM64Reg RD = gpr.R(d);
|
||||
|
||||
FixupBranch overflow1 = CBZ(RB);
|
||||
FixupBranch overflow2;
|
||||
if (dividend == 0x80000000)
|
||||
{
|
||||
CMN(RB, 1);
|
||||
overflow2 = B(CC_EQ);
|
||||
}
|
||||
SDIV(RD, gpr.R(a), RB);
|
||||
FixupBranch done = B();
|
||||
|
||||
SetJumpTarget(overflow1);
|
||||
if (dividend == 0x80000000)
|
||||
SetJumpTarget(overflow2);
|
||||
|
||||
MOVI2R(RD, dividend & 0x80000000 ? 0xFFFFFFFF : 0);
|
||||
|
||||
SetJumpTarget(done);
|
||||
|
||||
if (inst.Rc)
|
||||
ComputeRC0(RD);
|
||||
}
|
||||
else if (gpr.IsImm(b) && gpr.GetImm(b) != 0 && gpr.GetImm(b) != UINT32_C(0xFFFFFFFF))
|
||||
{
|
||||
ARM64Reg WA = gpr.GetReg();
|
||||
|
@ -1352,16 +1382,16 @@ void JitArm64::divwx(UGeckoInstruction inst)
|
|||
ARM64Reg RB = gpr.R(b);
|
||||
ARM64Reg RD = gpr.R(d);
|
||||
|
||||
FixupBranch slow1 = CBZ(RB);
|
||||
FixupBranch overflow1 = CBZ(RB);
|
||||
MOVI2R(WA, -0x80000000LL);
|
||||
CMP(RA, WA);
|
||||
CCMN(RB, 1, 0, CC_EQ);
|
||||
FixupBranch slow2 = B(CC_EQ);
|
||||
FixupBranch overflow2 = B(CC_EQ);
|
||||
SDIV(RD, RA, RB);
|
||||
FixupBranch done = B();
|
||||
|
||||
SetJumpTarget(slow1);
|
||||
SetJumpTarget(slow2);
|
||||
SetJumpTarget(overflow1);
|
||||
SetJumpTarget(overflow2);
|
||||
|
||||
ASR(RD, RA, 31);
|
||||
|
||||
|
|
Loading…
Reference in New Issue