JitArm64: divwx - Optimize power-of-two divisors
Power-of-two divisors can be done more elegantly, so handle them separately.
This commit is contained in:
parent
eb8581c26d
commit
9889e7eb33
|
@ -896,7 +896,15 @@ public:
|
||||||
CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
|
CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
|
||||||
}
|
}
|
||||||
// Negate helper: forwards to SUB with the zero register as the first source
// operand (ZR when Is64Bit(Rd) is true, WZR otherwise) and Rs as the second,
// so the emitted instruction computes Rd = 0 - Rs.
void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
|
// Negate helper: forwards to SUB with the zero register as the first source
// operand (ZR when Is64Bit(Rd) is true, WZR otherwise) and Rs as the second,
// so the emitted instruction computes Rd = 0 - Rs.
void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
|
||||||
|
// Negate helper taking an ArithOption: forwards to the four-argument SUB with
// the zero register as the first source operand and passes Option through
// unchanged. The divwx power-of-two path in this commit calls it with an
// ArithOption built from ShiftType::ASR, folding the arithmetic shift and the
// negation into one call.
//   Rd     - destination register; its width selects ZR vs. WZR.
//   Rs     - second source operand handed to SUB.
//   Option - forwarded as-is to SUB.
void NEG(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
|
||||||
|
{
|
||||||
|
// Zero register width is chosen from Rd: ZR if Is64Bit(Rd), else WZR.
SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs, Option);
|
||||||
|
}
|
||||||
// Same shape as NEG, but forwards to SUBS instead of SUB: the zero register
// (ZR when Is64Bit(Rd) is true, WZR otherwise) is the first source operand and
// Rs the second. In the A64 ISA, SUBS is the flag-setting form of SUB.
void NEGS(ARM64Reg Rd, ARM64Reg Rs) { SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
|
// Same shape as NEG, but forwards to SUBS instead of SUB: the zero register
// (ZR when Is64Bit(Rd) is true, WZR otherwise) is the first source operand and
// Rs the second. In the A64 ISA, SUBS is the flag-setting form of SUB.
void NEGS(ARM64Reg Rd, ARM64Reg Rs) { SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
|
||||||
|
// NEGS overload taking an ArithOption: forwards to the four-argument SUBS with
// the zero register as the first source operand and passes Option through
// unchanged. Mirrors the NEG(Rd, Rs, Option) overload, using SUBS in place of
// SUB.
//   Rd     - destination register; its width selects ZR vs. WZR.
//   Rs     - second source operand handed to SUBS.
//   Option - forwarded as-is to SUBS.
void NEGS(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
|
||||||
|
{
|
||||||
|
// Zero register width is chosen from Rd: ZR if Is64Bit(Rd), else WZR.
SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs, Option);
|
||||||
|
}
|
||||||
// Data-Processing 1 source
|
// Data-Processing 1 source
|
||||||
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
|
void RBIT(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
void REV16(ARM64Reg Rd, ARM64Reg Rn);
|
void REV16(ARM64Reg Rd, ARM64Reg Rn);
|
||||||
|
|
|
@ -1366,7 +1366,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC0(RD);
|
ComputeRC0(RD);
|
||||||
}
|
}
|
||||||
else if (gpr.IsImm(b) && gpr.GetImm(b) != UINT32_C(0x80000000))
|
else if (gpr.IsImm(b))
|
||||||
{
|
{
|
||||||
const s32 divisor = s32(gpr.GetImm(b));
|
const s32 divisor = s32(gpr.GetImm(b));
|
||||||
|
|
||||||
|
@ -1394,6 +1394,28 @@ void JitArm64::divwx(UGeckoInstruction inst)
|
||||||
|
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-static_cast<s64>(divisor)))
|
||||||
|
{
|
||||||
|
const u32 abs_val = static_cast<u32>(std::abs(static_cast<s64>(divisor)));
|
||||||
|
|
||||||
|
ARM64Reg RA = gpr.R(a);
|
||||||
|
ARM64Reg RD = gpr.R(d);
|
||||||
|
|
||||||
|
const bool allocate_reg = a == d;
|
||||||
|
ARM64Reg WA = allocate_reg ? gpr.GetReg() : RD;
|
||||||
|
|
||||||
|
TST(RA, RA);
|
||||||
|
ADDI2R(WA, RA, abs_val - 1, WA);
|
||||||
|
CSEL(WA, RA, WA, CCFlags::CC_PL);
|
||||||
|
|
||||||
|
if (divisor < 0)
|
||||||
|
NEG(RD, WA, ArithOption(WA, ShiftType::ASR, IntLog2(abs_val)));
|
||||||
|
else
|
||||||
|
ASR(RD, WA, IntLog2(abs_val));
|
||||||
|
|
||||||
|
if (allocate_reg)
|
||||||
|
gpr.Unlock(WA);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Optimize signed 32-bit integer division by a constant
|
// Optimize signed 32-bit integer division by a constant
|
||||||
|
|
Loading…
Reference in New Issue