JitArm64: divwx - Optimize power-of-two divisors

Division by a power-of-two constant can be done more elegantly than the
general division-by-constant path, so handle those divisors separately.
This commit is contained in:
JosJuice 2021-08-21 18:47:31 +02:00
parent eb8581c26d
commit 9889e7eb33
2 changed files with 31 additions and 1 deletions

View File

@ -896,7 +896,15 @@ public:
CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1)); CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
} }
// NEG: emits SUB with the zero register (ZR when Rd is 64-bit, WZR otherwise) as the first source operand and Rs as the second, writing to Rd.
// NOTE(review): the definition appears twice on this line; this looks like a side-by-side diff extraction artifact - confirm against the real header.
void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); } void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
// NEG with an operand option: emits SUB with the zero register as the first
// source operand and Rs as the second, forwarding Option to SUB unchanged.
// The zero register is ZR when Rd is 64-bit and WZR otherwise.
void NEG(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
{
  const ARM64Reg zero_reg = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
  SUB(Rd, zero_reg, Rs, Option);
}
// NEGS: emits SUBS with the zero register (ZR when Rd is 64-bit, WZR otherwise) as the first source operand and Rs as the second, writing to Rd.
// NOTE(review): presumably SUBS is the flag-setting form of SUB - confirm against the emitter. The definition also appears twice on this line, which looks like a side-by-side diff extraction artifact.
void NEGS(ARM64Reg Rd, ARM64Reg Rs) { SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); } void NEGS(ARM64Reg Rd, ARM64Reg Rs) { SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
// NEGS with an operand option: emits SUBS with the zero register as the first
// source operand and Rs as the second, forwarding Option to SUBS unchanged.
// The zero register is ZR when Rd is 64-bit and WZR otherwise.
void NEGS(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
{
  const ARM64Reg zero_source = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
  SUBS(Rd, zero_source, Rs, Option);
}
// Data-Processing 1 source // Data-Processing 1 source
void RBIT(ARM64Reg Rd, ARM64Reg Rn); void RBIT(ARM64Reg Rd, ARM64Reg Rn);
void REV16(ARM64Reg Rd, ARM64Reg Rn); void REV16(ARM64Reg Rd, ARM64Reg Rn);

View File

@ -1366,7 +1366,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
if (inst.Rc) if (inst.Rc)
ComputeRC0(RD); ComputeRC0(RD);
} }
else if (gpr.IsImm(b) && gpr.GetImm(b) != UINT32_C(0x80000000)) else if (gpr.IsImm(b))
{ {
const s32 divisor = s32(gpr.GetImm(b)); const s32 divisor = s32(gpr.GetImm(b));
@ -1394,6 +1394,28 @@ void JitArm64::divwx(UGeckoInstruction inst)
gpr.Unlock(WA); gpr.Unlock(WA);
} }
else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-static_cast<s64>(divisor)))
{
const u32 abs_val = static_cast<u32>(std::abs(static_cast<s64>(divisor)));
ARM64Reg RA = gpr.R(a);
ARM64Reg RD = gpr.R(d);
const bool allocate_reg = a == d;
ARM64Reg WA = allocate_reg ? gpr.GetReg() : RD;
TST(RA, RA);
ADDI2R(WA, RA, abs_val - 1, WA);
CSEL(WA, RA, WA, CCFlags::CC_PL);
if (divisor < 0)
NEG(RD, WA, ArithOption(WA, ShiftType::ASR, IntLog2(abs_val)));
else
ASR(RD, WA, IntLog2(abs_val));
if (allocate_reg)
gpr.Unlock(WA);
}
else else
{ {
// Optimize signed 32-bit integer division by a constant // Optimize signed 32-bit integer division by a constant