Jit64: divwx - Optimize power-of-two divisors

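Division by a constant power of two (or the negation of one) does not need
idiv: it can be done with a rounding adjustment for negative dividends
followed by an arithmetic shift, so handle these divisors separately.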

- Division by 4
Before:
41 BD 04 00 00 00    mov         r13d,4
41 8B C0             mov         eax,r8d
45 85 ED             test        r13d,r13d
74 0D                je          overflow
3D 00 00 00 80       cmp         eax,80000000h
75 0E                jne         normal_path
41 83 FD FF          cmp         r13d,0FFFFFFFFh
75 08                jne         normal_path
overflow:
C1 F8 1F             sar         eax,1Fh
44 8B E8             mov         r13d,eax
EB 07                jmp         done
normal_path:
99                   cdq
41 F7 FD             idiv        eax,r13d
44 8B E8             mov         r13d,eax
done:

After:
45 85 C0             test        r8d,r8d
45 8D 68 03          lea         r13d,[r8+3]
45 0F 49 E8          cmovns      r13d,r8d
41 C1 FD 02          sar         r13d,2
This commit is contained in:
Sintendo 2021-03-04 22:16:52 +01:00
parent 530475dce8
commit 0637a7ec59
1 changed file with 21 additions and 0 deletions

View File

@ -1460,6 +1460,27 @@ void Jit64::divwx(UGeckoInstruction inst)
SetJumpTarget(done);
}
else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-divisor))
{
u32 abs_val = std::abs(divisor);
X64Reg tmp = RSCRATCH;
if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
tmp = Ra.GetSimpleReg();
else
MOV(32, R(tmp), Ra);
TEST(32, R(tmp), R(tmp));
LEA(32, Rd, MDisp(tmp, abs_val - 1));
CMOVcc(32, Rd, R(tmp), CC_NS);
SAR(32, Rd, Imm8(IntLog2(abs_val)));
if (divisor < 0)
NEG(32, Rd);
if (inst.OE)
GenerateConstantOverflow(false);
}
else
{
// Optimize signed 32-bit integer division by a constant