Jit64: divwx - Optimize power-of-two divisors
Power-of-two divisors can be done more elegantly, so handle them separately.

- Division by 4

Before:
41 BD 04 00 00 00    mov         r13d,4
41 8B C0             mov         eax,r8d
45 85 ED             test        r13d,r13d
74 0D                je          overflow
3D 00 00 00 80       cmp         eax,80000000h
75 0E                jne         normal_path
41 83 FD FF          cmp         r13d,0FFFFFFFFh
75 08                jne         normal_path
overflow:
C1 F8 1F             sar         eax,1Fh
44 8B E8             mov         r13d,eax
EB 07                jmp         done
normal_path:
99                   cdq
41 F7 FD             idiv        eax,r13d
44 8B E8             mov         r13d,eax
done:

After:
45 85 C0             test        r8d,r8d
45 8D 68 03          lea         r13d,[r8+3]
45 0F 49 E8          cmovns      r13d,r8d
41 C1 FD 02          sar         r13d,2
This commit is contained in:
parent
530475dce8
commit
0637a7ec59
|
@ -1460,6 +1460,27 @@ void Jit64::divwx(UGeckoInstruction inst)
|
||||||
|
|
||||||
SetJumpTarget(done);
|
SetJumpTarget(done);
|
||||||
}
|
}
|
||||||
|
else if (MathUtil::IsPow2(divisor) || MathUtil::IsPow2(-divisor))
|
||||||
|
{
|
||||||
|
u32 abs_val = std::abs(divisor);
|
||||||
|
|
||||||
|
X64Reg tmp = RSCRATCH;
|
||||||
|
if (Ra.IsSimpleReg() && Ra.GetSimpleReg() != Rd)
|
||||||
|
tmp = Ra.GetSimpleReg();
|
||||||
|
else
|
||||||
|
MOV(32, R(tmp), Ra);
|
||||||
|
|
||||||
|
TEST(32, R(tmp), R(tmp));
|
||||||
|
LEA(32, Rd, MDisp(tmp, abs_val - 1));
|
||||||
|
CMOVcc(32, Rd, R(tmp), CC_NS);
|
||||||
|
SAR(32, Rd, Imm8(IntLog2(abs_val)));
|
||||||
|
|
||||||
|
if (divisor < 0)
|
||||||
|
NEG(32, Rd);
|
||||||
|
|
||||||
|
if (inst.OE)
|
||||||
|
GenerateConstantOverflow(false);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Optimize signed 32-bit integer division by a constant
|
// Optimize signed 32-bit integer division by a constant
|
||||||
|
|
Loading…
Reference in New Issue