Jit64: addx revisited

This doesn't add any new optimizations by itself, but it fixes an issue
that prevented the optimizations introduced in #8551 and #8755 from
being applied in specific cases. A similar issue was solved for subfx as
part of #9425.

Consider the case where the destination register is also an input
register and happens to hold an immediate value. This results in a set
of constraints that forces the RegCache to allocate a host register and
move the immediate value into it for us. By the time we check for
immediate values in the JIT, it is too late: the operand is already
bound to a register and no longer looks like an immediate.

We solve this by refactoring the code in such a way that we can check
for immediates before involving the RegCache.

- Example 1
Before:
41 BF 00 68 00 CC    mov         r15d,0CC006800h
44 03 FF             add         r15d,edi

After:
44 8D BF 00 68 00 CC lea         r15d,[rdi-33FF9800h]

- Example 2
Before:
41 BE 00 00 00 00    mov         r14d,0
44 03 F7             add         r14d,edi

After:
44 8B F7             mov         r14d,edi

- Example 3
Before:
41 BD 03 00 00 00    mov         r13d,3
44 03 6D 8C          add         r13d,dword ptr [rbp-74h]

After:
44 8B 6D 8C          mov         r13d,dword ptr [rbp-74h]
41 83 C5 03          add         r13d,3
This commit is contained in:
Sintendo 2021-01-22 23:41:22 +01:00
parent caff472dbf
commit 6a51c1564f
1 changed file with 43 additions and 39 deletions

View File

@ -1398,6 +1398,47 @@ void Jit64::addx(UGeckoInstruction inst)
if (inst.OE) if (inst.OE)
GenerateConstantOverflow((s64)i + (s64)j); GenerateConstantOverflow((s64)i + (s64)j);
} }
else if (gpr.IsImm(a) || gpr.IsImm(b))
{
auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a);
s32 imm = gpr.SImm32(i);
RCOpArg Rj = gpr.Use(j, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Rj, Rd);
if (imm == 0)
{
if (d != j)
MOV(32, Rd, Rj);
if (inst.OE)
GenerateConstantOverflow(false);
}
else if (d == j)
{
ADD(32, Rd, Imm32(imm));
if (inst.OE)
GenerateOverflow();
}
else if (Rj.IsSimpleReg() && !inst.OE)
{
LEA(32, Rd, MDisp(Rj.GetSimpleReg(), imm));
}
else if (imm >= -128 && imm <= 127)
{
MOV(32, Rd, Rj);
ADD(32, Rd, Imm32(imm));
if (inst.OE)
GenerateOverflow();
}
else
{
MOV(32, Rd, Imm32(imm));
ADD(32, Rd, Rj);
if (inst.OE)
GenerateOverflow();
}
}
else else
{ {
RCOpArg Ra = gpr.Use(a, RCMode::Read); RCOpArg Ra = gpr.Use(a, RCMode::Read);
@ -1405,52 +1446,15 @@ void Jit64::addx(UGeckoInstruction inst)
RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rb, Rd); RegCache::Realize(Ra, Rb, Rd);
if ((d == a) || (d == b)) if (d == a || d == b)
{ {
RCOpArg& Rnotd = (d == a) ? Rb : Ra; RCOpArg& Rnotd = (d == a) ? Rb : Ra;
if (!Rnotd.IsZero() || inst.OE) ADD(32, Rd, Rnotd);
{
ADD(32, Rd, Rnotd);
}
} }
else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE) else if (Ra.IsSimpleReg() && Rb.IsSimpleReg() && !inst.OE)
{ {
LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg())); LEA(32, Rd, MRegSum(Ra.GetSimpleReg(), Rb.GetSimpleReg()));
} }
else if ((Ra.IsSimpleReg() || Rb.IsSimpleReg()) && (Ra.IsImm() || Rb.IsImm()) && !inst.OE)
{
RCOpArg& Rimm = Ra.IsImm() ? Ra : Rb;
RCOpArg& Rreg = Ra.IsImm() ? Rb : Ra;
if (Rimm.IsZero())
{
MOV(32, Rd, Rreg);
}
else
{
LEA(32, Rd, MDisp(Rreg.GetSimpleReg(), Rimm.SImm32()));
}
}
else if (Ra.IsImm() || Rb.IsImm())
{
RCOpArg& Rimm = Ra.IsImm() ? Ra : Rb;
RCOpArg& Rother = Ra.IsImm() ? Rb : Ra;
s32 imm = Rimm.SImm32();
if (imm >= -128 && imm <= 127)
{
MOV(32, Rd, Rother);
if (imm != 0 || inst.OE)
{
ADD(32, Rd, Rimm);
}
}
else
{
MOV(32, Rd, Rimm);
ADD(32, Rd, Rother);
}
}
else else
{ {
MOV(32, Rd, Ra); MOV(32, Rd, Ra);