JIT: merge paired timebase reads where possible

Combined with the previous patch, ~1% faster overall on F-Zero GX.
This commit is contained in:
Fiora 2014-08-20 00:42:04 -07:00
parent 816d056657
commit 6875d911f1
1 changed files with 26 additions and 4 deletions

View File

@ -182,11 +182,33 @@ void Jit64::mfspr(UGeckoInstruction inst)
if (offset > 0)
ADD(64, R(RAX), Imm32(offset));
MOV(64, M(&TL), R(RAX));
// Two calls of TU/TL next to each other are extremely common in typical usage, so merge them
// if we can.
u32 nextIndex = (js.next_inst.SPRU << 5) | (js.next_inst.SPRL & 0x1F);
// Be careful; the actual opcode is for mftb (371), not mfspr (339)
if (js.next_inst.OPCD == 31 && js.next_inst.SUBOP10 == 371 && (nextIndex == SPR_TU || nextIndex == SPR_TL))
{
int n = js.next_inst.RD;
js.downcountAmount++;
js.skipnext = true;
gpr.Lock(d, n);
if (iIndex == SPR_TL)
MOV(32, gpr.R(d), R(EAX));
if (nextIndex == SPR_TL)
MOV(32, gpr.R(n), R(EAX));
SHR(64, R(RAX), Imm8(32));
if (iIndex == SPR_TU)
MOV(32, gpr.R(d), R(EAX));
if (nextIndex == SPR_TU)
MOV(32, gpr.R(n), R(EAX));
}
else
{
gpr.Lock(d);
gpr.BindToRegister(d, false);
if (iIndex == SPR_TU)
SHR(64, R(RAX), Imm8(32));
MOV(32, gpr.R(d), R(EAX));
}
break;
}
case SPR_WPAR: