JitArm64: Optimize mfsrin/mtsrin address calculations

This does require another register, but we skip having to use shifted
ADD, which takes two cycles on some CPUs, and we gain instruction-level
parallelism.
This commit is contained in:
JosJuice 2023-08-19 21:37:06 +02:00
parent ed7894924c
commit d34d3bd513
1 changed file with 11 additions and 9 deletions

View File

@@ -183,13 +183,14 @@ void JitArm64::mfsrin(UGeckoInstruction inst)
   u32 b = inst.RB, d = inst.RD;
   gpr.BindToRegister(d, d == b);
 
-  ARM64Reg index = gpr.GetReg();
-  ARM64Reg index64 = EncodeRegTo64(index);
   ARM64Reg RB = gpr.R(b);
+  ARM64Reg RD = gpr.R(d);
+  ARM64Reg index = gpr.GetReg();
+  ARM64Reg addr = EncodeRegTo64(RD);
 
   UBFM(index, RB, 28, 31);
-  ADD(index64, PPC_REG, index64, ArithOption(index64, ShiftType::LSL, 2));
-  LDR(IndexType::Unsigned, gpr.R(d), index64, PPCSTATE_OFF_SR(0));
+  ADDI2R(addr, PPC_REG, PPCSTATE_OFF_SR(0), addr);
+  LDR(RD, addr, ArithOption(EncodeRegTo64(index), true));
 
   gpr.Unlock(index);
 }
@@ -202,15 +203,16 @@ void JitArm64::mtsrin(UGeckoInstruction inst)
   u32 b = inst.RB, d = inst.RD;
   gpr.BindToRegister(d, d == b);
 
-  ARM64Reg index = gpr.GetReg();
-  ARM64Reg index64 = EncodeRegTo64(index);
   ARM64Reg RB = gpr.R(b);
+  ARM64Reg RD = gpr.R(d);
+  ARM64Reg index = gpr.GetReg();
+  ARM64Reg addr = gpr.GetReg();
 
   UBFM(index, RB, 28, 31);
-  ADD(index64, PPC_REG, index64, ArithOption(index64, ShiftType::LSL, 2));
-  STR(IndexType::Unsigned, gpr.R(d), index64, PPCSTATE_OFF_SR(0));
+  ADDI2R(addr, PPC_REG, PPCSTATE_OFF_SR(0), addr);
+  STR(RD, addr, ArithOption(EncodeRegTo64(index), true));
 
-  gpr.Unlock(index);
+  gpr.Unlock(index, addr);
 }
 
 void JitArm64::twx(UGeckoInstruction inst)