JitArm64: Improve pipelining of lmw/stmw

The calculation of each address in lmw/stmw currently has a dependency
on the calculation of the previous address. By removing this dependency,
the host CPU should be able to pipeline the loads/stores better. The cost
we pay for this is up to one extra register and one extra MOV instruction
per guest instruction, but often there is no extra cost at all.

Making EmitBackpatchRoutine support using any register as the address
register would let us get rid of the MOV, but I consider that too big
a task to take on in the same change as this one.
This commit is contained in:
JosJuice 2022-11-20 14:56:35 +01:00
parent 82e87cf7b9
commit 701ba7cd43
1 changed file with 41 additions and 31 deletions

View File

@ -527,19 +527,21 @@ void JitArm64::lmw(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W2);
// MMU games make use of a >= d despite this being invalid according to the PEM.
// Because of this, make sure to not re-read rA after starting doing the loads.
// If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
ARM64Reg addr_reg = ARM64Reg::W0;
if (a)
{
if (gpr.IsImm(a))
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else if (a < d && offset + (31 - d) * 4 < 0x1000)
a_is_addr_base_reg = true;
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
MOVI2R(addr_reg, offset);
}
ARM64Reg addr_base_reg = a_is_addr_base_reg ? ARM64Reg::INVALID_REG : gpr.GetReg();
if (!a_is_addr_base_reg)
MOV(addr_base_reg, addr_reg);
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
@ -548,12 +550,16 @@ void JitArm64::lmw(UGeckoInstruction inst)
gpr.BindToRegister(i, false, false);
ARM64Reg dest_reg = gpr.R(i);
if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - d) * 4);
else if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
@ -562,14 +568,13 @@ void JitArm64::lmw(UGeckoInstruction inst)
gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
if (i != 31)
ADD(addr_reg, addr_reg, 4);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
void JitArm64::stmw(UGeckoInstruction inst)
@ -585,17 +590,19 @@ void JitArm64::stmw(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W2);
ARM64Reg addr_reg = ARM64Reg::W1;
if (a)
{
if (gpr.IsImm(a))
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else if (offset + (31 - s) * 4 < 0x1000)
a_is_addr_base_reg = true;
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
}
else
{
MOVI2R(addr_reg, offset);
}
ARM64Reg addr_base_reg = a_is_addr_base_reg ? ARM64Reg::INVALID_REG : gpr.GetReg();
if (!a_is_addr_base_reg)
MOV(addr_base_reg, addr_reg);
// TODO: This doesn't handle rollback on DSI correctly
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_SIZE_32;
@ -603,24 +610,27 @@ void JitArm64::stmw(UGeckoInstruction inst)
{
ARM64Reg src_reg = gpr.R(i);
if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - s) * 4);
else if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0;
if (!jo.fastmem_arena)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
if (i == 31)
regs_in_use[DecodeReg(addr_reg)] = 0;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
if (i != 31)
ADD(addr_reg, addr_reg, 4);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem_arena)
gpr.Unlock(ARM64Reg::W2);
if (!a_is_addr_base_reg)
gpr.Unlock(addr_base_reg);
}
void JitArm64::dcbx(UGeckoInstruction inst)