[AArch64] Optimize gatherpipe stores
This can be even more optimized in the future when the register cache supports constant caching.
This commit is contained in:
parent
8ae12d8005
commit
46c3a11d01
|
@ -360,10 +360,17 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
|
||||
ARM64Reg XA = EncodeRegTo64(addr_reg);
|
||||
|
||||
if (is_immediate)
|
||||
if (is_immediate && !(jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)))
|
||||
{
|
||||
MOVI2R(XA, imm_addr);
|
||||
|
||||
if (update)
|
||||
if (update)
|
||||
{
|
||||
gpr.BindToRegister(a, false);
|
||||
MOV(gpr.R(a), addr_reg);
|
||||
}
|
||||
}
|
||||
else if (!is_immediate && update)
|
||||
{
|
||||
gpr.BindToRegister(a, false);
|
||||
MOV(gpr.R(a), addr_reg);
|
||||
|
@ -386,25 +393,44 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
else
|
||||
accessSize = 32;
|
||||
|
||||
MOVI2R(X30, (u64)&GPFifo::m_gatherPipeCount);
|
||||
MOVI2R(X1, (u64)GPFifo::m_gatherPipe);
|
||||
LDR(INDEX_UNSIGNED, W0, X30, 0);
|
||||
ADD(X1, X1, X0);
|
||||
u64 base_ptr = std::min((u64)&GPFifo::m_gatherPipeCount, (u64)&GPFifo::m_gatherPipe);
|
||||
u32 count_off = (u64)&GPFifo::m_gatherPipeCount - base_ptr;
|
||||
u32 pipe_off = (u64)&GPFifo::m_gatherPipe - base_ptr;
|
||||
|
||||
MOVI2R(X30, base_ptr);
|
||||
|
||||
if (pipe_off)
|
||||
ADD(X1, X30, pipe_off);
|
||||
|
||||
LDR(INDEX_UNSIGNED, W0, X30, count_off);
|
||||
if (accessSize == 64)
|
||||
{
|
||||
m_float_emit.REV64(8, Q0, V0);
|
||||
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, X1, 0);
|
||||
if (pipe_off)
|
||||
m_float_emit.STR(64, Q0, X1, ArithOption(X0));
|
||||
else
|
||||
m_float_emit.STR(64, Q0, X30, ArithOption(X0));
|
||||
}
|
||||
else if (accessSize == 32)
|
||||
{
|
||||
m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0));
|
||||
m_float_emit.REV32(8, D0, D0);
|
||||
m_float_emit.STR(32, INDEX_UNSIGNED, D0, X1, 0);
|
||||
if (pipe_off)
|
||||
m_float_emit.STR(32, D0, X1, ArithOption(X0));
|
||||
else
|
||||
m_float_emit.STR(32, D0, X30, ArithOption(X0));
|
||||
|
||||
}
|
||||
ADD(W0, W0, accessSize >> 3);
|
||||
STR(INDEX_UNSIGNED, W0, X30, 0);
|
||||
STR(INDEX_UNSIGNED, W0, X30, count_off);
|
||||
jit->js.fifoBytesThisBlock += accessSize >> 3;
|
||||
|
||||
if (update)
|
||||
{
|
||||
// Chance of this happening is fairly low, but support it
|
||||
gpr.BindToRegister(a, false);
|
||||
MOVI2R(gpr.R(a), imm_addr);
|
||||
}
|
||||
}
|
||||
else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue