Merge pull request #2886 from Sonicadvance1/aarch64_faster_lfd
[AArch64] Optimize lfd instructions if possible.
This commit is contained in:
commit
5f628749ff
|
@ -71,9 +71,22 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
|
|||
}
|
||||
else // 64-bit float
|
||||
{
|
||||
// Real register is in the INS instruction
|
||||
u32 ins_inst = *(u32*)(ptr + 8);
|
||||
*reg = (ARM64Reg)(ins_inst & 0x1F);
|
||||
u32 ldr_reg = inst & 0x1F;
|
||||
|
||||
if (ldr_reg)
|
||||
{
|
||||
// Loads directly into the target register
|
||||
// No need to dump the flag into flags here
|
||||
// The slowmem path always first returns in Q0
|
||||
// then moves to the destination register
|
||||
*reg = (ARM64Reg)(ldr_reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Real register is in the INS instruction
|
||||
u32 ins_inst = *(u32*)(ptr + 8);
|
||||
*reg = (ARM64Reg)(ins_inst & 0x1F);
|
||||
}
|
||||
}
|
||||
*flags |= BackPatchInfo::FLAG_LOAD;
|
||||
return true;
|
||||
|
@ -165,9 +178,17 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, D0, D0);
|
||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||
if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(RS), addr, 0);
|
||||
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, D0, D0);
|
||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||
|
@ -217,7 +238,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||
handler.addr_reg = addr;
|
||||
handler.gprs = gprs_to_push;
|
||||
handler.fprs = fprs_to_push;
|
||||
handler.flags = flags;
|
||||
handler.flags = flags & ~BackPatchInfo::FLAG_ONLY_LOWER;
|
||||
|
||||
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start];
|
||||
auto handler_loc_iter = m_handler_to_loc.find(handler);
|
||||
|
|
|
@ -71,12 +71,19 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
|||
u32 imm_addr = 0;
|
||||
bool is_immediate = false;
|
||||
|
||||
// 64 bit loads only load PSR0
|
||||
fpr.BindToRegister(inst.FD, flags & BackPatchInfo::FLAG_SIZE_F64, flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
bool only_lower = !!(flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
|
||||
ARM64Reg VD = fpr.R(inst.FD, flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
fpr.BindToRegister(inst.FD, false, only_lower);
|
||||
|
||||
ARM64Reg VD = fpr.R(inst.FD, only_lower);
|
||||
ARM64Reg addr_reg = W0;
|
||||
|
||||
if (!fpr.IsLower(inst.FD))
|
||||
only_lower = false;
|
||||
|
||||
if (only_lower)
|
||||
flags |= BackPatchInfo::FLAG_ONLY_LOWER;
|
||||
|
||||
gpr.Lock(W0, W30);
|
||||
fpr.Lock(Q0);
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ struct BackPatchInfo
|
|||
FLAG_SIZE_F64 = (1 << 6),
|
||||
FLAG_REVERSE = (1 << 7),
|
||||
FLAG_EXTEND = (1 << 8),
|
||||
FLAG_ONLY_LOWER = (1 << 9),
|
||||
};
|
||||
|
||||
static u32 GetFlagSize(u32 flags)
|
||||
|
|
Loading…
Reference in New Issue