[AArch64] Use a register as a constant for the memory base.
Removes a /lot/ of redundant MOVK operations in fastmem loads and stores. Improves performance of the povray benchmark by ~5%.
This commit is contained in:
parent
c325c310d6
commit
dba579c52f
|
@ -362,6 +362,10 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
|
||||||
gpr.Start(js.gpa);
|
gpr.Start(js.gpa);
|
||||||
fpr.Start(js.fpa);
|
fpr.Start(js.fpa);
|
||||||
|
|
||||||
|
// Setup memory base register
|
||||||
|
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
|
||||||
|
MOVI2R(X28, (u64)base);
|
||||||
|
|
||||||
if (!SConfig::GetInstance().bEnableDebugging)
|
if (!SConfig::GetInstance().bEnableDebugging)
|
||||||
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);
|
||||||
|
|
||||||
|
|
|
@ -40,106 +40,6 @@ static void DoBacktrace(uintptr_t access_address, SContext* ctx)
|
||||||
ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str());
|
ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
|
|
||||||
{
|
|
||||||
u32 inst = *(u32*)ptr;
|
|
||||||
u32 prev_inst = *(u32*)(ptr - 4);
|
|
||||||
u32 next_inst = *(u32*)(ptr + 4);
|
|
||||||
|
|
||||||
u8 op = (inst >> 22) & 0xFF;
|
|
||||||
u8 size = (inst >> 30) & 0x3;
|
|
||||||
|
|
||||||
if (size == 0) // 8-bit
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_8;
|
|
||||||
else if (size == 1) // 16-bit
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_16;
|
|
||||||
else if (size == 2) // 32-bit
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_32;
|
|
||||||
else if (size == 3) // 64-bit
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_F64;
|
|
||||||
|
|
||||||
if (op == 0xF5) // NEON LDR
|
|
||||||
{
|
|
||||||
if (size == 2) // 32-bit float
|
|
||||||
{
|
|
||||||
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_F32;
|
|
||||||
|
|
||||||
// Loads directly in to the target register
|
|
||||||
// Duplicates bottom result in to top register
|
|
||||||
*reg = (ARM64Reg)(inst & 0x1F);
|
|
||||||
}
|
|
||||||
else // 64-bit float
|
|
||||||
{
|
|
||||||
u32 ldr_reg = inst & 0x1F;
|
|
||||||
|
|
||||||
if (ldr_reg)
|
|
||||||
{
|
|
||||||
// Loads directly in to the target register
|
|
||||||
// No need to dump the flag in to flags here
|
|
||||||
// The slowmem path always first returns in Q0
|
|
||||||
// then moves to the destination register
|
|
||||||
*reg = (ARM64Reg)(ldr_reg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Real register is in the INS instruction
|
|
||||||
u32 ins_inst = *(u32*)(ptr + 8);
|
|
||||||
*reg = (ARM64Reg)(ins_inst & 0x1F);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*flags |= BackPatchInfo::FLAG_LOAD;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (op == 0xF4) // NEON STR
|
|
||||||
{
|
|
||||||
if (size == 2) // 32-bit float
|
|
||||||
{
|
|
||||||
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
|
|
||||||
*flags |= BackPatchInfo::FLAG_SIZE_F32;
|
|
||||||
|
|
||||||
// Real register is in the first FCVT conversion instruction
|
|
||||||
u32 fcvt_inst = *(u32*)(ptr - 8);
|
|
||||||
*reg = (ARM64Reg)((fcvt_inst >> 5) & 0x1F);
|
|
||||||
}
|
|
||||||
else // 64-bit float
|
|
||||||
{
|
|
||||||
// Real register is in the previous REV64 instruction
|
|
||||||
*reg = (ARM64Reg)((prev_inst >> 5) & 0x1F);
|
|
||||||
}
|
|
||||||
*flags |= BackPatchInfo::FLAG_STORE;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (op == 0xE5) // Load
|
|
||||||
{
|
|
||||||
*flags |= BackPatchInfo::FLAG_LOAD;
|
|
||||||
*reg = (ARM64Reg)(inst & 0x1F);
|
|
||||||
if ((next_inst & 0x7FFFF000) == 0x5AC00000) // REV
|
|
||||||
{
|
|
||||||
u32 sxth_inst = *(u32*)(ptr + 8);
|
|
||||||
if ((sxth_inst & 0x7F800000) == 0x13000000) // SXTH
|
|
||||||
*flags |= BackPatchInfo::FLAG_EXTEND;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*flags |= BackPatchInfo::FLAG_REVERSE;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (op == 0xE4) // Store
|
|
||||||
{
|
|
||||||
*flags |= BackPatchInfo::FLAG_STORE;
|
|
||||||
|
|
||||||
if (size == 0) // 8-bit
|
|
||||||
*reg = (ARM64Reg)(inst & 0x1F);
|
|
||||||
else // 16-bit/32-bit register is in previous REV instruction
|
|
||||||
*reg = (ARM64Reg)((prev_inst >> 5) & 0x1F);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
ARM64Reg RS, ARM64Reg addr,
|
ARM64Reg RS, ARM64Reg addr,
|
||||||
BitSet32 gprs_to_push, BitSet32 fprs_to_push)
|
BitSet32 gprs_to_push, BitSet32 fprs_to_push)
|
||||||
|
@ -149,8 +49,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
|
|
||||||
if (fastmem)
|
if (fastmem)
|
||||||
{
|
{
|
||||||
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
|
|
||||||
MOVK(addr, ((u64)base >> 32) & 0xFFFF, SHIFT_32);
|
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_STORE &&
|
if (flags & BackPatchInfo::FLAG_STORE &&
|
||||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||||
|
@ -159,12 +57,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
{
|
{
|
||||||
m_float_emit.FCVT(32, 64, D0, RS);
|
m_float_emit.FCVT(32, 64, D0, RS);
|
||||||
m_float_emit.REV32(8, D0, D0);
|
m_float_emit.REV32(8, D0, D0);
|
||||||
m_float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
|
m_float_emit.STR(32, D0, X28, addr);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_float_emit.REV64(8, Q0, RS);
|
m_float_emit.REV64(8, Q0, RS);
|
||||||
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
m_float_emit.STR(64, Q0, X28, addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_LOAD &&
|
else if (flags & BackPatchInfo::FLAG_LOAD &&
|
||||||
|
@ -172,7 +70,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||||
{
|
{
|
||||||
m_float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
|
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
|
||||||
|
m_float_emit.INS(32, RS, 1, RS, 0);
|
||||||
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
}
|
}
|
||||||
|
@ -180,12 +79,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
|
if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
|
||||||
{
|
{
|
||||||
m_float_emit.LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(RS), addr, 0);
|
m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr);
|
||||||
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
m_float_emit.LDR(64, Q0, X28, addr);
|
||||||
m_float_emit.REV64(8, D0, D0);
|
m_float_emit.REV64(8, D0, D0);
|
||||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||||
}
|
}
|
||||||
|
@ -200,20 +99,20 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||||
REV16(temp, RS);
|
REV16(temp, RS);
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
STR(INDEX_UNSIGNED, temp, addr, 0);
|
STR(temp, X28, addr);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
STRH(INDEX_UNSIGNED, temp, addr, 0);
|
STRH(temp, X28, addr);
|
||||||
else
|
else
|
||||||
STRB(INDEX_UNSIGNED, RS, addr, 0);
|
STRB(RS, X28, addr);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
LDR(INDEX_UNSIGNED, RS, addr, 0);
|
LDR(RS, X28, addr);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
LDRH(INDEX_UNSIGNED, RS, addr, 0);
|
LDRH(RS, X28, addr);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||||
LDRB(INDEX_UNSIGNED, RS, addr, 0);
|
LDRB(RS, X28, addr);
|
||||||
|
|
||||||
if (!(flags & BackPatchInfo::FLAG_REVERSE))
|
if (!(flags & BackPatchInfo::FLAG_REVERSE))
|
||||||
{
|
{
|
||||||
|
@ -369,17 +268,6 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ARM64Reg reg = INVALID_REG;
|
|
||||||
u32 flags = 0;
|
|
||||||
|
|
||||||
if (!DisasmLoadStore((const u8*)ctx->CTX_PC, &flags, &reg))
|
|
||||||
{
|
|
||||||
ERROR_LOG(DYNA_REC, "Error disassembling address 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
|
|
||||||
|
|
||||||
DoBacktrace(access_address, ctx);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC);
|
auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC);
|
||||||
slow_handler_iter--;
|
slow_handler_iter--;
|
||||||
|
|
||||||
|
@ -403,12 +291,5 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
||||||
|
|
||||||
emitter.FlushIcache();
|
emitter.FlushIcache();
|
||||||
ctx->CTX_PC = (u64)slow_handler_iter->first;
|
ctx->CTX_PC = (u64)slow_handler_iter->first;
|
||||||
|
|
||||||
// Wipe the top bits of the addr_register
|
|
||||||
if (flags & BackPatchInfo::FLAG_STORE &&
|
|
||||||
!(flags & BackPatchInfo::FLAG_SIZE_F64))
|
|
||||||
ctx->CTX_REG(1) &= 0xFFFFFFFFUll;
|
|
||||||
else
|
|
||||||
ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -222,7 +222,7 @@ void Arm64GPRCache::GetAllocationOrder()
|
||||||
const std::vector<ARM64Reg> allocation_order =
|
const std::vector<ARM64Reg> allocation_order =
|
||||||
{
|
{
|
||||||
// Callee saved
|
// Callee saved
|
||||||
W28, W27, W26, W25, W24, W23, W22, W21, W20,
|
W27, W26, W25, W24, W23, W22, W21, W20,
|
||||||
W19,
|
W19,
|
||||||
|
|
||||||
// Caller saved
|
// Caller saved
|
||||||
|
|
Loading…
Reference in New Issue