[AArch64] Use a register as a constant for the memory base.

Removes a /lot/ of redundant movk operations in fastmem loadstores.
Improves performance of the povray bench by ~5%
This commit is contained in:
Ryan Houdek 2015-08-22 00:57:19 -05:00
parent c325c310d6
commit dba579c52f
3 changed files with 17 additions and 132 deletions

View File

@ -362,6 +362,10 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
gpr.Start(js.gpa); gpr.Start(js.gpa);
fpr.Start(js.fpa); fpr.Start(js.fpa);
// Setup memory base register
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
MOVI2R(X28, (u64)base);
if (!SConfig::GetInstance().bEnableDebugging) if (!SConfig::GetInstance().bEnableDebugging)
js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address);

View File

@ -40,106 +40,6 @@ static void DoBacktrace(uintptr_t access_address, SContext* ctx)
ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str()); ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str());
} }
// Disassembles a single emitted AArch64 fastmem load/store at |ptr|,
// recovering the BackPatchInfo |flags| (size / direction / swap / extend)
// and the real destination or source register |reg| so the fault handler
// can rebuild the access on the slow path.
// Returns false when the instruction is not a recognized load/store pattern.
// NOTE(review): this depends on the exact instruction sequences the JIT
// emits around the faulting access (REV/FCVT/INS/SXTH at ptr-8..ptr+8) —
// any change to those emission patterns must be mirrored here.
bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
{
// The faulting instruction plus its neighbours; several patterns keep the
// real target register in the instruction just before or after it.
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
// Opcode field (bits 22..29) distinguishes integer vs NEON load/store;
// size field (bits 30..31) is the access width.
u8 op = (inst >> 22) & 0xFF;
u8 size = (inst >> 30) & 0x3;
// Translate the width field into a BackPatchInfo size flag.
// size == 3 is only emitted for 64-bit float accesses here.
if (size == 0) // 8-bit
*flags |= BackPatchInfo::FLAG_SIZE_8;
else if (size == 1) // 16-bit
*flags |= BackPatchInfo::FLAG_SIZE_16;
else if (size == 2) // 32-bit
*flags |= BackPatchInfo::FLAG_SIZE_32;
else if (size == 3) // 64-bit
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (op == 0xF5) // NEON LDR
{
if (size == 2) // 32-bit float
{
// Reclassify: size field 2 on a NEON access means a float, not an
// integer word.
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
// Loads directly into the target register (Rt field, bits 0..4).
// Duplicates the bottom result into the top of the register.
*reg = (ARM64Reg)(inst & 0x1F);
}
else // 64-bit float
{
u32 ldr_reg = inst & 0x1F;
if (ldr_reg)
{
// Loads directly into the target register.
// No need to dump the register into flags here: the slowmem path
// always first returns in Q0, then moves to the destination.
*reg = (ARM64Reg)(ldr_reg);
}
else
{
// Rt is Q0, so the real register is the destination of the INS
// instruction two slots after the load.
u32 ins_inst = *(u32*)(ptr + 8);
*reg = (ARM64Reg)(ins_inst & 0x1F);
}
}
*flags |= BackPatchInfo::FLAG_LOAD;
return true;
}
else if (op == 0xF4) // NEON STR
{
if (size == 2) // 32-bit float
{
*flags &= ~BackPatchInfo::FLAG_SIZE_32;
*flags |= BackPatchInfo::FLAG_SIZE_F32;
// Real register is the source (bits 5..9) of the FCVT conversion
// emitted two instructions before the store.
u32 fcvt_inst = *(u32*)(ptr - 8);
*reg = (ARM64Reg)((fcvt_inst >> 5) & 0x1F);
}
else // 64-bit float
{
// Real register is the source of the preceding REV64 byte swap.
*reg = (ARM64Reg)((prev_inst >> 5) & 0x1F);
}
*flags |= BackPatchInfo::FLAG_STORE;
return true;
}
else if (op == 0xE5) // Load
{
*flags |= BackPatchInfo::FLAG_LOAD;
*reg = (ARM64Reg)(inst & 0x1F);
if ((next_inst & 0x7FFFF000) == 0x5AC00000) // REV
{
// A REV follows the load, so the emitted code byte-swaps the result
// itself; additionally check for a sign-extending SXTH after it.
u32 sxth_inst = *(u32*)(ptr + 8);
if ((sxth_inst & 0x7F800000) == 0x13000000) // SXTH
*flags |= BackPatchInfo::FLAG_EXTEND;
}
else
{
// No REV was emitted after the load.
// NOTE(review): FLAG_REVERSE semantics inferred from this pattern —
// confirm against the backpatch routine before relying on it.
*flags |= BackPatchInfo::FLAG_REVERSE;
}
return true;
}
else if (op == 0xE4) // Store
{
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 0) // 8-bit
*reg = (ARM64Reg)(inst & 0x1F);
else // 16-bit/32-bit: register is the source of the previous REV
*reg = (ARM64Reg)((prev_inst >> 5) & 0x1F);
return true;
}
// Not a load/store pattern this backpatcher knows how to handle.
return false;
}
void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
ARM64Reg RS, ARM64Reg addr, ARM64Reg RS, ARM64Reg addr,
BitSet32 gprs_to_push, BitSet32 fprs_to_push) BitSet32 gprs_to_push, BitSet32 fprs_to_push)
@ -149,8 +49,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
if (fastmem) if (fastmem)
{ {
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
MOVK(addr, ((u64)base >> 32) & 0xFFFF, SHIFT_32);
if (flags & BackPatchInfo::FLAG_STORE && if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
@ -159,12 +57,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{ {
m_float_emit.FCVT(32, 64, D0, RS); m_float_emit.FCVT(32, 64, D0, RS);
m_float_emit.REV32(8, D0, D0); m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0); m_float_emit.STR(32, D0, X28, addr);
} }
else else
{ {
m_float_emit.REV64(8, Q0, RS); m_float_emit.REV64(8, Q0, RS);
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0); m_float_emit.STR(64, Q0, X28, addr);
} }
} }
else if (flags & BackPatchInfo::FLAG_LOAD && else if (flags & BackPatchInfo::FLAG_LOAD &&
@ -172,7 +70,8 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{ {
if (flags & BackPatchInfo::FLAG_SIZE_F32) if (flags & BackPatchInfo::FLAG_SIZE_F32)
{ {
m_float_emit.LD1R(32, EncodeRegToDouble(RS), addr); m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
m_float_emit.INS(32, RS, 1, RS, 0);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
} }
@ -180,12 +79,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{ {
if (flags & BackPatchInfo::FLAG_ONLY_LOWER) if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
{ {
m_float_emit.LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(RS), addr, 0); m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr);
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
} }
else else
{ {
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0); m_float_emit.LDR(64, Q0, X28, addr);
m_float_emit.REV64(8, D0, D0); m_float_emit.REV64(8, D0, D0);
m_float_emit.INS(64, RS, 0, Q0, 0); m_float_emit.INS(64, RS, 0, Q0, 0);
} }
@ -200,20 +99,20 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
REV16(temp, RS); REV16(temp, RS);
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
STR(INDEX_UNSIGNED, temp, addr, 0); STR(temp, X28, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_16) else if (flags & BackPatchInfo::FLAG_SIZE_16)
STRH(INDEX_UNSIGNED, temp, addr, 0); STRH(temp, X28, addr);
else else
STRB(INDEX_UNSIGNED, RS, addr, 0); STRB(RS, X28, addr);
} }
else else
{ {
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
LDR(INDEX_UNSIGNED, RS, addr, 0); LDR(RS, X28, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_16) else if (flags & BackPatchInfo::FLAG_SIZE_16)
LDRH(INDEX_UNSIGNED, RS, addr, 0); LDRH(RS, X28, addr);
else if (flags & BackPatchInfo::FLAG_SIZE_8) else if (flags & BackPatchInfo::FLAG_SIZE_8)
LDRB(INDEX_UNSIGNED, RS, addr, 0); LDRB(RS, X28, addr);
if (!(flags & BackPatchInfo::FLAG_REVERSE)) if (!(flags & BackPatchInfo::FLAG_REVERSE))
{ {
@ -369,17 +268,6 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
return false; return false;
} }
ARM64Reg reg = INVALID_REG;
u32 flags = 0;
if (!DisasmLoadStore((const u8*)ctx->CTX_PC, &flags, &reg))
{
ERROR_LOG(DYNA_REC, "Error disassembling address 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC));
DoBacktrace(access_address, ctx);
return false;
}
auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC); auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC);
slow_handler_iter--; slow_handler_iter--;
@ -403,12 +291,5 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
emitter.FlushIcache(); emitter.FlushIcache();
ctx->CTX_PC = (u64)slow_handler_iter->first; ctx->CTX_PC = (u64)slow_handler_iter->first;
// Wipe the top bits of the addr_register
if (flags & BackPatchInfo::FLAG_STORE &&
!(flags & BackPatchInfo::FLAG_SIZE_F64))
ctx->CTX_REG(1) &= 0xFFFFFFFFUll;
else
ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
return true; return true;
} }

View File

@ -222,7 +222,7 @@ void Arm64GPRCache::GetAllocationOrder()
const std::vector<ARM64Reg> allocation_order = const std::vector<ARM64Reg> allocation_order =
{ {
// Callee saved // Callee saved
W28, W27, W26, W25, W24, W23, W22, W21, W20, W27, W26, W25, W24, W23, W22, W21, W20,
W19, W19,
// Caller saved // Caller saved