Merge pull request #2814 from Sonicadvance1/aarch64_faster_fastmem

[AArch64] Banish slowmem operations to farcode.
Ryan Houdek, 2015-08-14 12:03:46 -05:00, commit 3bc5505272
8 changed files with 216 additions and 437 deletions


@@ -155,6 +155,8 @@ public:
     const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
     bool operator==(BitSet other) const { return m_val == other.m_val; }
     bool operator!=(BitSet other) const { return m_val != other.m_val; }
+    bool operator<(BitSet other) const { return m_val < other.m_val; }
+    bool operator>(BitSet other) const { return m_val > other.m_val; }
     BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
     BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
     BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
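These two operators give BitSet a strict weak ordering, which is what lets a BitSet32 sit inside an ordered container key; the SlowmemHandler map added later in this commit compares register masks as part of its key. A minimal self-contained sketch of why operator< is required (stand-in type, not the real Common/BitSet.h):

    #include <cstdint>
    #include <map>

    struct TinyBitSet  // stand-in for BitSet32
    {
        uint32_t m_val = 0;
        bool operator<(TinyBitSet other) const { return m_val < other.m_val; }
    };

    int main()
    {
        // std::map requires operator< (or a comparator) on its key type;
        // without the operator above the insertion below would not compile.
        std::map<TinyBitSet, const uint8_t*> handlers;
        handlers[TinyBitSet{0x3}] = nullptr;
        return 0;
    }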


@@ -22,9 +22,19 @@ private:
 protected:
     u8 *region;
     size_t region_size;
+    size_t parent_region_size;
+    bool m_has_child;
+    bool m_is_child;
+    CodeBlock* m_child;

 public:
-    CodeBlock() : region(nullptr), region_size(0) {}
+    CodeBlock()
+        : region(nullptr), region_size(0), parent_region_size(0),
+          m_has_child(false), m_is_child(false), m_child(nullptr)
+    {
+    }
     virtual ~CodeBlock() { if (region) FreeCodeSpace(); }

     // Call this before you generate any code.
@@ -49,6 +59,12 @@ public:
         FreeMemoryPages(region, region_size);
         region = nullptr;
         region_size = 0;
+        parent_region_size = 0;
+        if (m_has_child)
+        {
+            m_child->region = nullptr;
+            m_child->region_size = 0;
+        }
     }

     bool IsInSpace(u8 *ptr) const
@@ -70,7 +86,7 @@ public:
     size_t GetSpaceLeft() const
     {
-        return region_size - (T::GetCodePtr() - region);
+        return (m_has_child ? parent_region_size : region_size) - (T::GetCodePtr() - region);
     }

     bool IsAlmostFull() const
@@ -78,5 +94,16 @@ public:
         // This should be bigger than the biggest block ever.
         return GetSpaceLeft() < 0x10000;
     }
+
+    void AddChildCodeSpace(CodeBlock* child, size_t size)
+    {
+        _assert_msg_(DYNA_REC, !m_has_child, "Already have a child! Can't have another!");
+        m_child = child;
+        m_has_child = true;
+        m_child->m_is_child = true;
+        u8* child_region = region + region_size - size;
+        m_child->region = child_region;
+        m_child->region_size = size;
+        m_child->ResetCodePtr();
+        parent_region_size = region_size - size;
+    }
 };
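AddChildCodeSpace carves the tail off an already-allocated region instead of allocating a second one, so parent and child code always sit side by side in memory; on AArch64 that keeps every near/far pair within the ±128 MiB reach of a direct B/BL. A self-contained toy model of the carve-out, using plain buffers instead of executable pages:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct Region
    {
        uint8_t* region = nullptr;
        size_t region_size = 0;
        size_t parent_region_size = 0;
        bool has_child = false;

        void AddChild(Region* child, size_t size)
        {
            assert(!has_child && size < region_size);
            child->region = region + region_size - size;  // child owns the tail
            child->region_size = size;
            parent_region_size = region_size - size;      // parent keeps only the head
            has_child = true;
        }
    };

Note that FreeCodeSpace() above also nulls out the child's pointers: the child never frees anything itself, since its pages belong to the parent's allocation.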


@@ -14,10 +14,13 @@
 using namespace Arm64Gen;

+static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
+
 void JitArm64::Init()
 {
-    AllocCodeSpace(CODE_SIZE);
-    farcode.Init(SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE);
+    size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
+    AllocCodeSpace(CODE_SIZE + child_code_size);
+    AddChildCodeSpace(&farcode, child_code_size);
     jo.enableBlocklink = true;
     jo.optimizeGatherPipe = true;
     UpdateMemoryOptions();
@@ -30,22 +33,23 @@ void JitArm64::Init()
     code_block.m_stats = &js.st;
     code_block.m_gpa = &js.gpa;
     code_block.m_fpa = &js.fpa;
-    InitBackpatch();
     analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
 }

 void JitArm64::ClearCache()
 {
+    m_fault_to_handler.clear();
+    m_handler_to_loc.clear();
+    blocks.Clear();
     ClearCodeSpace();
     farcode.ClearCodeSpace();
-    blocks.Clear();
     UpdateMemoryOptions();
 }

 void JitArm64::Shutdown()
 {
     FreeCodeSpace();
-    farcode.Shutdown();
     blocks.Shutdown();
     asm_routines.Shutdown();
 }


@@ -18,15 +18,6 @@

 #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))

-// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
-// exception branches.
-class FarCodeCacheArm64 : public Arm64Gen::ARM64CodeBlock
-{
-public:
-    void Init(int size) { AllocCodeSpace(size); }
-    void Shutdown() { FreeCodeSpace(); }
-};
-
 // Some asserts to make sure we will be able to load everything
 static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
 static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
@@ -185,6 +176,40 @@ public:
     void psq_st(UGeckoInstruction inst);

 private:
+    struct SlowmemHandler
+    {
+        ARM64Reg dest_reg;
+        ARM64Reg addr_reg;
+        BitSet32 gprs;
+        BitSet32 fprs;
+        u32 flags;
+
+        bool operator< (const SlowmemHandler& rhs) const
+        {
+            if (dest_reg < rhs.dest_reg) return true;
+            if (dest_reg > rhs.dest_reg) return false;
+            if (addr_reg < rhs.addr_reg) return true;
+            if (addr_reg > rhs.addr_reg) return false;
+            if (gprs < rhs.gprs) return true;
+            if (gprs > rhs.gprs) return false;
+            if (fprs < rhs.fprs) return true;
+            if (fprs > rhs.fprs) return false;
+            if (flags < rhs.flags) return true;
+            if (flags > rhs.flags) return false;
+            return false;
+        }
+    };
+
+    struct FastmemArea
+    {
+        u32 length;
+        const u8* slowmem_code;
+    };
+
+    // <Fastmem fault location, slowmem handler location>
+    std::map<const u8*, FastmemArea> m_fault_to_handler;
+    std::map<SlowmemHandler, const u8*> m_handler_to_loc;
+
     Arm64GPRCache gpr;
     Arm64FPRCache fpr;
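The hand-rolled operator< orders handlers field by field so that identical (dest_reg, addr_reg, gprs, fprs, flags) descriptions dedupe to a single far-code routine. A terser equivalent, shown here only as a sketch (std::tie is not used in the commit), relies on std::tuple's lexicographic comparison and on the BitSet32 operator< added above:

    #include <cstdint>
    #include <tuple>

    struct Handler  // sketch stand-in with the same five fields
    {
        int dest_reg, addr_reg;      // ARM64Reg is an enum; int stands in here
        uint32_t gprs, fprs, flags;  // BitSet32 compares via its m_val anyway

        bool operator<(const Handler& rhs) const
        {
            // std::tuple compares element by element, exactly like the
            // if/return chain in the diff above.
            return std::tie(dest_reg, addr_reg, gprs, fprs, flags) <
                   std::tie(rhs.dest_reg, rhs.addr_reg, rhs.gprs, rhs.fprs, rhs.flags);
        }
    };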
@@ -195,7 +220,7 @@ private:
     ARM64FloatEmitter m_float_emit;

-    FarCodeCacheArm64 farcode;
+    Arm64Gen::ARM64CodeBlock farcode;
     u8* nearcode; // Backed up when we switch to far code.

     // Simple functions to switch between near and far code emitting
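The switch helpers themselves are not part of this diff. A plausible shape, assuming the emitter exposes GetWritableCodePtr()/SetCodePtr() as elsewhere in Dolphin's Arm64Gen; treat this as illustration, not the actual implementation:

    void SwitchToFarCode()
    {
        nearcode = GetWritableCodePtr();          // remember where near emission stopped
        SetCodePtr(farcode.GetWritableCodePtr()); // continue emitting into farcode
    }

    void SwitchToNearCode()
    {
        farcode.SetCodePtr(GetWritableCodePtr()); // persist the far-code cursor
        SetCodePtr(nearcode);                     // resume near emission
    }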
@@ -214,13 +239,11 @@ private:
     // Dump a memory range of code
     void DumpCode(const u8* start, const u8* end);

-    // The key is the backpatch flags
-    std::map<u32, BackPatchInfo> m_backpatch_info;
-
     // Backpatching routines
     bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
-    void InitBackpatch();
-    u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
+    void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+                              Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr,
+                              BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0));

     // Loadstore routines
     void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
     void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);


@@ -127,10 +127,12 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
     return false;
 }

-u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARM64Reg RS, ARM64Reg addr)
+void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+                                    ARM64Reg RS, ARM64Reg addr,
+                                    BitSet32 gprs_to_push, BitSet32 fprs_to_push)
 {
-    u32 trouble_offset = 0;
-    const u8* code_base = emit->GetCodePtr();
+    bool in_far_code = false;
+    const u8* fastmem_start = GetCodePtr();

     if (fastmem)
     {
@@ -140,172 +142,191 @@ u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem,
         if (flags & BackPatchInfo::FLAG_STORE &&
             flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
         {
-            ARM64FloatEmitter float_emit(emit);
             if (flags & BackPatchInfo::FLAG_SIZE_F32)
             {
-                float_emit.FCVT(32, 64, D0, RS);
-                float_emit.REV32(8, D0, D0);
-                trouble_offset = (emit->GetCodePtr() - code_base) / 4;
-                float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
+                m_float_emit.FCVT(32, 64, D0, RS);
+                m_float_emit.REV32(8, D0, D0);
+                m_float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
             }
             else
             {
-                float_emit.REV64(8, Q0, RS);
-                trouble_offset = (emit->GetCodePtr() - code_base) / 4;
-                float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
+                m_float_emit.REV64(8, Q0, RS);
+                m_float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
             }
         }
         else if (flags & BackPatchInfo::FLAG_LOAD &&
                  flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
         {
-            ARM64FloatEmitter float_emit(emit);
-            trouble_offset = (emit->GetCodePtr() - code_base) / 4;
             if (flags & BackPatchInfo::FLAG_SIZE_F32)
             {
-                float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
-                float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
-                float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
+                m_float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
+                m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
+                m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
             }
             else
             {
-                float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
-                float_emit.REV64(8, D0, D0);
-                float_emit.INS(64, RS, 0, Q0, 0);
+                m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
+                m_float_emit.REV64(8, D0, D0);
+                m_float_emit.INS(64, RS, 0, Q0, 0);
             }
         }
         else if (flags & BackPatchInfo::FLAG_STORE)
         {
             ARM64Reg temp = W0;
             if (flags & BackPatchInfo::FLAG_SIZE_32)
-                emit->REV32(temp, RS);
+                REV32(temp, RS);
             else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                emit->REV16(temp, RS);
-
-            trouble_offset = (emit->GetCodePtr() - code_base) / 4;
+                REV16(temp, RS);

             if (flags & BackPatchInfo::FLAG_SIZE_32)
-                emit->STR(INDEX_UNSIGNED, temp, addr, 0);
+                STR(INDEX_UNSIGNED, temp, addr, 0);
             else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                emit->STRH(INDEX_UNSIGNED, temp, addr, 0);
+                STRH(INDEX_UNSIGNED, temp, addr, 0);
             else
-                emit->STRB(INDEX_UNSIGNED, RS, addr, 0);
+                STRB(INDEX_UNSIGNED, RS, addr, 0);
         }
         else
         {
-            trouble_offset = (emit->GetCodePtr() - code_base) / 4;
-
             if (flags & BackPatchInfo::FLAG_SIZE_32)
-                emit->LDR(INDEX_UNSIGNED, RS, addr, 0);
+                LDR(INDEX_UNSIGNED, RS, addr, 0);
             else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                emit->LDRH(INDEX_UNSIGNED, RS, addr, 0);
+                LDRH(INDEX_UNSIGNED, RS, addr, 0);
             else if (flags & BackPatchInfo::FLAG_SIZE_8)
-                emit->LDRB(INDEX_UNSIGNED, RS, addr, 0);
+                LDRB(INDEX_UNSIGNED, RS, addr, 0);

             if (!(flags & BackPatchInfo::FLAG_REVERSE))
             {
                 if (flags & BackPatchInfo::FLAG_SIZE_32)
-                    emit->REV32(RS, RS);
+                    REV32(RS, RS);
                 else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                    emit->REV16(RS, RS);
+                    REV16(RS, RS);
             }

             if (flags & BackPatchInfo::FLAG_EXTEND)
-                emit->SXTH(RS, RS);
+                SXTH(RS, RS);
         }
     }
-    else
-    {
+
+    const u8* fastmem_end = GetCodePtr();
+
+    if (!fastmem || do_farcode)
+    {
+        if (fastmem && do_farcode)
+        {
+            SlowmemHandler handler;
+            handler.dest_reg = RS;
+            handler.addr_reg = addr;
+            handler.gprs = gprs_to_push;
+            handler.fprs = fprs_to_push;
+            handler.flags = flags;
+
+            FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start];
+            auto handler_loc_iter = m_handler_to_loc.find(handler);
+
+            if (handler_loc_iter == m_handler_to_loc.end())
+            {
+                in_far_code = true;
+                SwitchToFarCode();
+                const u8* handler_loc = GetCodePtr();
+                m_handler_to_loc[handler] = handler_loc;
+                fastmem_area->slowmem_code = handler_loc;
+                fastmem_area->length = fastmem_end - fastmem_start;
+            }
+            else
+            {
+                const u8* handler_loc = handler_loc_iter->second;
+                fastmem_area->slowmem_code = handler_loc;
+                fastmem_area->length = fastmem_end - fastmem_start;
+                return;
+            }
+        }
+
+        ABI_PushRegisters(gprs_to_push);
+        m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
+
         if (flags & BackPatchInfo::FLAG_STORE &&
             flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
         {
-            ARM64FloatEmitter float_emit(emit);
             if (flags & BackPatchInfo::FLAG_SIZE_F32)
             {
-                float_emit.FCVT(32, 64, D0, RS);
-                float_emit.UMOV(32, W0, Q0, 0);
-                emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
-                emit->BLR(X30);
+                m_float_emit.FCVT(32, 64, D0, RS);
+                m_float_emit.UMOV(32, W0, Q0, 0);
+                MOVI2R(X30, (u64)&PowerPC::Write_U32);
+                BLR(X30);
             }
             else
             {
-                emit->MOVI2R(X30, (u64)&PowerPC::Write_U64);
-                float_emit.UMOV(64, X0, RS, 0);
-                emit->BLR(X30);
+                MOVI2R(X30, (u64)&PowerPC::Write_U64);
+                m_float_emit.UMOV(64, X0, RS, 0);
+                BLR(X30);
             }
         }
         else if (flags & BackPatchInfo::FLAG_LOAD &&
                  flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
         {
-            ARM64FloatEmitter float_emit(emit);
             if (flags & BackPatchInfo::FLAG_SIZE_F32)
             {
-                emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
-                emit->BLR(X30);
-                float_emit.DUP(32, RS, X0);
-                float_emit.FCVTL(64, RS, RS);
+                MOVI2R(X30, (u64)&PowerPC::Read_U32);
+                BLR(X30);
+                m_float_emit.DUP(32, RS, X0);
+                m_float_emit.FCVTL(64, RS, RS);
             }
             else
             {
-                emit->MOVI2R(X30, (u64)&PowerPC::Read_F64);
-                emit->BLR(X30);
-                float_emit.INS(64, RS, 0, X0);
+                MOVI2R(X30, (u64)&PowerPC::Read_F64);
+                BLR(X30);
+                m_float_emit.INS(64, RS, 0, X0);
             }
         }
         else if (flags & BackPatchInfo::FLAG_STORE)
         {
-            emit->MOV(W0, RS);
+            MOV(W0, RS);

             if (flags & BackPatchInfo::FLAG_SIZE_32)
-                emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
+                MOVI2R(X30, (u64)&PowerPC::Write_U32);
             else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                emit->MOVI2R(X30, (u64)&PowerPC::Write_U16);
+                MOVI2R(X30, (u64)&PowerPC::Write_U16);
             else
-                emit->MOVI2R(X30, (u64)&PowerPC::Write_U8);
+                MOVI2R(X30, (u64)&PowerPC::Write_U8);

-            emit->BLR(X30);
+            BLR(X30);
         }
         else
         {
             if (flags & BackPatchInfo::FLAG_SIZE_32)
-                emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
+                MOVI2R(X30, (u64)&PowerPC::Read_U32);
             else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                emit->MOVI2R(X30, (u64)&PowerPC::Read_U16);
+                MOVI2R(X30, (u64)&PowerPC::Read_U16);
             else if (flags & BackPatchInfo::FLAG_SIZE_8)
-                emit->MOVI2R(X30, (u64)&PowerPC::Read_U8);
+                MOVI2R(X30, (u64)&PowerPC::Read_U8);

-            emit->BLR(X30);
+            BLR(X30);

             if (!(flags & BackPatchInfo::FLAG_REVERSE))
             {
-                emit->MOV(RS, W0);
+                MOV(RS, W0);
             }
             else
             {
                 if (flags & BackPatchInfo::FLAG_SIZE_32)
-                    emit->REV32(RS, W0);
+                    REV32(RS, W0);
                 else if (flags & BackPatchInfo::FLAG_SIZE_16)
-                    emit->REV16(RS, W0);
+                    REV16(RS, W0);
             }

             if (flags & BackPatchInfo::FLAG_EXTEND)
-                emit->SXTH(RS, RS);
+                SXTH(RS, RS);
         }
-    }
-
-    if (do_padding)
-    {
-        BackPatchInfo& info = m_backpatch_info[flags];
-        u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
-        u32 code_size = emit->GetCodePtr() - code_base;
-        code_size /= 4;
-
-        for (u32 i = 0; i < (num_insts_max - code_size); ++i)
-            emit->HINT(HINT_NOP);
-    }
-
-    return trouble_offset;
+
+        m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
+        ABI_PopRegisters(gprs_to_push);
+    }
+
+    if (in_far_code)
+    {
+        RET(X30);
+        SwitchToNearCode();
+    }
 }
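The routine now covers three cases chosen by its arguments: inline fastmem only, inline slowmem (when fastmem is off), or fastmem plus a shared far-code handler. Handlers are keyed by SlowmemHandler, so every load/store site with the same register/mask/flags description reuses one far-code routine instead of emitting its own padded copy. A condensed restatement of that caching step, as a reading aid only (no such helper function exists in the commit):

    const u8* GetOrEmitHandler(const SlowmemHandler& handler)
    {
        auto it = m_handler_to_loc.find(handler);
        if (it != m_handler_to_loc.end())
            return it->second;             // an identical routine already exists

        SwitchToFarCode();
        const u8* loc = GetCodePtr();
        m_handler_to_loc[handler] = loc;
        // ... push gprs/fprs, call the PowerPC::Read_*/Write_* helper, pop ...
        RET(X30);                          // X30 holds the address after the patched BL
        SwitchToNearCode();
        return loc;
    }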
@@ -338,16 +359,29 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
         return false;
     }

-    BackPatchInfo& info = m_backpatch_info[flags];
-    ARM64XEmitter emitter((u8*)(ctx->CTX_PC - info.m_fastmem_trouble_inst_offset * 4));
-    u64 new_pc = (u64)emitter.GetCodePtr();
+    auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC);
+    slow_handler_iter--;

-    // Slowmem routine doesn't need the address location
-    // It is already in the correct location
-    EmitBackpatchRoutine(&emitter, flags, false, true, reg, INVALID_REG);
+    // no fastmem area found
+    if (slow_handler_iter == m_fault_to_handler.end())
+        return false;
+
+    // no overlapping fastmem area found
+    if ((const u8*)ctx->CTX_PC - slow_handler_iter->first > slow_handler_iter->second.length)
+        return false;
+
+    ARM64XEmitter emitter((u8*) slow_handler_iter->first);
+
+    emitter.BL(slow_handler_iter->second.slowmem_code);
+
+    u32 num_insts_max = slow_handler_iter->second.length / 4 - 1;
+    for (u32 i = 0; i < num_insts_max; ++i)
+        emitter.HINT(HINT_NOP);

     emitter.FlushIcache();
-    ctx->CTX_PC = new_pc;
+    ctx->CTX_PC = (u64)slow_handler_iter->first;
+    m_fault_to_handler.erase(slow_handler_iter);

     // Wipe the top bits of the addr_register
     if (flags & BackPatchInfo::FLAG_STORE &&
@@ -357,283 +391,3 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
         ctx->CTX_REG(0) &= 0xFFFFFFFFUll;

     return true;
 }
-
-void JitArm64::InitBackpatch()
-{
-    u32 flags = 0;
-    BackPatchInfo info;
-    u8* code_base = GetWritableCodePtr();
-    u8* code_end;
-
-    // Loads
-    {
-        // 8bit
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_8;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 16bit
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_16;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 32bit
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_32;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 16bit - Extend
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_16 |
-                BackPatchInfo::FLAG_EXTEND;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 16bit - Reverse
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_16 |
-                BackPatchInfo::FLAG_REVERSE;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 32bit - Reverse
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_32 |
-                BackPatchInfo::FLAG_REVERSE;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 32bit float
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_F32;
-            EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 64bit float
-        {
-            flags =
-                BackPatchInfo::FLAG_LOAD |
-                BackPatchInfo::FLAG_SIZE_F64;
-            EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-    }
-
-    // Stores
-    {
-        // 8bit
-        {
-            flags =
-                BackPatchInfo::FLAG_STORE |
-                BackPatchInfo::FLAG_SIZE_8;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 16bit
-        {
-            flags =
-                BackPatchInfo::FLAG_STORE |
-                BackPatchInfo::FLAG_SIZE_16;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 32bit
-        {
-            flags =
-                BackPatchInfo::FLAG_STORE |
-                BackPatchInfo::FLAG_SIZE_32;
-            EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 32bit float
-        {
-            flags =
-                BackPatchInfo::FLAG_STORE |
-                BackPatchInfo::FLAG_SIZE_F32;
-            EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-        // 64bit float
-        {
-            flags =
-                BackPatchInfo::FLAG_STORE |
-                BackPatchInfo::FLAG_SIZE_F64;
-            EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_slowmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            info.m_fastmem_trouble_inst_offset =
-                EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-            code_end = GetWritableCodePtr();
-            info.m_fastmem_size = (code_end - code_base) / 4;
-            SetCodePtr(code_base);
-            m_backpatch_info[flags] = info;
-        }
-    }
-}
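Together, the two hunks above replace the old scheme, where every slowmem variant was pre-measured by InitBackpatch and every fastmem site carried NOP padding up front, with on-demand patching: the first fault at a fastmem site rewrites the whole site into a call to its far-code handler. The patch itself, condensed from the HandleFault hunk (emitter calls as in Arm64Gen):

    ARM64XEmitter emitter((u8*)fastmem_start);   // fastmem_start/length/slowmem_code
    emitter.BL(slowmem_code);                    // come from the FastmemArea lookup
    for (u32 i = 0; i < length / 4 - 1; ++i)     // BL is one of length/4 instructions;
        emitter.HINT(HINT_NOP);                  // NOP out the rest of the old code
    emitter.FlushIcache();                       // AArch64 I-cache is not coherent
    ctx->CTX_PC = (u64)fastmem_start;            // resume at the patched BL

Because Init() carved farcode out of the same allocation, the BL's ±128 MiB immediate always reaches the handler, and the handler's closing RET(X30) drops execution back onto the NOP sled right after the BL.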


@@ -37,7 +37,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[W0] = 0;
-    regs_in_use[W30] = 0;
     regs_in_use[dest_reg] = 0;

     ARM64Reg addr_reg = W0;
@@ -148,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
     if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
     {
-        EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
+        EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
     }
     else if (mmio_address)
     {
@@ -158,16 +157,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
     }
     else
     {
-        // Has a chance of being backpatched which will destroy our state
-        // push and pop everything in this instance
-        ABI_PushRegisters(regs_in_use);
-        m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-        EmitBackpatchRoutine(this, flags,
-                             jo.fastmem,
-                             jo.fastmem,
-                             dest_reg, XA);
-        m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-        ABI_PopRegisters(regs_in_use);
+        EmitBackpatchRoutine(flags,
+                             jo.fastmem,
+                             jo.fastmem,
+                             dest_reg, XA,
+                             regs_in_use, fprs_in_use);
     }

     gpr.Unlock(W0, W30);
@@ -192,7 +186,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[W0] = 0;
     regs_in_use[W1] = 0;
-    regs_in_use[W30] = 0;

     ARM64Reg addr_reg = W1;
@@ -296,7 +289,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
     {
         MOVI2R(XA, imm_addr);
-        EmitBackpatchRoutine(this, flags, true, false, RS, XA);
+        EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
     }
     else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE))
     {
@@ -309,16 +302,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
         if (is_immediate)
             MOVI2R(XA, imm_addr);

-        // Has a chance of being backpatched which will destroy our state
-        // push and pop everything in this instance
-        ABI_PushRegisters(regs_in_use);
-        m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-        EmitBackpatchRoutine(this, flags,
-                             jo.fastmem,
-                             jo.fastmem,
-                             RS, XA);
-        m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-        ABI_PopRegisters(regs_in_use);
+        EmitBackpatchRoutine(flags,
+                             jo.fastmem,
+                             jo.fastmem,
+                             RS, XA,
+                             regs_in_use,
+                             fprs_in_use);
     }

     gpr.Unlock(W0, W1, W30);
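With the spill and reload moved inside EmitBackpatchRoutine, call sites shrink to building the register masks. Note that W30 is no longer cleared from the mask: far code is reached with a BL, which clobbers X30, so the link register now has to be part of the saved set rather than treated as scratch. A sketch of the resulting call-site contract (names as in the hunk above):

    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();  // X30 stays in this set now
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    regs_in_use[W0] = 0;                              // scratch, not live afterwards
    regs_in_use[dest_reg] = 0;                        // about to be overwritten anyway
    EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem,
                         dest_reg, XA,
                         regs_in_use, fprs_in_use);   // routine spills only if needed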


@@ -183,26 +183,20 @@ void JitArm64::lfXX(UGeckoInstruction inst)
     BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[W0] = 0;
-    regs_in_use[W30] = 0;
     fprs_in_use[0] = 0; // Q0
     fprs_in_use[VD - Q0] = 0;

     if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
     {
-        EmitBackpatchRoutine(this, flags, true, false, VD, XA);
+        EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0));
     }
     else
     {
-        // Has a chance of being backpatched which will destroy our state
-        // push and pop everything in this instance
-        ABI_PushRegisters(regs_in_use);
-        m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-        EmitBackpatchRoutine(this, flags,
-                             jo.fastmem,
-                             jo.fastmem,
-                             VD, XA);
-        m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-        ABI_PopRegisters(regs_in_use);
+        EmitBackpatchRoutine(flags,
+                             jo.fastmem,
+                             jo.fastmem,
+                             VD, XA,
+                             regs_in_use, fprs_in_use);
     }

     gpr.Unlock(W0, W30);
@@ -383,7 +377,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
     regs_in_use[W0] = 0;
     regs_in_use[W1] = 0;
-    regs_in_use[W30] = 0;
     fprs_in_use[0] = 0; // Q0

     if (is_immediate)
@@ -437,29 +430,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
         }
         else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
         {
-            EmitBackpatchRoutine(this, flags, true, false, V0, XA);
+            EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
         }
         else
         {
-            ABI_PushRegisters(regs_in_use);
-            m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-            EmitBackpatchRoutine(this, flags, false, false, V0, XA);
-            m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-            ABI_PopRegisters(regs_in_use);
+            EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
         }
     }
     else
     {
-        // Has a chance of being backpatched which will destroy our state
-        // push and pop everything in this instance
-        ABI_PushRegisters(regs_in_use);
-        m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-        EmitBackpatchRoutine(this, flags,
-                             jo.fastmem,
-                             jo.fastmem,
-                             V0, XA);
-        m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-        ABI_PopRegisters(regs_in_use);
+        EmitBackpatchRoutine(flags,
+                             jo.fastmem,
+                             jo.fastmem,
+                             V0, XA,
+                             regs_in_use, fprs_in_use);
     }

     gpr.Unlock(W0, W1, W30);
     fpr.Unlock(Q0);


@@ -34,8 +34,4 @@ struct BackPatchInfo
             return 64;
         return 0;
     }
-
-    u32 m_fastmem_size;
-    u32 m_fastmem_trouble_inst_offset;
-    u32 m_slowmem_size;
 };