Merge pull request #2814 from Sonicadvance1/aarch64_faster_fastmem
[AArch64] Banish slowmem operations to farcode.
commit 3bc5505272
@@ -155,6 +155,8 @@ public:
 	const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
 	bool operator==(BitSet other) const { return m_val == other.m_val; }
 	bool operator!=(BitSet other) const { return m_val != other.m_val; }
+	bool operator<(BitSet other) const { return m_val < other.m_val; }
+	bool operator>(BitSet other) const { return m_val > other.m_val; }
 	BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
 	BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
 	BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
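
The two new comparison operators are what later let BitSet32 members participate in SlowmemHandler::operator<, and therefore in std::map keys, which require a strict weak ordering. A minimal standalone sketch of that requirement (TinyBitSet is a hypothetical stand-in, not Dolphin's BitSet):

#include <cstdint>
#include <map>

struct TinyBitSet
{
	uint32_t m_val = 0;
	bool operator<(TinyBitSet other) const { return m_val < other.m_val; }
	bool operator>(TinyBitSet other) const { return m_val > other.m_val; }
};

int main()
{
	// Compiles only because operator< gives the map its ordering.
	std::map<TinyBitSet, int> m;
	m[TinyBitSet{0b1010u}] = 1;
	return 0;
}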
@@ -22,9 +22,19 @@ private:
 protected:
 	u8 *region;
 	size_t region_size;
+	size_t parent_region_size;
+
+	bool m_has_child;
+	bool m_is_child;
+	CodeBlock* m_child;

 public:
-	CodeBlock() : region(nullptr), region_size(0) {}
+	CodeBlock()
+		: region(nullptr), region_size(0), parent_region_size(0),
+		  m_has_child(false), m_is_child(false), m_child(nullptr)
+	{
+	}
+
 	virtual ~CodeBlock() { if (region) FreeCodeSpace(); }

 	// Call this before you generate any code.
@@ -49,6 +59,12 @@ public:
 		FreeMemoryPages(region, region_size);
 		region = nullptr;
 		region_size = 0;
+		parent_region_size = 0;
+		if (m_has_child)
+		{
+			m_child->region = nullptr;
+			m_child->region_size = 0;
+		}
 	}

 	bool IsInSpace(u8 *ptr) const
@@ -70,7 +86,7 @@ public:

 	size_t GetSpaceLeft() const
 	{
-		return region_size - (T::GetCodePtr() - region);
+		return (m_has_child ? parent_region_size : region_size) - (T::GetCodePtr() - region);
 	}

 	bool IsAlmostFull() const
@@ -78,5 +94,16 @@ public:
 		// This should be bigger than the biggest block ever.
 		return GetSpaceLeft() < 0x10000;
 	}
+	void AddChildCodeSpace(CodeBlock* child, size_t size)
+	{
+		_assert_msg_(DYNA_REC, !m_has_child, "Already have a child! Can't have another!");
+		m_child = child;
+		m_has_child = true;
+		m_child->m_is_child = true;
+		u8* child_region = region + region_size - size;
+		m_child->region = child_region;
+		m_child->region_size = size;
+		m_child->ResetCodePtr();
+		parent_region_size = region_size - size;
+	}
 };
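
AddChildCodeSpace carves the child region out of the tail of the parent's single allocation, so near and far code share one mapping and stay within direct-branch range of each other; GetSpaceLeft() then measures the parent against the reduced parent_region_size. A standalone sketch of the arithmetic (sizes are assumptions, only the 16 MiB child matches this commit):

#include <cassert>
#include <cstddef>
#include <cstdint>

int main()
{
	const size_t region_size = 1024 * 1024 * 144;  // hypothetical parent total
	const size_t child_size = 1024 * 1024 * 16;    // matches AARCH64_FARCODE_SIZE

	uint8_t* region = new uint8_t[region_size];
	uint8_t* child_region = region + region_size - child_size;
	size_t parent_region_size = region_size - child_size;

	// The child starts exactly where the parent's usable space ends.
	assert(child_region == region + parent_region_size);
	delete[] region;
	return 0;
}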
@@ -14,10 +14,13 @@

 using namespace Arm64Gen;

+static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
+
 void JitArm64::Init()
 {
-	AllocCodeSpace(CODE_SIZE);
-	farcode.Init(SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE);
+	size_t child_code_size = SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
+	AllocCodeSpace(CODE_SIZE + child_code_size);
+	AddChildCodeSpace(&farcode, child_code_size);
 	jo.enableBlocklink = true;
 	jo.optimizeGatherPipe = true;
 	UpdateMemoryOptions();
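
Init now makes one allocation covering both regions and hands the tail to farcode; the MMU case keeps the larger FARCODE_SIZE_MMU, presumably because MMU titles hit the slow path far more often. A small sketch of the sizing choice (CODE_SIZE and FARCODE_SIZE_MMU values are placeholders; only the 16 MiB AARCH64_FARCODE_SIZE comes from this diff):

#include <cstddef>
#include <cstdio>

int main()
{
	const size_t CODE_SIZE = 1024 * 1024 * 128;        // placeholder
	const size_t FARCODE_SIZE_MMU = 1024 * 1024 * 64;  // placeholder
	const size_t AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
	const bool bMMU = false;

	size_t child_code_size = bMMU ? FARCODE_SIZE_MMU : AARCH64_FARCODE_SIZE;
	std::printf("one allocation: %zu near + %zu far\n", CODE_SIZE, child_code_size);
	return 0;
}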
@@ -30,22 +33,23 @@ void JitArm64::Init()
 	code_block.m_stats = &js.st;
 	code_block.m_gpa = &js.gpa;
 	code_block.m_fpa = &js.fpa;
-	InitBackpatch();
 	analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
 }

 void JitArm64::ClearCache()
 {
+	m_fault_to_handler.clear();
+	m_handler_to_loc.clear();
+
+	blocks.Clear();
 	ClearCodeSpace();
 	farcode.ClearCodeSpace();
-	blocks.Clear();
 	UpdateMemoryOptions();
 }

 void JitArm64::Shutdown()
 {
 	FreeCodeSpace();
-	farcode.Shutdown();
 	blocks.Shutdown();
 	asm_routines.Shutdown();
 }
@@ -18,15 +18,6 @@

 #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))

-// A place to throw blocks of code we don't want polluting the cache, e.g. rarely taken
-// exception branches.
-class FarCodeCacheArm64 : public Arm64Gen::ARM64CodeBlock
-{
-public:
-	void Init(int size) { AllocCodeSpace(size); }
-	void Shutdown() { FreeCodeSpace(); }
-};
-
 // Some asserts to make sure we will be able to load everything
 static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
 static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
@@ -185,6 +176,40 @@ public:
 	void psq_st(UGeckoInstruction inst);

 private:
+
+	struct SlowmemHandler
+	{
+		ARM64Reg dest_reg;
+		ARM64Reg addr_reg;
+		BitSet32 gprs;
+		BitSet32 fprs;
+		u32 flags;
+		bool operator< (const SlowmemHandler& rhs) const
+		{
+			if (dest_reg < rhs.dest_reg) return true;
+			if (dest_reg > rhs.dest_reg) return false;
+			if (addr_reg < rhs.addr_reg) return true;
+			if (addr_reg > rhs.addr_reg) return false;
+			if (gprs < rhs.gprs) return true;
+			if (gprs > rhs.gprs) return false;
+			if (fprs < rhs.fprs) return true;
+			if (fprs > rhs.fprs) return false;
+			if (flags < rhs.flags) return true;
+			if (flags > rhs.flags) return false;
+
+			return false;
+		}
+	};
+
+	struct FastmemArea
+	{
+		u32 length;
+		const u8* slowmem_code;
+	};
+
+	// <Fastmem fault location, slowmem handler location>
+	std::map<const u8*, FastmemArea> m_fault_to_handler;
+	std::map<SlowmemHandler, const u8*> m_handler_to_loc;
 	Arm64GPRCache gpr;
 	Arm64FPRCache fpr;

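
m_handler_to_loc exists so that two memory accesses whose slowmem fallbacks would be byte-for-byte identical (same destination and address registers, same save sets, same flags) share a single far-code routine, which is why SlowmemHandler's operator< compares every field. A standalone sketch of the dedup pattern (HandlerKey is a simplified stand-in):

#include <cstdint>
#include <map>

struct HandlerKey
{
	int dest_reg;
	int addr_reg;
	uint32_t gprs;   // stands in for the BitSet32 members
	uint32_t fprs;
	uint32_t flags;
	bool operator<(const HandlerKey& rhs) const
	{
		if (dest_reg != rhs.dest_reg) return dest_reg < rhs.dest_reg;
		if (addr_reg != rhs.addr_reg) return addr_reg < rhs.addr_reg;
		if (gprs != rhs.gprs) return gprs < rhs.gprs;
		if (fprs != rhs.fprs) return fprs < rhs.fprs;
		return flags < rhs.flags;
	}
};

int main()
{
	std::map<HandlerKey, const uint8_t*> handler_to_loc;
	HandlerKey key{0, 1, 0x3u, 0x0u, 0x8u};
	if (handler_to_loc.find(key) == handler_to_loc.end())
		handler_to_loc[key] = nullptr;  // a new far-code handler would be emitted here
	return 0;
}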
@@ -195,7 +220,7 @@ private:

 	ARM64FloatEmitter m_float_emit;

-	FarCodeCacheArm64 farcode;
+	Arm64Gen::ARM64CodeBlock farcode;
 	u8* nearcode; // Backed up when we switch to far code.

 	// Simple functions to switch between near and far code emitting
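
The nearcode pointer supports the near/far switching used below: back up the emitter's write position, redirect emission into the farcode block, and restore it afterwards. A hedged sketch of that mechanism (names and structure are illustrative, not Dolphin's exact implementation):

#include <cstdint>

struct EmitterSketch
{
	uint8_t* code_ptr = nullptr;  // current write position
};

struct JitSketch
{
	EmitterSketch emit;      // near code stream
	EmitterSketch farcode;   // child code block
	uint8_t* nearcode = nullptr;

	void SwitchToFarCode()
	{
		nearcode = emit.code_ptr;          // back up the near-code position
		emit.code_ptr = farcode.code_ptr;  // emit into far code from now on
	}
	void SwitchToNearCode() { emit.code_ptr = nearcode; }
};

int main()
{
	uint8_t near_buf[16], far_buf[16];
	JitSketch jit{{near_buf}, {far_buf}, nullptr};
	jit.SwitchToFarCode();   // a slowmem handler would be emitted here
	jit.SwitchToNearCode();  // back to the fastmem stream
	return 0;
}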
@@ -214,13 +239,11 @@ private:
 	// Dump a memory range of code
 	void DumpCode(const u8* start, const u8* end);

-	// The key is the backpatch flags
-	std::map<u32, BackPatchInfo> m_backpatch_info;
-
 	// Backpatching routines
 	bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
-	void InitBackpatch();
-	u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
+	void EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+	                          Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr,
+	                          BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0));
 	// Loadstore routines
 	void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
 	void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
@@ -127,10 +127,12 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
 	return false;
 }

-u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARM64Reg RS, ARM64Reg addr)
+void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
+                                    ARM64Reg RS, ARM64Reg addr,
+                                    BitSet32 gprs_to_push, BitSet32 fprs_to_push)
 {
-	u32 trouble_offset = 0;
-	const u8* code_base = emit->GetCodePtr();
+	bool in_far_code = false;
+	const u8* fastmem_start = GetCodePtr();

 	if (fastmem)
 	{
|
|||
if (flags & BackPatchInfo::FLAG_STORE &&
|
||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||
{
|
||||
ARM64FloatEmitter float_emit(emit);
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
float_emit.FCVT(32, 64, D0, RS);
|
||||
float_emit.REV32(8, D0, D0);
|
||||
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
|
||||
float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
|
||||
m_float_emit.FCVT(32, 64, D0, RS);
|
||||
m_float_emit.REV32(8, D0, D0);
|
||||
m_float_emit.STR(32, INDEX_UNSIGNED, D0, addr, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
float_emit.REV64(8, Q0, RS);
|
||||
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
|
||||
float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, Q0, RS);
|
||||
m_float_emit.STR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
}
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_LOAD &&
|
||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||
{
|
||||
ARM64FloatEmitter float_emit(emit);
|
||||
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
|
||||
float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
m_float_emit.LD1R(32, EncodeRegToDouble(RS), addr);
|
||||
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
}
|
||||
else
|
||||
{
|
||||
float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
float_emit.REV64(8, D0, D0);
|
||||
float_emit.INS(64, RS, 0, Q0, 0);
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, D0, D0);
|
||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||
}
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||
{
|
||||
ARM64Reg temp = W0;
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->REV32(temp, RS);
|
||||
REV32(temp, RS);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->REV16(temp, RS);
|
||||
|
||||
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
|
||||
REV16(temp, RS);
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->STR(INDEX_UNSIGNED, temp, addr, 0);
|
||||
STR(INDEX_UNSIGNED, temp, addr, 0);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->STRH(INDEX_UNSIGNED, temp, addr, 0);
|
||||
STRH(INDEX_UNSIGNED, temp, addr, 0);
|
||||
else
|
||||
emit->STRB(INDEX_UNSIGNED, RS, addr, 0);
|
||||
STRB(INDEX_UNSIGNED, RS, addr, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->LDR(INDEX_UNSIGNED, RS, addr, 0);
|
||||
LDR(INDEX_UNSIGNED, RS, addr, 0);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->LDRH(INDEX_UNSIGNED, RS, addr, 0);
|
||||
LDRH(INDEX_UNSIGNED, RS, addr, 0);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||
emit->LDRB(INDEX_UNSIGNED, RS, addr, 0);
|
||||
LDRB(INDEX_UNSIGNED, RS, addr, 0);
|
||||
|
||||
if (!(flags & BackPatchInfo::FLAG_REVERSE))
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->REV32(RS, RS);
|
||||
REV32(RS, RS);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->REV16(RS, RS);
|
||||
REV16(RS, RS);
|
||||
}
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_EXTEND)
|
||||
emit->SXTH(RS, RS);
|
||||
SXTH(RS, RS);
|
||||
}
|
||||
}
|
||||
else
|
||||
const u8* fastmem_end = GetCodePtr();
|
||||
|
||||
if (!fastmem || do_farcode)
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_STORE &&
|
||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||
if (fastmem && do_farcode)
|
||||
{
|
||||
ARM64FloatEmitter float_emit(emit);
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
SlowmemHandler handler;
|
||||
handler.dest_reg = RS;
|
||||
handler.addr_reg = addr;
|
||||
handler.gprs = gprs_to_push;
|
||||
handler.fprs = fprs_to_push;
|
||||
handler.flags = flags;
|
||||
|
||||
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start];
|
||||
auto handler_loc_iter = m_handler_to_loc.find(handler);
|
||||
|
||||
if (handler_loc_iter == m_handler_to_loc.end())
|
||||
{
|
||||
float_emit.FCVT(32, 64, D0, RS);
|
||||
float_emit.UMOV(32, W0, Q0, 0);
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
||||
emit->BLR(X30);
|
||||
in_far_code = true;
|
||||
SwitchToFarCode();
|
||||
const u8* handler_loc = GetCodePtr();
|
||||
m_handler_to_loc[handler] = handler_loc;
|
||||
fastmem_area->slowmem_code = handler_loc;
|
||||
fastmem_area->length = fastmem_end - fastmem_start;
|
||||
}
|
||||
else
|
||||
{
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Write_U64);
|
||||
float_emit.UMOV(64, X0, RS, 0);
|
||||
emit->BLR(X30);
|
||||
const u8* handler_loc = handler_loc_iter->second;
|
||||
fastmem_area->slowmem_code = handler_loc;
|
||||
fastmem_area->length = fastmem_end - fastmem_start;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ABI_PushRegisters(gprs_to_push);
|
||||
m_float_emit.ABI_PushRegisters(fprs_to_push, X30);
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_STORE &&
|
||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
m_float_emit.FCVT(32, 64, D0, RS);
|
||||
m_float_emit.UMOV(32, W0, Q0, 0);
|
||||
MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
||||
BLR(X30);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVI2R(X30, (u64)&PowerPC::Write_U64);
|
||||
m_float_emit.UMOV(64, X0, RS, 0);
|
||||
BLR(X30);
|
||||
}
|
||||
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_LOAD &&
|
||||
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
|
||||
{
|
||||
ARM64FloatEmitter float_emit(emit);
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||
{
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
||||
emit->BLR(X30);
|
||||
float_emit.DUP(32, RS, X0);
|
||||
float_emit.FCVTL(64, RS, RS);
|
||||
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
||||
BLR(X30);
|
||||
m_float_emit.DUP(32, RS, X0);
|
||||
m_float_emit.FCVTL(64, RS, RS);
|
||||
}
|
||||
else
|
||||
{
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Read_F64);
|
||||
emit->BLR(X30);
|
||||
float_emit.INS(64, RS, 0, X0);
|
||||
MOVI2R(X30, (u64)&PowerPC::Read_F64);
|
||||
BLR(X30);
|
||||
m_float_emit.INS(64, RS, 0, X0);
|
||||
}
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||
{
|
||||
emit->MOV(W0, RS);
|
||||
MOV(W0, RS);
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
||||
MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Write_U16);
|
||||
MOVI2R(X30, (u64)&PowerPC::Write_U16);
|
||||
else
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Write_U8);
|
||||
MOVI2R(X30, (u64)&PowerPC::Write_U8);
|
||||
|
||||
emit->BLR(X30);
|
||||
BLR(X30);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
||||
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Read_U16);
|
||||
MOVI2R(X30, (u64)&PowerPC::Read_U16);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||
emit->MOVI2R(X30, (u64)&PowerPC::Read_U8);
|
||||
MOVI2R(X30, (u64)&PowerPC::Read_U8);
|
||||
|
||||
emit->BLR(X30);
|
||||
BLR(X30);
|
||||
|
||||
if (!(flags & BackPatchInfo::FLAG_REVERSE))
|
||||
{
|
||||
emit->MOV(RS, W0);
|
||||
MOV(RS, W0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
emit->REV32(RS, W0);
|
||||
REV32(RS, W0);
|
||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||
emit->REV16(RS, W0);
|
||||
REV16(RS, W0);
|
||||
}
|
||||
|
||||
if (flags & BackPatchInfo::FLAG_EXTEND)
|
||||
emit->SXTH(RS, RS);
|
||||
SXTH(RS, RS);
|
||||
}
|
||||
|
||||
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
|
||||
ABI_PopRegisters(gprs_to_push);
|
||||
}
|
||||
|
||||
if (do_padding)
|
||||
if (in_far_code)
|
||||
{
|
||||
BackPatchInfo& info = m_backpatch_info[flags];
|
||||
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
|
||||
|
||||
u32 code_size = emit->GetCodePtr() - code_base;
|
||||
code_size /= 4;
|
||||
|
||||
for (u32 i = 0; i < (num_insts_max - code_size); ++i)
|
||||
emit->HINT(HINT_NOP);
|
||||
RET(X30);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
|
||||
return trouble_offset;
|
||||
}
|
||||
|
||||
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
||||
|
@@ -338,16 +359,29 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
 		return false;
 	}

-	BackPatchInfo& info = m_backpatch_info[flags];
-	ARM64XEmitter emitter((u8*)(ctx->CTX_PC - info.m_fastmem_trouble_inst_offset * 4));
-	u64 new_pc = (u64)emitter.GetCodePtr();
+	auto slow_handler_iter = m_fault_to_handler.upper_bound((const u8*)ctx->CTX_PC);
+	slow_handler_iter--;

-	// Slowmem routine doesn't need the address location
-	// It is already in the correct location
-	EmitBackpatchRoutine(&emitter, flags, false, true, reg, INVALID_REG);
+	// no fastmem area found
+	if (slow_handler_iter == m_fault_to_handler.end())
+		return false;
+
+	// no overlapping fastmem area found
+	if ((const u8*)ctx->CTX_PC - slow_handler_iter->first > slow_handler_iter->second.length)
+		return false;
+
+	ARM64XEmitter emitter((u8*) slow_handler_iter->first);
+
+	emitter.BL(slow_handler_iter->second.slowmem_code);
+
+	u32 num_insts_max = slow_handler_iter->second.length / 4 - 1;
+	for (u32 i = 0; i < num_insts_max; ++i)
+		emitter.HINT(HINT_NOP);
+
+	m_fault_to_handler.erase(slow_handler_iter);

 	emitter.FlushIcache();
-	ctx->CTX_PC = new_pc;
+	ctx->CTX_PC = (u64)slow_handler_iter->first;

 	// Wipe the top bits of the addr_register
 	if (flags & BackPatchInfo::FLAG_STORE &&
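
HandleFault locates the covering fastmem area with upper_bound on the faulting PC, patches the whole area to a BL into the shared slowmem handler plus NOP padding, and resumes at the start of the patched area. A standalone sketch of that patch arithmetic (the opcodes are real AArch64 encodings; everything else is simplified, with no actual signal handling):

#include <cstdint>
#include <map>
#include <vector>

struct FastmemAreaSketch
{
	uint32_t length;            // bytes covered by the fastmem block
	const uint8_t* slowmem_code;
};

int main()
{
	const uint32_t BL_STUB = 0x94000000;  // BL with a zero offset field
	const uint32_t NOP = 0xD503201F;

	std::vector<uint32_t> block(4, 0);    // pretend 4-instruction fastmem area
	const uint8_t* start = reinterpret_cast<const uint8_t*>(block.data());
	std::map<const uint8_t*, FastmemAreaSketch> fault_to_handler;
	fault_to_handler[start] = {16, nullptr};

	const uint8_t* fault_pc = start + 8;  // fault in the middle of the area
	auto it = fault_to_handler.upper_bound(fault_pc);
	--it;                                 // last area starting at or before the PC
	if (static_cast<uint32_t>(fault_pc - it->first) > it->second.length)
		return 1;                         // no overlapping fastmem area

	block[0] = BL_STUB;                   // would branch to it->second.slowmem_code
	for (uint32_t i = 0; i < it->second.length / 4 - 1; ++i)
		block[1 + i] = NOP;               // pad the remainder of the area
	// execution would resume at it->first, re-running the patched block
	return 0;
}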
@@ -357,283 +391,3 @@ bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
 		ctx->CTX_REG(0) &= 0xFFFFFFFFUll;
 	return true;
 }
-
-void JitArm64::InitBackpatch()
-{
-	u32 flags = 0;
-	BackPatchInfo info;
-	u8* code_base = GetWritableCodePtr();
-	u8* code_end;
-
-	// Loads
-	{
-		// 8bit
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_8;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 16bit
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_16;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 32bit
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_32;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 16bit - Extend
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_16 |
-				BackPatchInfo::FLAG_EXTEND;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 16bit - Reverse
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_16 |
-				BackPatchInfo::FLAG_REVERSE;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 32bit - Reverse
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_32 |
-				BackPatchInfo::FLAG_REVERSE;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 32bit float
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_F32;
-			EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 64bit float
-		{
-			flags =
-				BackPatchInfo::FLAG_LOAD |
-				BackPatchInfo::FLAG_SIZE_F64;
-			EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-	}
-
-	// Stores
-	{
-		// 8bit
-		{
-			flags =
-				BackPatchInfo::FLAG_STORE |
-				BackPatchInfo::FLAG_SIZE_8;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 16bit
-		{
-			flags =
-				BackPatchInfo::FLAG_STORE |
-				BackPatchInfo::FLAG_SIZE_16;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 32bit
-		{
-			flags =
-				BackPatchInfo::FLAG_STORE |
-				BackPatchInfo::FLAG_SIZE_32;
-			EmitBackpatchRoutine(this, flags, false, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, W0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 32bit float
-		{
-			flags =
-				BackPatchInfo::FLAG_STORE |
-				BackPatchInfo::FLAG_SIZE_F32;
-			EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-		// 64bit float
-		{
-			flags =
-				BackPatchInfo::FLAG_STORE |
-				BackPatchInfo::FLAG_SIZE_F64;
-			EmitBackpatchRoutine(this, flags, false, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_slowmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			info.m_fastmem_trouble_inst_offset =
-				EmitBackpatchRoutine(this, flags, true, false, Q0, X1);
-			code_end = GetWritableCodePtr();
-			info.m_fastmem_size = (code_end - code_base) / 4;
-
-			SetCodePtr(code_base);
-
-			m_backpatch_info[flags] = info;
-		}
-	}
-}
-
@@ -37,7 +37,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
 	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
-	regs_in_use[W30] = 0;
 	regs_in_use[dest_reg] = 0;

 	ARM64Reg addr_reg = W0;
@@ -148,7 +147,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o

 	if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{
-		EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);
+		EmitBackpatchRoutine(flags, true, false, dest_reg, XA, BitSet32(0), BitSet32(0));
 	}
 	else if (mmio_address)
 	{
@@ -158,16 +157,11 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 			jo.fastmem,
 			jo.fastmem,
-			dest_reg, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+			dest_reg, XA,
+			regs_in_use, fprs_in_use);
 	}

 	gpr.Unlock(W0, W30);
@@ -192,7 +186,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
 	regs_in_use[W1] = 0;
-	regs_in_use[W30] = 0;

 	ARM64Reg addr_reg = W1;

@@ -296,7 +289,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
 	{
 		MOVI2R(XA, imm_addr);

-		EmitBackpatchRoutine(this, flags, true, false, RS, XA);
+		EmitBackpatchRoutine(flags, true, false, RS, XA, BitSet32(0), BitSet32(0));
 	}
 	else if (mmio_address && !(flags & BackPatchInfo::FLAG_REVERSE))
 	{
@@ -309,16 +302,12 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
 		if (is_immediate)
 			MOVI2R(XA, imm_addr);

-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 			jo.fastmem,
 			jo.fastmem,
-			RS, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+			RS, XA,
+			regs_in_use,
+			fprs_in_use);
 	}

 	gpr.Unlock(W0, W1, W30);
@@ -183,26 +183,20 @@ void JitArm64::lfXX(UGeckoInstruction inst)
 	BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
-	regs_in_use[W30] = 0;
 	fprs_in_use[0] = 0; // Q0
 	fprs_in_use[VD - Q0] = 0;

 	if (is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr))
 	{
-		EmitBackpatchRoutine(this, flags, true, false, VD, XA);
+		EmitBackpatchRoutine(flags, true, false, VD, XA, BitSet32(0), BitSet32(0));
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 			jo.fastmem,
 			jo.fastmem,
-			VD, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+			VD, XA,
+			regs_in_use, fprs_in_use);
 	}

 	gpr.Unlock(W0, W30);
@@ -383,7 +377,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 	BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
 	regs_in_use[W0] = 0;
 	regs_in_use[W1] = 0;
-	regs_in_use[W30] = 0;
 	fprs_in_use[0] = 0; // Q0

 	if (is_immediate)
@@ -437,29 +430,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 		}
 		else if (PowerPC::IsOptimizableRAMAddress(imm_addr))
 		{
-			EmitBackpatchRoutine(this, flags, true, false, V0, XA);
+			EmitBackpatchRoutine(flags, true, false, V0, XA, BitSet32(0), BitSet32(0));
 		}
 		else
 		{
-			ABI_PushRegisters(regs_in_use);
-			m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-			EmitBackpatchRoutine(this, flags, false, false, V0, XA);
-			m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-			ABI_PopRegisters(regs_in_use);
+			EmitBackpatchRoutine(flags, false, false, V0, XA, regs_in_use, fprs_in_use);
 		}
 	}
 	else
 	{
-		// Has a chance of being backpatched which will destroy our state
-		// push and pop everything in this instance
-		ABI_PushRegisters(regs_in_use);
-		m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
-		EmitBackpatchRoutine(this, flags,
+		EmitBackpatchRoutine(flags,
 			jo.fastmem,
 			jo.fastmem,
-			V0, XA);
-		m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
-		ABI_PopRegisters(regs_in_use);
+			V0, XA,
+			regs_in_use, fprs_in_use);
 	}
 	gpr.Unlock(W0, W1, W30);
 	fpr.Unlock(Q0);
|
@ -34,8 +34,4 @@ struct BackPatchInfo
|
|||
return 64;
|
||||
return 0;
|
||||
}
|
||||
|
||||
u32 m_fastmem_size;
|
||||
u32 m_fastmem_trouble_inst_offset;
|
||||
u32 m_slowmem_size;
|
||||
};
|
||||
|
|