Merge pull request #2736 from degasus/farcache

JitArm64: Farcache
This commit is contained in:
Ryan Houdek 2015-07-15 08:41:58 -05:00
commit c0f80ca524
11 changed files with 75 additions and 11 deletions

View File

@ -271,8 +271,8 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned
void ARM64XEmitter::SetCodePtr(u8* ptr) void ARM64XEmitter::SetCodePtr(u8* ptr)
{ {
m_code = ptr; m_code = ptr;
m_startcode = m_code; if (!m_lastCacheFlushEnd)
m_lastCacheFlushEnd = ptr; m_lastCacheFlushEnd = ptr;
} }
const u8* ARM64XEmitter::GetCodePtr() const const u8* ARM64XEmitter::GetCodePtr() const
@ -315,6 +315,9 @@ void ARM64XEmitter::FlushIcache()
void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end) void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
{ {
if (start == end)
return;
#if defined(IOS) #if defined(IOS)
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start); // Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start); sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);

View File

@ -324,7 +324,6 @@ class ARM64XEmitter
private: private:
u8* m_code; u8* m_code;
u8* m_startcode;
u8* m_lastCacheFlushEnd; u8* m_lastCacheFlushEnd;
void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr); void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
@ -365,14 +364,13 @@ protected:
public: public:
ARM64XEmitter() ARM64XEmitter()
: m_code(nullptr), m_startcode(nullptr), m_lastCacheFlushEnd(nullptr) : m_code(nullptr), m_lastCacheFlushEnd(nullptr)
{ {
} }
ARM64XEmitter(u8* code_ptr) { ARM64XEmitter(u8* code_ptr) {
m_code = code_ptr; m_code = code_ptr;
m_lastCacheFlushEnd = code_ptr; m_lastCacheFlushEnd = code_ptr;
m_startcode = code_ptr;
} }
virtual ~ARM64XEmitter() virtual ~ARM64XEmitter()

View File

@ -72,5 +72,11 @@ public:
{ {
return region_size - (T::GetCodePtr() - region); return region_size - (T::GetCodePtr() - region);
} }
bool IsAlmostFull() const
{
	// Minimum free space to keep in reserve. This should be bigger than the
	// biggest block ever, so a block that starts while the region is not
	// "almost full" still fits.
	const int kMinFreeSpace = 0x10000;
	return GetSpaceLeft() < kMinFreeSpace;
}
}; };

View File

@ -490,9 +490,7 @@ void Jit64::Trace()
void Jit64::Jit(u32 em_address) void Jit64::Jit(u32 em_address)
{ {
if (GetSpaceLeft() < 0x10000 || if (IsAlmostFull() || farcode.IsAlmostFull() || trampolines.IsAlmostFull() ||
farcode.GetSpaceLeft() < 0x10000 ||
trampolines.GetSpaceLeft() < 0x10000 ||
blocks.IsFull() || blocks.IsFull() ||
SConfig::GetInstance().bJITNoBlockCache || SConfig::GetInstance().bJITNoBlockCache ||
m_clear_cache_asap) m_clear_cache_asap)

View File

@ -473,7 +473,7 @@ void JitIL::Trace()
void JitIL::Jit(u32 em_address) void JitIL::Jit(u32 em_address)
{ {
if (GetSpaceLeft() < 0x10000 || farcode.GetSpaceLeft() < 0x10000 || blocks.IsFull() || if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() ||
SConfig::GetInstance().bJITNoBlockCache) SConfig::GetInstance().bJITNoBlockCache)
{ {
ClearCache(); ClearCache();

View File

@ -17,6 +17,7 @@ using namespace Arm64Gen;
void JitArm64::Init() void JitArm64::Init()
{ {
AllocCodeSpace(CODE_SIZE); AllocCodeSpace(CODE_SIZE);
farcode.Init(SConfig::GetInstance().bMMU ? FARCODE_SIZE_MMU : FARCODE_SIZE);
jo.enableBlocklink = true; jo.enableBlocklink = true;
jo.optimizeGatherPipe = true; jo.optimizeGatherPipe = true;
UpdateMemoryOptions(); UpdateMemoryOptions();
@ -36,6 +37,7 @@ void JitArm64::Init()
void JitArm64::ClearCache() void JitArm64::ClearCache()
{ {
ClearCodeSpace(); ClearCodeSpace();
farcode.ClearCodeSpace();
blocks.Clear(); blocks.Clear();
UpdateMemoryOptions(); UpdateMemoryOptions();
} }
@ -43,6 +45,7 @@ void JitArm64::ClearCache()
void JitArm64::Shutdown() void JitArm64::Shutdown()
{ {
FreeCodeSpace(); FreeCodeSpace();
farcode.Shutdown();
blocks.Shutdown(); blocks.Shutdown();
asm_routines.Shutdown(); asm_routines.Shutdown();
} }
@ -276,7 +279,7 @@ void JitArm64::SingleStep()
void JitArm64::Jit(u32) void JitArm64::Jit(u32)
{ {
if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache) if (IsAlmostFull() || farcode.IsAlmostFull() || blocks.IsFull() || SConfig::GetInstance().bJITNoBlockCache)
{ {
ClearCache(); ClearCache();
} }
@ -397,6 +400,10 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr));
FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
@ -407,6 +414,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
MOVI2R(WA, js.compilerPC); MOVI2R(WA, js.compilerPC);
WriteExceptionExit(WA); WriteExceptionExit(WA);
SwitchToNearCode();
SetJumpTarget(b1); SetJumpTarget(b1);
js.firstFPInstructionFound = true; js.firstFPInstructionFound = true;
@ -450,5 +459,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
b->originalSize = code_block.m_num_instructions; b->originalSize = code_block.m_num_instructions;
FlushIcache(); FlushIcache();
farcode.FlushIcache();
return start; return start;
} }

View File

@ -18,6 +18,15 @@
#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) #define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
// Separate code region for blocks of code we don't want polluting the cache,
// e.g. rarely taken exception branches.
class FarCodeCacheArm64 : public Arm64Gen::ARM64CodeBlock
{
public:
	// Sets up the far code region by forwarding `size` to AllocCodeSpace().
	void Init(int size)
	{
		AllocCodeSpace(size);
	}

	// Tears the far code region down by forwarding to FreeCodeSpace().
	void Shutdown()
	{
		FreeCodeSpace();
	}
};
// Some asserts to make sure we will be able to load everything // Some asserts to make sure we will be able to load everything
static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned");
@ -185,6 +194,22 @@ private:
ARM64FloatEmitter m_float_emit; ARM64FloatEmitter m_float_emit;
// Far code cache that SwitchToFarCode() redirects emission into.
FarCodeCacheArm64 farcode;
// Near-code write pointer, backed up when we switch to far code.
// Starts out null so that a SwitchToNearCode() without a preceding
// SwitchToFarCode() never restores an indeterminate pointer.
u8* nearcode = nullptr;
// Simple functions to switch between near and far code emitting
void SwitchToFarCode()
{
nearcode = GetWritableCodePtr();
SetCodePtr(farcode.GetWritableCodePtr());
}
void SwitchToNearCode()
{
farcode.SetCodePtr(GetWritableCodePtr());
SetCodePtr(nearcode);
}
// Dump a memory range of code // Dump a memory range of code
void DumpCode(const u8* start, const u8* end); void DumpCode(const u8* start, const u8* end);

View File

@ -142,6 +142,10 @@ void JitArm64::bcx(UGeckoInstruction inst)
!(inst.BO_2 & BO_BRANCH_IF_TRUE)); !(inst.BO_2 & BO_BRANCH_IF_TRUE));
} }
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
if (inst.LK) if (inst.LK)
{ {
u32 Jumpto = js.compilerPC + 4; u32 Jumpto = js.compilerPC + 4;
@ -161,6 +165,8 @@ void JitArm64::bcx(UGeckoInstruction inst)
WriteExit(destination); WriteExit(destination);
SwitchToNearCode();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch ); SetJumpTarget( pConditionDontBranch );
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)
@ -235,6 +241,10 @@ void JitArm64::bclrx(UGeckoInstruction inst)
!(inst.BO_2 & BO_BRANCH_IF_TRUE)); !(inst.BO_2 & BO_BRANCH_IF_TRUE));
} }
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
@ -252,6 +262,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
WriteExitDestInR(WA); WriteExitDestInR(WA);
SwitchToNearCode();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch ); SetJumpTarget( pConditionDontBranch );
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)

View File

@ -411,6 +411,10 @@ void JitArm64::lXX(UGeckoInstruction inst)
// if it's still 0, we can wait until the next event // if it's still 0, we can wait until the next event
FixupBranch noIdle = CBNZ(gpr.R(d)); FixupBranch noIdle = CBNZ(gpr.R(d));
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
gpr.Flush(FLUSH_MAINTAIN_STATE); gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE);
@ -423,6 +427,8 @@ void JitArm64::lXX(UGeckoInstruction inst)
gpr.Unlock(WA); gpr.Unlock(WA);
WriteExceptionExit(); WriteExceptionExit();
SwitchToNearCode();
SetJumpTarget(noIdle); SetJumpTarget(noIdle);
//js.compilerPC += 8; //js.compilerPC += 8;

View File

@ -156,7 +156,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
m_float_emit.ABI_PushRegisters(fprs_in_use, X30); m_float_emit.ABI_PushRegisters(fprs_in_use, X30);
BLR(EncodeRegTo64(type_reg)); BLR(EncodeRegTo64(type_reg));
m_float_emit.ABI_PopRegisters(fprs_in_use, X30); m_float_emit.ABI_PopRegisters(fprs_in_use, X30);
ABI_PushRegisters(gprs_in_use); ABI_PopRegisters(gprs_in_use);
SetJumpTarget(continue1); SetJumpTarget(continue1);
} }

View File

@ -184,6 +184,10 @@ void JitArm64::twx(UGeckoInstruction inst)
SetJumpTarget(fixup); SetJumpTarget(fixup);
} }
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
@ -196,6 +200,8 @@ void JitArm64::twx(UGeckoInstruction inst)
// WA is unlocked in this function // WA is unlocked in this function
WriteExceptionExit(WA); WriteExceptionExit(WA);
SwitchToNearCode();
SetJumpTarget(dont_trap); SetJumpTarget(dont_trap);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))