Merge pull request #3840 from degasus/arm

JitArm64: Faster block linking.
This commit is contained in:
Markus Wick 2016-05-18 16:49:01 +02:00
commit 60e7c78007
18 changed files with 64 additions and 99 deletions

View File

@ -198,6 +198,6 @@ void CachedInterpreter::WriteDestroyBlock(const u8* location, u32 address)
{
}
void CachedInterpreter::WriteLinkBlock(u8* location, const u8* address)
void CachedInterpreter::WriteLinkBlock(u8* location, const JitBlock& block)
{
}

View File

@ -35,7 +35,7 @@ public:
return "Cached Interpreter";
}
void WriteLinkBlock(u8* location, const u8* address) override;
void WriteLinkBlock(u8* location, const JitBlock& block) override;
void WriteDestroyBlock(const u8* location, u32 address) override;

View File

@ -48,7 +48,7 @@ void JitArm64::Init()
fpr.Init(this);
blocks.Init();
asm_routines.Init();
GenerateAsm();
code_block.m_stats = &js.st;
code_block.m_gpa = &js.gpa;
@ -67,13 +67,14 @@ void JitArm64::ClearCache()
ClearCodeSpace();
farcode.ClearCodeSpace();
UpdateMemoryOptions();
GenerateAsm();
}
void JitArm64::Shutdown()
{
FreeCodeSpace();
blocks.Shutdown();
asm_routines.Shutdown();
}
void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
@ -196,9 +197,8 @@ void JitArm64::WriteExit(u32 destination)
b->linkData.push_back(linkData);
// the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block
MOVI2R(X30, (u64)asm_routines.dispatcher);
MOVI2R(DISPATCHER_PC, destination);
BR(X30);
B(dispatcher);
}
void JitArm64::WriteExit(ARM64Reg Reg)
@ -213,8 +213,7 @@ void JitArm64::WriteExit(ARM64Reg Reg)
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
B(dispatcher);
}
void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
@ -240,8 +239,7 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
B(dispatcher);
}
void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
@ -272,8 +270,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
if (Profiler::g_ProfileBlocks)
EndTimeProfile(js.curBlock);
MOVI2R(X30, (u64)asm_routines.dispatcher);
BR(X30);
B(dispatcher);
}
void JitArm64::DumpCode(const u8* start, const u8* end)
@ -351,13 +348,13 @@ void JitArm64::EndTimeProfile(JitBlock* b)
void JitArm64::Run()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
CompiledCode pExecAddr = (CompiledCode)enterCode;
pExecAddr();
}
void JitArm64::SingleStep()
{
CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
CompiledCode pExecAddr = (CompiledCode)enterCode;
pExecAddr();
}
@ -412,15 +409,14 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
// Downcount flag check; only valid for linked blocks.
{
FixupBranch bail = B(CC_PL);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(DISPATCHER_PC, js.blockStart);
MOVI2R(XA, (u64)asm_routines.doTiming);
BR(XA);
gpr.Unlock(WA);
B(doTiming);
SetJumpTarget(bail);
}
// Normal entry doesn't need to check for downcount.
b->normalEntry = GetCodePtr();
// Conditionally add profiling code.
if (Profiler::g_ProfileBlocks)
{
@ -452,17 +448,13 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB
MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck);
BLR(X1);
MOVI2R(X1, (u64)asm_routines.dispatcher);
BR(X1);
B(dispatcher);
SwitchToNearCode();
SetJumpTarget(no_fail);
js.assumeNoPairedQuantize = true;
}
}
const u8 *normalEntry = GetCodePtr();
b->normalEntry = normalEntry;
gpr.Start(js.gpa);
fpr.Start(js.fpa);

View File

@ -13,11 +13,11 @@
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock
class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonAsmRoutinesBase
{
public:
JitArm64() : code_buffer(32000), m_float_emit(this) {}
@ -34,9 +34,9 @@ public:
void ClearCache();
CommonAsmRoutinesBase *GetAsmRoutines()
CommonAsmRoutinesBase *GetAsmRoutines() override
{
return &asm_routines;
return this;
}
void Run();
@ -181,7 +181,6 @@ private:
Arm64FPRCache fpr;
JitArm64BlockCache blocks;
JitArm64AsmRoutineManager asm_routines;
PPCAnalyst::CodeBuffer code_buffer;
@ -227,6 +226,11 @@ private:
void DoDownCount();
void Cleanup();
// AsmRoutines
void GenerateAsm();
void GenerateCommonAsm();
void GenMfcr();
// Profiling
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);

View File

@ -7,20 +7,23 @@
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
void JitArm64BlockCache::WriteLinkBlock(u8* location, const u8* address)
void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
{
ARM64XEmitter emit(location);
s64 offset = address - location;
// different size of the dispatcher call, so they are still continuous
if (offset > 0 && offset <= 28 && offset % 4 == 0)
// Are we able to jump directly to the normal entry?
s64 distance = ((s64)block.normalEntry - (s64)location) >> 2;
if (distance >= -0x40000 && distance <= 0x3FFFF)
{
for (int i = 0; i < offset / 4; i++)
emit.HINT(HINT_NOP);
emit.B(CC_LE, block.normalEntry);
// We can't write DISPATCHER_PC here, as block linking only has 8 bytes to work with.
// So we'll hit two jumps when calling Advance.
emit.B(block.checkedEntry);
}
else
{
emit.B(address);
emit.B(block.checkedEntry);
}
emit.FlushIcache();
}
@ -29,9 +32,8 @@ void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address)
{
// must fit within the code generated in JitArm64::WriteExit
ARM64XEmitter emit((u8 *)location);
emit.MOVI2R(X30, (u64)jit->GetAsmRoutines()->dispatcher);
emit.MOVI2R(DISPATCHER_PC, address);
emit.BR(X30);
emit.B(jit->GetAsmRoutines()->dispatcher);
emit.FlushIcache();
}

View File

@ -12,6 +12,6 @@ typedef void (*CompiledCode)();
class JitArm64BlockCache : public JitBaseBlockCache
{
private:
void WriteLinkBlock(u8* location, const u8* address);
void WriteLinkBlock(u8* location, const JitBlock& block);
void WriteDestroyBlock(const u8* location, u32 address);
};

View File

@ -12,7 +12,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
@ -217,6 +216,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITBranchOff);
bool conditional = (inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0;
ARM64Reg WA = gpr.GetReg();
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
@ -238,9 +239,12 @@ void JitArm64::bclrx(UGeckoInstruction inst)
!(inst.BO_2 & BO_BRANCH_IF_TRUE));
}
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
if (conditional)
{
FixupBranch far = B();
SwitchToFarCode();
SetJumpTarget(far);
}
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
@ -253,12 +257,13 @@ void JitArm64::bclrx(UGeckoInstruction inst)
gpr.Unlock(WB);
}
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
fpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL);
WriteExit(WA);
SwitchToNearCode();
if (conditional)
SwitchToNearCode();
if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0)
SetJumpTarget( pConditionDontBranch );

View File

@ -13,7 +13,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;

View File

@ -13,7 +13,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;

View File

@ -19,7 +19,6 @@
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/Jit_Util.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;

View File

@ -16,7 +16,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;

View File

@ -14,7 +14,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;
@ -80,7 +79,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale
MOVI2R(X30, (u64)&asm_routines.pairedLoadQuantized[inst.W * 8]);
MOVI2R(X30, (u64)&pairedLoadQuantized[inst.W * 8]);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30);
@ -191,7 +190,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(fail);
// Slow
MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[16 + inst.W * 8]);
MOVI2R(X30, (u64)&pairedStoreQuantized[16 + inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
ABI_PushRegisters(gprs_in_use);
@ -204,7 +203,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
SetJumpTarget(pass);
// Fast
MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[inst.W * 8]);
MOVI2R(X30, (u64)&pairedStoreQuantized[inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));

View File

@ -13,7 +13,6 @@
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
using namespace Arm64Gen;

View File

@ -12,7 +12,6 @@
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/PPCTables.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
@ -590,8 +589,7 @@ void JitArm64::mfcr(UGeckoInstruction inst)
JITDISABLE(bJITSystemRegistersOff);
gpr.Lock(W0, W1, W2, W30);
MOVI2R(X0, (u64)asm_routines.mfcr);
BLR(X0);
BL(GetAsmRoutines()->mfcr);
gpr.Unlock(W1, W2, W30);
gpr.BindToRegister(inst.RD, false);

View File

@ -9,13 +9,12 @@
#include "Core/HW/Memmap.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/JitArm64/Jit.h"
#include "Core/PowerPC/JitArm64/JitAsm.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
using namespace Arm64Gen;
void JitArm64AsmRoutineManager::Generate()
void JitArm64::GenerateAsm()
{
// This value is all of the callee saved registers that we are required to save.
// According to the AAPCS64 we need to save R19 ~ R30.
@ -66,7 +65,7 @@ void JitArm64AsmRoutineManager::Generate()
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(X30, (u64)&Jit);
MOVI2R(X30, (u64)&::Jit);
BLR(X30);
B(dispatcherNoCheck);
@ -105,12 +104,12 @@ void JitArm64AsmRoutineManager::Generate()
JitRegister::Register(enterCode, GetCodePtr(), "JIT_Dispatcher");
GenerateCommon();
GenerateCommonAsm();
FlushIcache();
}
void JitArm64AsmRoutineManager::GenerateCommon()
void JitArm64::GenerateCommonAsm()
{
// X0 is the scale
// X1 is address
@ -577,11 +576,11 @@ void JitArm64AsmRoutineManager::GenerateCommon()
pairedStoreQuantized[30] = storeSingleS8Slow;
pairedStoreQuantized[31] = storeSingleS16Slow;
mfcr = AlignCode16();
GetAsmRoutines()->mfcr = AlignCode16();
GenMfcr();
}
void JitArm64AsmRoutineManager::GenMfcr()
void JitArm64::GenMfcr()
{
// Input: Nothing
// Returns: W0

View File

@ -1,30 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/Arm64Emitter.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
// Generates the JIT's shared ARM64 assembly routines into a code region of its own.
// Derives from CommonAsmRoutinesBase (whose routine pointers, e.g. dispatcher and
// mfcr, callers read through GetAsmRoutines()) and from Arm64Gen::ARM64CodeBlock
// (presumably the source of the code-space calls used below — confirm in
// Common/Arm64Emitter.h).
class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen::ARM64CodeBlock
{
private:
// Code generators. Generate() is the entry point called from Init(); the
// bodies are defined out of line, not in this header.
void Generate();
void GenerateCommon();
void GenMfcr();
public:
// Reserves 16384 bytes of code space, emits the routines into it, and then
// write-protects the region. The order matters: code must be emitted before
// the region is protected.
void Init()
{
AllocCodeSpace(16384);
Generate();
WriteProtect();
}
// Releases the code space reserved by Init().
void Shutdown()
{
FreeCodeSpace();
}
};

View File

@ -198,7 +198,7 @@ using namespace Gen;
int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress);
if (destinationBlock != -1)
{
WriteLinkBlock(e.exitPtrs, blocks[destinationBlock].checkedEntry);
WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]);
e.linkStatus = true;
}
}
@ -316,8 +316,9 @@ using namespace Gen;
}
}
void JitBlockCache::WriteLinkBlock(u8* location, const u8* address)
void JitBlockCache::WriteLinkBlock(u8* location, const JitBlock& block)
{
const u8* address = block.checkedEntry;
XEmitter emit(location);
if (*location == 0xE8)
{

View File

@ -116,7 +116,7 @@ class JitBaseBlockCache
void DestroyBlock(int block_num, bool invalidate);
// Virtual for overloaded
virtual void WriteLinkBlock(u8* location, const u8* address) = 0;
virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0;
virtual void WriteDestroyBlock(const u8* location, u32 address) = 0;
public:
@ -164,6 +164,6 @@ public:
class JitBlockCache : public JitBaseBlockCache
{
private:
void WriteLinkBlock(u8* location, const u8* address) override;
void WriteLinkBlock(u8* location, const JitBlock& block) override;
void WriteDestroyBlock(const u8* location, u32 address) override;
};