From b2be9bd7f73870d58c932f397aab5e8247094dd5 Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 13 May 2016 21:44:34 +0200 Subject: [PATCH 1/5] JitArm64: Inline JitAsm in JitArm64. So they share the same emitter, and so they are in the same 128MB range. This allows us to use B() to jump to the dispatcher. However, this means we have to regenerate them on every cache clear. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 21 ++++++------- Source/Core/Core/PowerPC/JitArm64/Jit.h | 14 +++++---- .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 1 - .../JitArm64/JitArm64_FloatingPoint.cpp | 1 - .../PowerPC/JitArm64/JitArm64_Integer.cpp | 1 - .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 1 - .../JitArm64/JitArm64_LoadStoreFloating.cpp | 1 - .../JitArm64/JitArm64_LoadStorePaired.cpp | 7 ++--- .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 1 - .../JitArm64/JitArm64_SystemRegisters.cpp | 3 +- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 13 ++++---- Source/Core/Core/PowerPC/JitArm64/JitAsm.h | 30 ------------------- 12 files changed, 30 insertions(+), 64 deletions(-) delete mode 100644 Source/Core/Core/PowerPC/JitArm64/JitAsm.h diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index ee7e44b39c..7fdce138cc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -48,7 +48,7 @@ void JitArm64::Init() fpr.Init(this); blocks.Init(); - asm_routines.Init(); + GenerateAsm(); code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; @@ -67,13 +67,14 @@ void JitArm64::ClearCache() ClearCodeSpace(); farcode.ClearCodeSpace(); UpdateMemoryOptions(); + + GenerateAsm(); } void JitArm64::Shutdown() { FreeCodeSpace(); blocks.Shutdown(); - asm_routines.Shutdown(); } void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) @@ -196,7 +197,7 @@ void JitArm64::WriteExit(u32 destination) b->linkData.push_back(linkData); // the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block -
MOVI2R(X30, (u64)asm_routines.dispatcher); + MOVI2R(X30, (u64)dispatcher); MOVI2R(DISPATCHER_PC, destination); BR(X30); } @@ -213,7 +214,7 @@ void JitArm64::WriteExit(ARM64Reg Reg) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)asm_routines.dispatcher); + MOVI2R(X30, (u64)dispatcher); BR(X30); } @@ -240,7 +241,7 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)asm_routines.dispatcher); + MOVI2R(X30, (u64)dispatcher); BR(X30); } @@ -272,7 +273,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)asm_routines.dispatcher); + MOVI2R(X30, (u64)dispatcher); BR(X30); } @@ -351,13 +352,13 @@ void JitArm64::EndTimeProfile(JitBlock* b) void JitArm64::Run() { - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + CompiledCode pExecAddr = (CompiledCode)enterCode; pExecAddr(); } void JitArm64::SingleStep() { - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + CompiledCode pExecAddr = (CompiledCode)enterCode; pExecAddr(); } @@ -415,7 +416,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); MOVI2R(DISPATCHER_PC, js.blockStart); - MOVI2R(XA, (u64)asm_routines.doTiming); + MOVI2R(XA, (u64)doTiming); BR(XA); gpr.Unlock(WA); SetJumpTarget(bail); @@ -452,7 +453,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE); MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck); BLR(X1); - MOVI2R(X1, (u64)asm_routines.dispatcher); + MOVI2R(X1, (u64)dispatcher); BR(X1); SwitchToNearCode(); SetJumpTarget(no_fail); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index f42f2c059c..a791b279e4 100644 
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -13,11 +13,11 @@ #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" +#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" -class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock +class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonAsmRoutinesBase { public: JitArm64() : code_buffer(32000), m_float_emit(this) {} @@ -34,9 +34,9 @@ public: void ClearCache(); - CommonAsmRoutinesBase *GetAsmRoutines() + CommonAsmRoutinesBase *GetAsmRoutines() override { - return &asm_routines; + return this; } void Run(); @@ -181,7 +181,6 @@ private: Arm64FPRCache fpr; JitArm64BlockCache blocks; - JitArm64AsmRoutineManager asm_routines; PPCAnalyst::CodeBuffer code_buffer; @@ -227,6 +226,11 @@ private: void DoDownCount(); void Cleanup(); + // AsmRoutines + void GenerateAsm(); + void GenerateCommonAsm(); + void GenMfcr(); + // Profiling void BeginTimeProfile(JitBlock* b); void EndTimeProfile(JitBlock* b); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index bda767d056..4871fa1e22 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -12,7 +12,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 5335f474ce..3df45074e3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -13,7 +13,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 2e6e0e6f26..4522b6e780 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -13,7 +13,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index be6e931381..b5ba689d21 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -19,7 +19,6 @@ #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/Jit_Util.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index e1e7865ff0..f27367f297 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -16,7 +16,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index f281d29d27..60f1faac9a 
100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -14,7 +14,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; @@ -80,7 +79,7 @@ void JitArm64::psq_l(UGeckoInstruction inst) UBFM(type_reg, scale_reg, 16, 18); // Type UBFM(scale_reg, scale_reg, 24, 29); // Scale - MOVI2R(X30, (u64)&asm_routines.pairedLoadQuantized[inst.W * 8]); + MOVI2R(X30, (u64)&pairedLoadQuantized[inst.W * 8]); LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(X30); @@ -191,7 +190,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(fail); // Slow - MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[16 + inst.W * 8]); + MOVI2R(X30, (u64)&pairedStoreQuantized[16 + inst.W * 8]); LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true)); ABI_PushRegisters(gprs_in_use); @@ -204,7 +203,7 @@ void JitArm64::psq_st(UGeckoInstruction inst) SetJumpTarget(pass); // Fast - MOVI2R(X30, (u64)&asm_routines.pairedStoreQuantized[inst.W * 8]); + MOVI2R(X30, (u64)&pairedStoreQuantized[inst.W * 8]); LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true)); BLR(EncodeRegTo64(type_reg)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index afc4bc4a91..7241147b90 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -13,7 +13,6 @@ #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" using namespace Arm64Gen; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index b0764012cf..729c8d2d52 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -12,7 +12,6 @@ #include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PPCTables.h" #include "Core/PowerPC/JitArm64/Jit.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { @@ -590,7 +589,7 @@ void JitArm64::mfcr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.Lock(W0, W1, W2, W30); - MOVI2R(X0, (u64)asm_routines.mfcr); + MOVI2R(X0, (u64)GetAsmRoutines()->mfcr); BLR(X0); gpr.Unlock(W1, W2, W30); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index e2d2b2cbcc..02cf15f1a0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -9,13 +9,12 @@ #include "Core/HW/Memmap.h" #include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/JitArm64/Jit.h" -#include "Core/PowerPC/JitArm64/JitAsm.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitCache.h" using namespace Arm64Gen; -void JitArm64AsmRoutineManager::Generate() +void JitArm64::GenerateAsm() { // This value is all of the callee saved registers that we are required to save. // According to the AACPS64 we need to save R19 ~ R30. 
@@ -66,7 +65,7 @@ void JitArm64AsmRoutineManager::Generate() STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc)); - MOVI2R(X30, (u64)&Jit); + MOVI2R(X30, (u64)&::Jit); BLR(X30); B(dispatcherNoCheck); @@ -105,12 +104,12 @@ void JitArm64AsmRoutineManager::Generate() JitRegister::Register(enterCode, GetCodePtr(), "JIT_Dispatcher"); - GenerateCommon(); + GenerateCommonAsm(); FlushIcache(); } -void JitArm64AsmRoutineManager::GenerateCommon() +void JitArm64::GenerateCommonAsm() { // X0 is the scale // X1 is address @@ -577,11 +576,11 @@ void JitArm64AsmRoutineManager::GenerateCommon() pairedStoreQuantized[30] = storeSingleS8Slow; pairedStoreQuantized[31] = storeSingleS16Slow; - mfcr = AlignCode16(); + GetAsmRoutines()->mfcr = AlignCode16(); GenMfcr(); } -void JitArm64AsmRoutineManager::GenMfcr() +void JitArm64::GenMfcr() { // Input: Nothing // Returns: W0 diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h deleted file mode 100644 index 5b4697ba6c..0000000000 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/Arm64Emitter.h" -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" - -class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen::ARM64CodeBlock -{ -private: - void Generate(); - void GenerateCommon(); - void GenMfcr(); - -public: - void Init() - { - AllocCodeSpace(16384); - Generate(); - WriteProtect(); - } - - void Shutdown() - { - FreeCodeSpace(); - } -}; - From cf3c65fbd0d14527424449d00aab07596770972e Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 13 May 2016 23:13:35 +0200 Subject: [PATCH 2/5] JitArm64: Use B() instead of BR() to jump to ASM. Avoid indirect jumps as much as possible. This is a noticeable speedup.
--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 21 ++++++------------- .../Core/PowerPC/JitArm64/JitArm64Cache.cpp | 3 +-- .../JitArm64/JitArm64_SystemRegisters.cpp | 3 +-- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 7fdce138cc..9a628430a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -197,9 +197,8 @@ void JitArm64::WriteExit(u32 destination) b->linkData.push_back(linkData); // the code generated in JitArm64BlockCache::WriteDestroyBlock must fit in this block - MOVI2R(X30, (u64)dispatcher); MOVI2R(DISPATCHER_PC, destination); - BR(X30); + B(dispatcher); } void JitArm64::WriteExit(ARM64Reg Reg) @@ -214,8 +213,7 @@ void JitArm64::WriteExit(ARM64Reg Reg) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)dispatcher); - BR(X30); + B(dispatcher); } void JitArm64::WriteExceptionExit(u32 destination, bool only_external) @@ -241,8 +239,7 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)dispatcher); - BR(X30); + B(dispatcher); } void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external) @@ -273,8 +270,7 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external) if (Profiler::g_ProfileBlocks) EndTimeProfile(js.curBlock); - MOVI2R(X30, (u64)dispatcher); - BR(X30); + B(dispatcher); } void JitArm64::DumpCode(const u8* start, const u8* end) @@ -413,12 +409,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB // Downcount flag check, Only valid for linked blocks { FixupBranch bail = B(CC_PL); - ARM64Reg WA = gpr.GetReg(); - ARM64Reg XA = EncodeRegTo64(WA); MOVI2R(DISPATCHER_PC, js.blockStart); - MOVI2R(XA, (u64)doTiming); - BR(XA); - gpr.Unlock(WA); + B(doTiming); SetJumpTarget(bail); } @@ -453,8 +445,7 @@ const u8* 
JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE); MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck); BLR(X1); - MOVI2R(X1, (u64)dispatcher); - BR(X1); + B(dispatcher); SwitchToNearCode(); SetJumpTarget(no_fail); js.assumeNoPairedQuantize = true; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index cc416fc97e..ef9c6bcf46 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -29,9 +29,8 @@ void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address) { // must fit within the code generated in JitArm64::WriteExit ARM64XEmitter emit((u8 *)location); - emit.MOVI2R(X30, (u64)jit->GetAsmRoutines()->dispatcher); emit.MOVI2R(DISPATCHER_PC, address); - emit.BR(X30); + emit.B(jit->GetAsmRoutines()->dispatcher); emit.FlushIcache(); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 729c8d2d52..3bcb89a10a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -589,8 +589,7 @@ void JitArm64::mfcr(UGeckoInstruction inst) JITDISABLE(bJITSystemRegistersOff); gpr.Lock(W0, W1, W2, W30); - MOVI2R(X0, (u64)GetAsmRoutines()->mfcr); - BLR(X0); + BL(GetAsmRoutines()->mfcr); gpr.Unlock(W1, W2, W30); gpr.BindToRegister(inst.RD, false); From 10e716c62369a444d590229f716b57d04968b883 Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 13 May 2016 23:28:35 +0200 Subject: [PATCH 3/5] JitCommon: Pass jit block instead of host pointer on linking. So the JIT may do more fancy stuff. 
--- Source/Core/Core/PowerPC/CachedInterpreter.cpp | 2 +- Source/Core/Core/PowerPC/CachedInterpreter.h | 2 +- Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp | 3 ++- Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h | 2 +- Source/Core/Core/PowerPC/JitCommon/JitCache.cpp | 5 +++-- Source/Core/Core/PowerPC/JitCommon/JitCache.h | 4 ++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/CachedInterpreter.cpp b/Source/Core/Core/PowerPC/CachedInterpreter.cpp index 3b52caaa62..7831239d4e 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter.cpp +++ b/Source/Core/Core/PowerPC/CachedInterpreter.cpp @@ -198,6 +198,6 @@ void CachedInterpreter::WriteDestroyBlock(const u8* location, u32 address) { } -void CachedInterpreter::WriteLinkBlock(u8* location, const u8* address) +void CachedInterpreter::WriteLinkBlock(u8* location, const JitBlock& block) { } diff --git a/Source/Core/Core/PowerPC/CachedInterpreter.h b/Source/Core/Core/PowerPC/CachedInterpreter.h index 4a6b76b4ed..cf7aa90615 100644 --- a/Source/Core/Core/PowerPC/CachedInterpreter.h +++ b/Source/Core/Core/PowerPC/CachedInterpreter.h @@ -35,7 +35,7 @@ public: return "Cached Interpreter"; } - void WriteLinkBlock(u8* location, const u8* address) override; + void WriteLinkBlock(u8* location, const JitBlock& block) override; void WriteDestroyBlock(const u8* location, u32 address) override; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index ef9c6bcf46..fe9280bf58 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -7,8 +7,9 @@ #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h" -void JitArm64BlockCache::WriteLinkBlock(u8* location, const u8* address) +void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block) { + const u8* address = block.checkedEntry; ARM64XEmitter emit(location); s64 
offset = address - location; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h index 7b7b2c755b..d9029822df 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h @@ -12,6 +12,6 @@ typedef void (*CompiledCode)(); class JitArm64BlockCache : public JitBaseBlockCache { private: - void WriteLinkBlock(u8* location, const u8* address); + void WriteLinkBlock(u8* location, const JitBlock& block); void WriteDestroyBlock(const u8* location, u32 address); }; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp index 61034fbf82..91c2d1a1ac 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.cpp @@ -198,7 +198,7 @@ using namespace Gen; int destinationBlock = GetBlockNumberFromStartAddress(e.exitAddress); if (destinationBlock != -1) { - WriteLinkBlock(e.exitPtrs, blocks[destinationBlock].checkedEntry); + WriteLinkBlock(e.exitPtrs, blocks[destinationBlock]); e.linkStatus = true; } } @@ -316,8 +316,9 @@ using namespace Gen; } } - void JitBlockCache::WriteLinkBlock(u8* location, const u8* address) + void JitBlockCache::WriteLinkBlock(u8* location, const JitBlock& block) { + const u8* address = block.checkedEntry; XEmitter emit(location); if (*location == 0xE8) { diff --git a/Source/Core/Core/PowerPC/JitCommon/JitCache.h b/Source/Core/Core/PowerPC/JitCommon/JitCache.h index 5c716f5800..1216a8753a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitCache.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitCache.h @@ -116,7 +116,7 @@ class JitBaseBlockCache void DestroyBlock(int block_num, bool invalidate); // Virtual for overloaded - virtual void WriteLinkBlock(u8* location, const u8* address) = 0; + virtual void WriteLinkBlock(u8* location, const JitBlock& block) = 0; virtual void WriteDestroyBlock(const u8* location, u32 address) = 0; public: @@ 
-164,6 +164,6 @@ public: class JitBlockCache : public JitBaseBlockCache { private: - void WriteLinkBlock(u8* location, const u8* address) override; + void WriteLinkBlock(u8* location, const JitBlock& block) override; void WriteDestroyBlock(const u8* location, u32 address) override; }; From b6f16352e4266623a6763aa7c3b9a9dbd8b2fc1f Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 13 May 2016 23:42:34 +0200 Subject: [PATCH 4/5] JitArm64: Check downcount on block linking. This skips one B() call. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 6 +++--- .../Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp | 16 +++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 9a628430a7..31031ef70c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -414,6 +414,9 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB SetJumpTarget(bail); } + // Normal entry doesn't need to check for downcount. + b->normalEntry = GetCodePtr(); + // Conditionally add profiling code. 
if (Profiler::g_ProfileBlocks) { @@ -452,9 +455,6 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB } } - const u8 *normalEntry = GetCodePtr(); - b->normalEntry = normalEntry; - gpr.Start(js.gpa); fpr.Start(js.fpa); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp index fe9280bf58..85664e7bb3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -9,19 +9,21 @@ void JitArm64BlockCache::WriteLinkBlock(u8* location, const JitBlock& block) { - const u8* address = block.checkedEntry; ARM64XEmitter emit(location); - s64 offset = address - location; - // different size of the dispatcher call, so they are still continuous - if (offset > 0 && offset <= 28 && offset % 4 == 0) + // Are we able to jump directly to the normal entry? + s64 distance = ((s64)block.normalEntry - (s64)location) >> 2; + if (distance >= -0x40000 && distance <= 0x3FFFF) { - for (int i = 0; i < offset / 4; i++) - emit.HINT(HINT_NOP); + emit.B(CC_LE, block.normalEntry); + + // We can't write DISPATCHER_PC here, as block linking is only for 8 bytes. + // So we'll hit two jumps when calling Advance. + emit.B(block.checkedEntry); } else { - emit.B(address); + emit.B(block.checkedEntry); } emit.FlushIcache(); } From 637851fce0c588063e584dde1b46a59a804648ed Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 5 Mar 2016 15:11:15 +0100 Subject: [PATCH 5/5] JitArm64: optimize bclrx. The unconditional path is the most common, so no need to switch to far code here.
--- .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp index 4871fa1e22..773736a935 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -216,6 +216,8 @@ void JitArm64::bclrx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITBranchOff); + bool conditional = (inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0; + ARM64Reg WA = gpr.GetReg(); FixupBranch pCTRDontBranch; if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR @@ -237,9 +239,12 @@ void JitArm64::bclrx(UGeckoInstruction inst) !(inst.BO_2 & BO_BRANCH_IF_TRUE)); } - FixupBranch far = B(); - SwitchToFarCode(); - SetJumpTarget(far); + if (conditional) + { + FixupBranch far = B(); + SwitchToFarCode(); + SetJumpTarget(far); + } LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR])); AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. @@ -252,12 +257,13 @@ void JitArm64::bclrx(UGeckoInstruction inst) gpr.Unlock(WB); } - gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); - fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE); + gpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL); + fpr.Flush(conditional ? FlushMode::FLUSH_MAINTAIN_STATE : FlushMode::FLUSH_ALL); WriteExit(WA); - SwitchToNearCode(); + if (conditional) + SwitchToNearCode(); if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) SetJumpTarget( pConditionDontBranch );