Merge pull request #5923 from degasus/profiler
JitCommon: Update the block profiler
This commit is contained in:
commit
b969040534
|
@ -1218,6 +1218,14 @@ void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field)
|
||||||
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
|
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ARM64XEmitter::CNTVCT(Arm64Gen::ARM64Reg Rt)
|
||||||
|
{
|
||||||
|
_assert_msg_(DYNA_REC, Is64Bit(Rt), "CNTVCT: Rt must be 64-bit");
|
||||||
|
|
||||||
|
// MRS <Xt>, CNTVCT_EL0 ; Read CNTVCT_EL0 into Xt
|
||||||
|
EncodeSystemInst(3 | 4, 3, 0xe, 0, 2, DecodeReg(Rt));
|
||||||
|
}
|
||||||
|
|
||||||
void ARM64XEmitter::HINT(SystemHint op)
|
void ARM64XEmitter::HINT(SystemHint op)
|
||||||
{
|
{
|
||||||
EncodeSystemInst(0, 3, 2, 0, op, WSP);
|
EncodeSystemInst(0, 3, 2, 0, op, WSP);
|
||||||
|
|
|
@ -603,9 +603,9 @@ public:
|
||||||
|
|
||||||
// System
|
// System
|
||||||
void _MSR(PStateField field, u8 imm);
|
void _MSR(PStateField field, u8 imm);
|
||||||
|
|
||||||
void _MSR(PStateField field, ARM64Reg Rt);
|
void _MSR(PStateField field, ARM64Reg Rt);
|
||||||
void MRS(ARM64Reg Rt, PStateField field);
|
void MRS(ARM64Reg Rt, PStateField field);
|
||||||
|
void CNTVCT(ARM64Reg Rt);
|
||||||
|
|
||||||
void HINT(SystemHint op);
|
void HINT(SystemHint op);
|
||||||
void CLREX();
|
void CLREX();
|
||||||
|
|
|
@ -200,7 +200,6 @@ void CachedInterpreter::Jit(u32 address)
|
||||||
|
|
||||||
b->checkedEntry = GetCodePtr();
|
b->checkedEntry = GetCodePtr();
|
||||||
b->normalEntry = GetCodePtr();
|
b->normalEntry = GetCodePtr();
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
for (u32 i = 0; i < code_block.m_num_instructions; i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "Common/File.h"
|
#include "Common/File.h"
|
||||||
#include "Common/Logging/Log.h"
|
#include "Common/Logging/Log.h"
|
||||||
#include "Common/MemoryUtil.h"
|
#include "Common/MemoryUtil.h"
|
||||||
|
#include "Common/PerformanceCounter.h"
|
||||||
#include "Common/StringUtil.h"
|
#include "Common/StringUtil.h"
|
||||||
#include "Common/x64ABI.h"
|
#include "Common/x64ABI.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
|
@ -370,6 +371,23 @@ bool Jit64::Cleanup()
|
||||||
did_something = true;
|
did_something = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Profiler::g_ProfileBlocks)
|
||||||
|
{
|
||||||
|
ABI_PushRegistersAndAdjustStack({}, 0);
|
||||||
|
// get end tic
|
||||||
|
MOV(64, R(ABI_PARAM1), ImmPtr(&js.curBlock->profile_data.ticStop));
|
||||||
|
ABI_CallFunction(QueryPerformanceCounter);
|
||||||
|
// tic counter += (end tic - start tic)
|
||||||
|
MOV(64, R(RSCRATCH2), ImmPtr(&js.curBlock->profile_data));
|
||||||
|
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStop)));
|
||||||
|
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStart)));
|
||||||
|
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)));
|
||||||
|
ADD(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, downcountCounter)),
|
||||||
|
Imm32(js.downcountAmount));
|
||||||
|
MOV(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)), R(RSCRATCH));
|
||||||
|
ABI_PopRegistersAndAdjustStack({}, 0);
|
||||||
|
}
|
||||||
|
|
||||||
return did_something;
|
return did_something;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -627,7 +645,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
const u8* start =
|
const u8* start =
|
||||||
AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
AlignCode4(); // TODO: Test if this or AlignCode16 make a difference from GetCodePtr
|
||||||
b->checkedEntry = start;
|
b->checkedEntry = start;
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
// Downcount flag check. The last block decremented downcounter, and the flag should still be
|
// Downcount flag check. The last block decremented downcounter, and the flag should still be
|
||||||
// available.
|
// available.
|
||||||
|
@ -650,13 +667,12 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
// Conditionally add profiling code.
|
// Conditionally add profiling code.
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
MOV(64, R(RSCRATCH), ImmPtr(&b->runCount));
|
|
||||||
ADD(32, MatR(RSCRATCH), Imm8(1));
|
|
||||||
b->ticCounter = 0;
|
|
||||||
b->ticStart = 0;
|
|
||||||
b->ticStop = 0;
|
|
||||||
// get start tic
|
// get start tic
|
||||||
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart);
|
MOV(64, R(ABI_PARAM1), ImmPtr(&b->profile_data.ticStart));
|
||||||
|
int offset = static_cast<int>(offsetof(JitBlock::ProfileData, runCount)) -
|
||||||
|
static_cast<int>(offsetof(JitBlock::ProfileData, ticStart));
|
||||||
|
ADD(64, MDisp(ABI_PARAM1, offset), Imm8(1));
|
||||||
|
ABI_CallFunction(QueryPerformanceCounter);
|
||||||
}
|
}
|
||||||
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
|
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
|
||||||
// should help logged stack-traces become more accurate
|
// should help logged stack-traces become more accurate
|
||||||
|
@ -731,16 +747,6 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBloc
|
||||||
|
|
||||||
if (i == (code_block.m_num_instructions - 1))
|
if (i == (code_block.m_num_instructions - 1))
|
||||||
{
|
{
|
||||||
if (Profiler::g_ProfileBlocks)
|
|
||||||
{
|
|
||||||
// WARNING - cmp->branch merging will screw this up.
|
|
||||||
PROFILER_VPUSH;
|
|
||||||
// get end tic
|
|
||||||
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
|
|
||||||
// tic counter += (end tic - start tic)
|
|
||||||
PROFILER_UPDATE_TIME(b);
|
|
||||||
PROFILER_VPOP;
|
|
||||||
}
|
|
||||||
js.isLastInstruction = true;
|
js.isLastInstruction = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,15 +36,6 @@ constexpr size_t SAFE_STACK_SIZE = 512 * 1024;
|
||||||
constexpr size_t GUARD_SIZE = 0x10000; // two guards - bottom (permanent) and middle (see above)
|
constexpr size_t GUARD_SIZE = 0x10000; // two guards - bottom (permanent) and middle (see above)
|
||||||
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
|
constexpr size_t GUARD_OFFSET = STACK_SIZE - SAFE_STACK_SIZE - GUARD_SIZE;
|
||||||
|
|
||||||
static bool HasCycleCounters()
|
|
||||||
{
|
|
||||||
// Bit needs to be set to support cycle counters
|
|
||||||
const u32 PMUSERENR_CR = 0x4;
|
|
||||||
u32 reg;
|
|
||||||
asm("mrs %[val], PMUSERENR_EL0" : [val] "=r"(reg));
|
|
||||||
return !!(reg & PMUSERENR_CR);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::Init()
|
void JitArm64::Init()
|
||||||
{
|
{
|
||||||
InitializeInstructionTables();
|
InitializeInstructionTables();
|
||||||
|
@ -72,8 +63,6 @@ void JitArm64::Init()
|
||||||
|
|
||||||
AllocStack();
|
AllocStack();
|
||||||
GenerateAsm();
|
GenerateAsm();
|
||||||
|
|
||||||
m_supports_cycle_counter = HasCycleCounters();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx)
|
||||||
|
@ -172,6 +161,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
WriteExceptionExit(WA);
|
WriteExceptionExit(WA);
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -185,6 +175,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
||||||
FixupBranch c = B(CC_EQ);
|
FixupBranch c = B(CC_EQ);
|
||||||
WriteExceptionExit(WA);
|
WriteExceptionExit(WA);
|
||||||
SetJumpTarget(c);
|
SetJumpTarget(c);
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,6 +213,7 @@ void JitArm64::HLEFunction(UGeckoInstruction inst)
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
WriteExit(WA);
|
WriteExit(WA);
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::DoNothing(UGeckoInstruction inst)
|
void JitArm64::DoNothing(UGeckoInstruction inst)
|
||||||
|
@ -239,21 +231,16 @@ void JitArm64::Cleanup()
|
||||||
{
|
{
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
if (jo.optimizeGatherPipe && js.fifoBytesSinceCheck > 0)
|
||||||
{
|
{
|
||||||
gpr.Lock(W0);
|
|
||||||
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
|
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
|
||||||
BLR(X0);
|
BLR(X0);
|
||||||
gpr.Unlock(W0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::DoDownCount()
|
void JitArm64::DoDownCount()
|
||||||
{
|
{
|
||||||
ARM64Reg WA = gpr.GetReg();
|
LDR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(downcount));
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
|
SUBSI2R(W0, W0, js.downcountAmount, W1);
|
||||||
ARM64Reg WB = gpr.GetReg();
|
STR(INDEX_UNSIGNED, W0, PPC_REG, PPCSTATE_OFF(downcount));
|
||||||
SUBSI2R(WA, WA, js.downcountAmount, WB);
|
|
||||||
STR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(downcount));
|
|
||||||
gpr.Unlock(WA, WB);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::ResetStack()
|
void JitArm64::ResetStack()
|
||||||
|
@ -303,9 +290,7 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
|
||||||
{
|
{
|
||||||
Cleanup();
|
Cleanup();
|
||||||
DoDownCount();
|
DoDownCount();
|
||||||
|
EndTimeProfile(js.curBlock);
|
||||||
if (Profiler::g_ProfileBlocks)
|
|
||||||
EndTimeProfile(js.curBlock);
|
|
||||||
|
|
||||||
LK &= m_enable_blr_optimization;
|
LK &= m_enable_blr_optimization;
|
||||||
|
|
||||||
|
@ -342,17 +327,14 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
|
||||||
|
|
||||||
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
|
void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_after_return)
|
||||||
{
|
{
|
||||||
Cleanup();
|
|
||||||
DoDownCount();
|
|
||||||
|
|
||||||
LK &= m_enable_blr_optimization;
|
|
||||||
|
|
||||||
if (dest != DISPATCHER_PC)
|
if (dest != DISPATCHER_PC)
|
||||||
MOV(DISPATCHER_PC, dest);
|
MOV(DISPATCHER_PC, dest);
|
||||||
gpr.Unlock(dest);
|
|
||||||
|
|
||||||
if (Profiler::g_ProfileBlocks)
|
Cleanup();
|
||||||
EndTimeProfile(js.curBlock);
|
DoDownCount();
|
||||||
|
EndTimeProfile(js.curBlock);
|
||||||
|
|
||||||
|
LK &= m_enable_blr_optimization;
|
||||||
|
|
||||||
if (!LK)
|
if (!LK)
|
||||||
{
|
{
|
||||||
|
@ -418,35 +400,28 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dest != DISPATCHER_PC)
|
||||||
|
MOV(DISPATCHER_PC, dest);
|
||||||
|
|
||||||
Cleanup();
|
Cleanup();
|
||||||
|
EndTimeProfile(js.curBlock);
|
||||||
if (Profiler::g_ProfileBlocks)
|
|
||||||
EndTimeProfile(js.curBlock);
|
|
||||||
|
|
||||||
ARM64Reg code = gpr.GetReg();
|
|
||||||
ARM64Reg pc = gpr.GetReg();
|
|
||||||
|
|
||||||
// Check if {ARM_PC, PPC_PC} matches the current state.
|
// Check if {ARM_PC, PPC_PC} matches the current state.
|
||||||
LDP(INDEX_POST, EncodeRegTo64(code), EncodeRegTo64(pc), SP, 16);
|
LDP(INDEX_POST, X2, X1, SP, 16);
|
||||||
CMP(pc, dest);
|
CMP(W1, DISPATCHER_PC);
|
||||||
FixupBranch no_match = B(CC_NEQ);
|
FixupBranch no_match = B(CC_NEQ);
|
||||||
|
|
||||||
DoDownCount();
|
DoDownCount(); // overwrites X0 + X1
|
||||||
|
|
||||||
RET(EncodeRegTo64(code));
|
RET(X2);
|
||||||
|
|
||||||
SetJumpTarget(no_match);
|
SetJumpTarget(no_match);
|
||||||
|
|
||||||
DoDownCount();
|
DoDownCount();
|
||||||
|
|
||||||
if (dest != DISPATCHER_PC)
|
|
||||||
MOV(DISPATCHER_PC, dest);
|
|
||||||
|
|
||||||
ResetStack();
|
ResetStack();
|
||||||
|
|
||||||
B(dispatcher);
|
B(dispatcher);
|
||||||
|
|
||||||
gpr.Unlock(dest, pc, code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
|
void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
|
||||||
|
@ -469,39 +444,34 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
|
||||||
|
|
||||||
SetJumpTarget(no_exceptions);
|
SetJumpTarget(no_exceptions);
|
||||||
|
|
||||||
if (Profiler::g_ProfileBlocks)
|
EndTimeProfile(js.curBlock);
|
||||||
EndTimeProfile(js.curBlock);
|
|
||||||
|
|
||||||
B(dispatcher);
|
B(dispatcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
|
void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
|
||||||
{
|
{
|
||||||
|
if (dest != DISPATCHER_PC)
|
||||||
|
MOV(DISPATCHER_PC, dest);
|
||||||
|
|
||||||
Cleanup();
|
Cleanup();
|
||||||
DoDownCount();
|
DoDownCount();
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(Exceptions));
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
|
FixupBranch no_exceptions = CBZ(W30);
|
||||||
FixupBranch no_exceptions = CBZ(WA);
|
|
||||||
gpr.Unlock(WA);
|
|
||||||
|
|
||||||
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
if (only_external)
|
if (only_external)
|
||||||
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExternalExceptions);
|
MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExternalExceptions);
|
||||||
else
|
else
|
||||||
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExceptions);
|
MOVP2R(EncodeRegTo64(DISPATCHER_PC), &PowerPC::CheckExceptions);
|
||||||
BLR(EncodeRegTo64(dest));
|
BLR(EncodeRegTo64(DISPATCHER_PC));
|
||||||
LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
|
|
||||||
SetJumpTarget(no_exceptions);
|
SetJumpTarget(no_exceptions);
|
||||||
|
|
||||||
if (dest != DISPATCHER_PC)
|
EndTimeProfile(js.curBlock);
|
||||||
MOV(DISPATCHER_PC, dest);
|
|
||||||
gpr.Unlock(dest);
|
|
||||||
|
|
||||||
if (Profiler::g_ProfileBlocks)
|
|
||||||
EndTimeProfile(js.curBlock);
|
|
||||||
|
|
||||||
B(dispatcher);
|
B(dispatcher);
|
||||||
}
|
}
|
||||||
|
@ -514,66 +484,39 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
|
||||||
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EmitResetCycleCounters()
|
|
||||||
{
|
|
||||||
const u32 PMCR_EL0_E = 1;
|
|
||||||
const u32 PMCR_EL0_P = 2;
|
|
||||||
const u32 PMCR_EL0_C = 4;
|
|
||||||
const u32 PMCR_EL0_LC = 0x40;
|
|
||||||
_MSR(FIELD_PMCR_EL0, X0);
|
|
||||||
MOVI2R(X1, PMCR_EL0_E | PMCR_EL0_P | PMCR_EL0_C | PMCR_EL0_LC);
|
|
||||||
ORR(X0, X0, X1);
|
|
||||||
MRS(X0, FIELD_PMCR_EL0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
|
|
||||||
{
|
|
||||||
_MSR(FIELD_PMCCNTR_EL0, reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
void JitArm64::BeginTimeProfile(JitBlock* b)
|
void JitArm64::BeginTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
b->ticCounter = 0;
|
MOVP2R(X0, &b->profile_data);
|
||||||
b->ticStart = 0;
|
LDR(INDEX_UNSIGNED, X1, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
b->ticStop = 0;
|
ADD(X1, X1, 1);
|
||||||
|
|
||||||
if (m_supports_cycle_counter)
|
// Fetch the current counter register
|
||||||
{
|
CNTVCT(X2);
|
||||||
EmitResetCycleCounters();
|
|
||||||
EmitGetCycles(X1);
|
// stores runCount and ticStart
|
||||||
MOVP2R(X0, &b->ticStart);
|
STP(INDEX_SIGNED, X1, X2, X0, offsetof(JitBlock::ProfileData, runCount));
|
||||||
STR(INDEX_UNSIGNED, X1, X0, 0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
|
||||||
MOVP2R(X0, &b->ticStart);
|
|
||||||
BLR(X1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::EndTimeProfile(JitBlock* b)
|
void JitArm64::EndTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
if (m_supports_cycle_counter)
|
if (!Profiler::g_ProfileBlocks)
|
||||||
{
|
return;
|
||||||
EmitGetCycles(X2);
|
|
||||||
MOVP2R(X0, &b->ticStart);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOVP2R(X1, &QueryPerformanceCounter);
|
|
||||||
MOVP2R(X0, &b->ticStop);
|
|
||||||
BLR(X1);
|
|
||||||
|
|
||||||
MOVP2R(X0, &b->ticStart);
|
// Fetch the current counter register
|
||||||
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
CNTVCT(X1);
|
||||||
}
|
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
|
MOVP2R(X0, &b->profile_data);
|
||||||
LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
|
|
||||||
SUB(X2, X2, X1);
|
LDR(INDEX_UNSIGNED, X2, X0, offsetof(JitBlock::ProfileData, ticStart));
|
||||||
ADD(X3, X3, X2);
|
SUB(X1, X1, X2);
|
||||||
STR(INDEX_UNSIGNED, X3, X0, 16);
|
|
||||||
|
// loads ticCounter and downcountCounter
|
||||||
|
LDP(INDEX_SIGNED, X2, X3, X0, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
|
ADD(X2, X2, X1);
|
||||||
|
ADDI2R(X3, X3, js.downcountAmount, X1);
|
||||||
|
|
||||||
|
// stores ticCounter and downcountCounter
|
||||||
|
STP(INDEX_SIGNED, X2, X3, X0, offsetof(JitBlock::ProfileData, ticCounter));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::Run()
|
void JitArm64::Run()
|
||||||
|
@ -657,7 +600,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
||||||
|
|
||||||
const u8* start = GetCodePtr();
|
const u8* start = GetCodePtr();
|
||||||
b->checkedEntry = start;
|
b->checkedEntry = start;
|
||||||
b->runCount = 0;
|
|
||||||
|
|
||||||
// Downcount flag check, Only valid for linked blocks
|
// Downcount flag check, Only valid for linked blocks
|
||||||
{
|
{
|
||||||
|
@ -673,15 +615,6 @@ void JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitBlock*
|
||||||
// Conditionally add profiling code.
|
// Conditionally add profiling code.
|
||||||
if (Profiler::g_ProfileBlocks)
|
if (Profiler::g_ProfileBlocks)
|
||||||
{
|
{
|
||||||
ARM64Reg WA = gpr.GetReg();
|
|
||||||
ARM64Reg WB = gpr.GetReg();
|
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
|
||||||
ARM64Reg XB = EncodeRegTo64(WB);
|
|
||||||
MOVP2R(XA, &b->runCount);
|
|
||||||
LDR(INDEX_UNSIGNED, XB, XA, 0);
|
|
||||||
ADD(XB, XB, 1);
|
|
||||||
STR(INDEX_UNSIGNED, XB, XA, 0);
|
|
||||||
gpr.Unlock(WA, WB);
|
|
||||||
// get start tic
|
// get start tic
|
||||||
BeginTimeProfile(b);
|
BeginTimeProfile(b);
|
||||||
}
|
}
|
||||||
|
|
|
@ -174,9 +174,6 @@ private:
|
||||||
static void InitializeInstructionTables();
|
static void InitializeInstructionTables();
|
||||||
void CompileInstruction(PPCAnalyst::CodeOp& op);
|
void CompileInstruction(PPCAnalyst::CodeOp& op);
|
||||||
|
|
||||||
void EmitResetCycleCounters();
|
|
||||||
void EmitGetCycles(Arm64Gen::ARM64Reg reg);
|
|
||||||
|
|
||||||
// Simple functions to switch between near and far code emitting
|
// Simple functions to switch between near and far code emitting
|
||||||
void SwitchToFarCode()
|
void SwitchToFarCode()
|
||||||
{
|
{
|
||||||
|
@ -253,9 +250,6 @@ private:
|
||||||
Arm64Gen::ARM64CodeBlock farcode;
|
Arm64Gen::ARM64CodeBlock farcode;
|
||||||
u8* nearcode; // Backed up when we switch to far code.
|
u8* nearcode; // Backed up when we switch to far code.
|
||||||
|
|
||||||
// Do we support cycle counter profiling?
|
|
||||||
bool m_supports_cycle_counter;
|
|
||||||
|
|
||||||
bool m_enable_blr_optimization;
|
bool m_enable_blr_optimization;
|
||||||
bool m_cleanup_after_stackfault = false;
|
bool m_cleanup_after_stackfault = false;
|
||||||
u8* m_stack_base = nullptr;
|
u8* m_stack_base = nullptr;
|
||||||
|
|
|
@ -67,8 +67,8 @@ void JitArm64::rfi(UGeckoInstruction inst)
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR0]));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_SRR0]));
|
||||||
gpr.Unlock(WB, WC);
|
gpr.Unlock(WB, WC);
|
||||||
|
|
||||||
// WA is unlocked in this function
|
|
||||||
WriteExceptionExit(WA);
|
WriteExceptionExit(WA);
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::bx(UGeckoInstruction inst)
|
void JitArm64::bx(UGeckoInstruction inst)
|
||||||
|
@ -220,6 +220,8 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
|
||||||
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
|
AND(WA, WA, 30, 29); // Wipe the bottom 2 bits.
|
||||||
|
|
||||||
WriteExit(WA, inst.LK_3, js.compilerPC + 4);
|
WriteExit(WA, inst.LK_3, js.compilerPC + 4);
|
||||||
|
|
||||||
|
gpr.Unlock(WA);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitArm64::bclrx(UGeckoInstruction inst)
|
void JitArm64::bclrx(UGeckoInstruction inst)
|
||||||
|
@ -275,6 +277,8 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
||||||
|
|
||||||
WriteBLRExit(WA);
|
WriteBLRExit(WA);
|
||||||
|
|
||||||
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
if (conditional)
|
if (conditional)
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,6 @@ struct JitBlock
|
||||||
// The number of PPC instructions represented by this block. Mostly
|
// The number of PPC instructions represented by this block. Mostly
|
||||||
// useful for logging.
|
// useful for logging.
|
||||||
u32 originalSize;
|
u32 originalSize;
|
||||||
int runCount; // for profiling.
|
|
||||||
|
|
||||||
// Information about exits to a known address from this block.
|
// Information about exits to a known address from this block.
|
||||||
// This is used to implement block linking.
|
// This is used to implement block linking.
|
||||||
|
@ -65,11 +64,15 @@ struct JitBlock
|
||||||
// This set stores all physical addresses of all occupied instructions.
|
// This set stores all physical addresses of all occupied instructions.
|
||||||
std::set<u32> physical_addresses;
|
std::set<u32> physical_addresses;
|
||||||
|
|
||||||
// we don't really need to save start and stop
|
// Block profiling data, structure is inlined in Jit.cpp
|
||||||
// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
|
struct ProfileData
|
||||||
u64 ticStart; // for profiling - time.
|
{
|
||||||
u64 ticStop; // for profiling - time.
|
u64 ticCounter;
|
||||||
u64 ticCounter; // for profiling - time.
|
u64 downcountCounter;
|
||||||
|
u64 runCount;
|
||||||
|
u64 ticStart;
|
||||||
|
u64 ticStop;
|
||||||
|
} profile_data = {};
|
||||||
|
|
||||||
// This tracks the position if this block within the fast block cache.
|
// This tracks the position if this block within the fast block cache.
|
||||||
// We allow each block to have only one map entry.
|
// We allow each block to have only one map entry.
|
||||||
|
|
|
@ -119,12 +119,12 @@ void GetProfileResults(ProfileStats* prof_stats)
|
||||||
|
|
||||||
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
|
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
|
||||||
g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
|
g_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
|
||||||
// Rough heuristic. Mem instructions should cost more.
|
const auto& data = block.profile_data;
|
||||||
u64 cost = block.originalSize * (block.runCount / 4);
|
u64 cost = data.downcountCounter;
|
||||||
u64 timecost = block.ticCounter;
|
u64 timecost = data.ticCounter;
|
||||||
// Todo: tweak.
|
// Todo: tweak.
|
||||||
if (block.runCount >= 1)
|
if (data.runCount >= 1)
|
||||||
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, block.runCount,
|
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, data.runCount,
|
||||||
block.codeSize);
|
block.codeSize);
|
||||||
prof_stats->cost_sum += cost;
|
prof_stats->cost_sum += cost;
|
||||||
prof_stats->timecost_sum += timecost;
|
prof_stats->timecost_sum += timecost;
|
||||||
|
|
|
@ -5,11 +5,12 @@
|
||||||
#include "Core/PowerPC/Profiler.h"
|
#include "Core/PowerPC/Profiler.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include "Common/PerformanceCounter.h"
|
||||||
#include "Core/PowerPC/JitInterface.h"
|
#include "Core/PowerPC/JitInterface.h"
|
||||||
|
|
||||||
namespace Profiler
|
namespace Profiler
|
||||||
{
|
{
|
||||||
bool g_ProfileBlocks;
|
bool g_ProfileBlocks = false;
|
||||||
|
|
||||||
void WriteProfileResults(const std::string& filename)
|
void WriteProfileResults(const std::string& filename)
|
||||||
{
|
{
|
||||||
|
|
|
@ -10,37 +10,6 @@
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
|
||||||
#include "Common/PerformanceCounter.h"
|
|
||||||
|
|
||||||
#if defined(_M_X86_64)
|
|
||||||
|
|
||||||
#define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) \
|
|
||||||
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(pt))); \
|
|
||||||
ABI_CallFunction(QueryPerformanceCounter)
|
|
||||||
|
|
||||||
// block->ticCounter += block->ticStop - block->ticStart
|
|
||||||
#define PROFILER_UPDATE_TIME(block) \
|
|
||||||
MOV(64, R(RSCRATCH2), Imm64((u64)block)); \
|
|
||||||
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStop))); \
|
|
||||||
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStart))); \
|
|
||||||
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter))); \
|
|
||||||
MOV(64, MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)), R(RSCRATCH));
|
|
||||||
|
|
||||||
#define PROFILER_VPUSH \
|
|
||||||
BitSet32 registersInUse = CallerSavedRegistersInUse(); \
|
|
||||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
|
||||||
|
|
||||||
#define PROFILER_VPOP ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define PROFILER_QUERY_PERFORMANCE_COUNTER(pt)
|
|
||||||
#define PROFILER_UPDATE_TIME(b)
|
|
||||||
#define PROFILER_VPUSH
|
|
||||||
#define PROFILER_VPOP
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct BlockStat
|
struct BlockStat
|
||||||
{
|
{
|
||||||
BlockStat(u32 _addr, u64 c, u64 ticks, u64 run, u32 size)
|
BlockStat(u32 _addr, u64 c, u64 ticks, u64 run, u32 size)
|
||||||
|
|
|
@ -87,6 +87,7 @@ void CJitWindow::Compare(u32 em_address)
|
||||||
PPCAnalyst::CodeBlock code_block;
|
PPCAnalyst::CodeBlock code_block;
|
||||||
PPCAnalyst::PPCAnalyzer analyzer;
|
PPCAnalyst::PPCAnalyzer analyzer;
|
||||||
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
|
||||||
|
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_FOLLOW);
|
||||||
|
|
||||||
code_block.m_stats = &st;
|
code_block.m_stats = &st;
|
||||||
code_block.m_gpa = &gpa;
|
code_block.m_gpa = &gpa;
|
||||||
|
|
Loading…
Reference in New Issue