From b90757651076cfb5122af387fa73904a0b86f231 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Fri, 7 Aug 2015 02:32:16 -0500
Subject: [PATCH] [AArch64] Support profiling by cycle counters if they are available to EL0

---
 Source/Core/Common/Arm64Emitter.cpp       |  6 +++
 Source/Core/Common/Arm64Emitter.h         |  3 ++
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 66 ++++++++++++++++++++---
 Source/Core/Core/PowerPC/JitArm64/Jit.h   |  6 +++
 4 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
index 9069bedeb7..3a8303c2e3 100644
--- a/Source/Core/Common/Arm64Emitter.cpp
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -1102,6 +1102,12 @@ static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CR
 	case FIELD_FPSR:
 		o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
 	break;
+	case FIELD_PMCR_EL0:
+		o0 = 3; op1 = 3; CRn = 9; CRm = 12; op2 = 0; // PMCR_EL0 is S3_3_C9_C12_0
+	break;
+	case FIELD_PMCCNTR_EL0:
+		o0 = 3; op1 = 3; CRn = 9; CRm = 13; op2 = 0; // PMCCNTR_EL0 is S3_3_C9_C13_0
+	break;
 	default:
 		_assert_msg_(DYNA_REC, false, "Invalid PStateField to do a register move from/to");
 	break;
diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 0133d7f0d4..f6cb834a5d 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -172,6 +172,8 @@ enum PStateField
 	FIELD_DAIFSet,
 	FIELD_DAIFClr,
 	FIELD_NZCV, // The only system registers accessible from EL0 (user space)
+	FIELD_PMCR_EL0,
+	FIELD_PMCCNTR_EL0,
 	FIELD_FPCR = 0x340,
 	FIELD_FPSR = 0x341,
 };
@@ -809,6 +811,7 @@ public:
 	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void FCVTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
 	void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 2c002f9dc7..64ae021930 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -15,6 +15,15 @@
 using namespace Arm64Gen;
 
 static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
+static bool HasCycleCounters()
+{
+	// EN (bit 0) must be set: the profiler writes PMCR_EL0 from EL0, which CR (bit 2) alone does not permit
+	const u32 PMUSERENR_EN = 0x1;
+	u32 reg;
+	asm ("mrs %[val], PMUSERENR_EL0"
+	     : [val] "=r" (reg));
+	return !!(reg & PMUSERENR_EN);
+}
 
 void JitArm64::Init()
 {
@@ -34,6 +43,8 @@ void JitArm64::Init()
 	code_block.m_gpa = &js.gpa;
 	code_block.m_fpa = &js.fpa;
 	analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
+
+	m_supports_cycle_counter = HasCycleCounters();
 }
 
 void JitArm64::ClearCache()
@@ -233,26 +244,65 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
 	WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
 }
 
+void JitArm64::EmitResetCycleCounters()
+{
+	const u32 PMCR_EL0_E = 1;
+	const u32 PMCR_EL0_P = 2;
+	const u32 PMCR_EL0_C = 4;
+	const u32 PMCR_EL0_LC = 0x40;
+	MRS(X0, FIELD_PMCR_EL0); // Read the current control value first
+	MOVI2R(X1, PMCR_EL0_E |  // Enable the counters
+	           PMCR_EL0_P |  // Reset the event counters
+	           PMCR_EL0_C |  // Reset the cycle counter
+	           PMCR_EL0_LC); // Use the full 64-bit cycle counter
+	ORR(X0, X0, X1);
+	_MSR(FIELD_PMCR_EL0, X0); // Write the updated value back
+}
+
+void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
+{
+	MRS(reg, FIELD_PMCCNTR_EL0); // Read (not write) the cycle counter into reg
+}
+
 void JitArm64::BeginTimeProfile(JitBlock* b)
 {
 	b->ticCounter = 0;
 	b->ticStart = 0;
 	b->ticStop = 0;
 
-	MOVI2R(X1, (u64)QueryPerformanceCounter);
-	MOVI2R(X0, (u64)&b->ticStart);
-	BLR(X1);
+	if (m_supports_cycle_counter)
+	{
+		EmitResetCycleCounters();
+		EmitGetCycles(X1);
+		MOVI2R(X0, (u64)&b->ticStart);
+		STR(INDEX_UNSIGNED, X1, X0, 0);
+	}
+	else
+	{
+		MOVI2R(X1, (u64)QueryPerformanceCounter);
+		MOVI2R(X0, (u64)&b->ticStart);
+		BLR(X1);
+	}
 }
 
 void JitArm64::EndTimeProfile(JitBlock* b)
 {
-	MOVI2R(X1, (u64)QueryPerformanceCounter);
-	MOVI2R(X0, (u64)&b->ticStop);
-	BLR(X1);
+	if (m_supports_cycle_counter)
+	{
+		EmitGetCycles(X2);
+		MOVI2R(X0, (u64)&b->ticStart);
+	}
+	else
+	{
+		MOVI2R(X1, (u64)QueryPerformanceCounter);
+		MOVI2R(X0, (u64)&b->ticStop);
+		BLR(X1);
+
+		MOVI2R(X0, (u64)&b->ticStart);
+		LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
+	}
 
-	MOVI2R(X0, (u64)&b->ticStart);
 	LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
-	LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
 	LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
 	SUB(X2, X2, X1);
 	ADD(X3, X3, X2);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 1d7276bc0b..881626491b 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -238,6 +238,12 @@ private:
 	Arm64Gen::ARM64CodeBlock farcode;
 	u8* nearcode; // Backed up when we switch to far code.
 
+	// Do we support cycle counter profiling?
+	bool m_supports_cycle_counter;
+
+	void EmitResetCycleCounters();
+	void EmitGetCycles(Arm64Gen::ARM64Reg reg);
+
 	// Simple functions to switch between near and far code emitting
 	void SwitchToFarCode()
 	{