[AArch64] Support profiling by cycle counters if they are available to EL0

Ryan Houdek 2015-08-07 02:32:16 -05:00
parent 5110574c1f
commit b907576510
4 changed files with 73 additions and 8 deletions


@@ -1102,6 +1102,12 @@ static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CRm, int &op2)
	case FIELD_FPSR:
		o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
		break;
	case FIELD_PMCR_EL0:
		o0 = 3; op1 = 3; CRn = 9; CRm = 12; op2 = 0;
		break;
	case FIELD_PMCCNTR_EL0:
		o0 = 3; op1 = 3; CRn = 9; CRm = 13; op2 = 0;
		break;
	default:
		_assert_msg_(DYNA_REC, false, "Invalid PStateField to do a register move from/to");
		break;


@@ -172,6 +172,8 @@ enum PStateField
	FIELD_DAIFSet,
	FIELD_DAIFClr,
	FIELD_NZCV, // The only system registers accessible from EL0 (user space)
	FIELD_PMCR_EL0,
	FIELD_PMCCNTR_EL0,
	FIELD_FPCR = 0x340,
	FIELD_FPSR = 0x341,
};
@@ -809,6 +811,7 @@ public:
	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);


@@ -15,6 +15,15 @@
using namespace Arm64Gen;

static const int AARCH64_FARCODE_SIZE = 1024 * 1024 * 16;
static bool HasCycleCounters()
{
	// PMUSERENR_EL0.CR (bit 2) must be set by the kernel
	// for EL0 to be allowed to read the cycle counter
	const u32 PMUSERENR_CR = 0x4;
	u32 reg;
	asm("mrs %[val], PMUSERENR_EL0"
	    : [val] "=r"(reg));
	return !!(reg & PMUSERENR_CR);
}
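PMUSERENR_EL0 itself is always readable from EL0, but its CR bit is typically clear on a stock Linux kernel; userspace access to PMCCNTR_EL0 has to be granted by kernel code that sets PMUSERENR_EL0 on every core. If the flag can't be trusted, a more defensive check is to attempt the read and catch the resulting SIGILL — a hypothetical ProbeCycleCounter helper (not part of this commit) could look like:

#include <setjmp.h>
#include <signal.h>
#include <stdint.h>

static sigjmp_buf s_probe_jmp;
static void ProbeSigillHandler(int) { siglongjmp(s_probe_jmp, 1); }

// Returns true if an EL0 read of PMCCNTR_EL0 actually succeeds.
static bool ProbeCycleCounter()
{
	struct sigaction sa = {}, old;
	sa.sa_handler = ProbeSigillHandler;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGILL, &sa, &old);

	bool ok = false;
	if (sigsetjmp(s_probe_jmp, 1) == 0)
	{
		uint64_t cycles;
		asm volatile("mrs %0, pmccntr_el0" : "=r"(cycles));
		ok = true;
	}

	sigaction(SIGILL, &old, nullptr);
	return ok;
}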
void JitArm64::Init()
{
@@ -34,6 +43,8 @@ void JitArm64::Init()
	code_block.m_gpa = &js.gpa;
	code_block.m_fpa = &js.fpa;
	analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);

	m_supports_cycle_counter = HasCycleCounters();
}
void JitArm64::ClearCache()
@@ -233,26 +244,65 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
	WARN_LOG(DYNA_REC, "Code dump from %p to %p:\n%s", start, end, output.c_str());
}
void JitArm64::EmitResetCycleCounters()
{
	const u32 PMCR_EL0_E = 1;
	const u32 PMCR_EL0_P = 2;
	const u32 PMCR_EL0_C = 4;
	const u32 PMCR_EL0_LC = 0x40;
	// Read-modify-write PMCR_EL0 to enable and reset the counters
	MRS(X0, FIELD_PMCR_EL0);
	MOVI2R(X1, PMCR_EL0_E |
	           PMCR_EL0_P |
	           PMCR_EL0_C |
	           PMCR_EL0_LC);
	ORR(X0, X0, X1);
	_MSR(FIELD_PMCR_EL0, X0);
}
void JitArm64::EmitGetCycles(Arm64Gen::ARM64Reg reg)
{
	// Read the current cycle count out of PMCCNTR_EL0
	MRS(reg, FIELD_PMCCNTR_EL0);
}
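The emitted reset is a read-modify-write of PMCR_EL0: E (bit 0) enables the counters, P (bit 1) resets the event counters, C (bit 2) resets the cycle counter, and LC (bit 6) selects 64-bit cycle counting. For comparison, the same operation as host inline assembly (an illustrative sketch; an EL0 write to PMCR_EL0 additionally requires PMUSERENR_EL0.EN, so this assumes the kernel has granted full userspace PMU access):

#include <cstdint>

static void ResetCycleCounterHost()
{
	uint64_t pmcr;
	asm volatile("mrs %0, pmcr_el0" : "=r"(pmcr));
	pmcr |= 1 |    // E:  enable all counters
	        2 |    // P:  reset all event counters
	        4 |    // C:  reset the cycle counter
	        0x40;  // LC: use the 64-bit cycle counter
	asm volatile("msr pmcr_el0, %0" : : "r"(pmcr));
}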
void JitArm64::BeginTimeProfile(JitBlock* b)
{
	b->ticCounter = 0;
	b->ticStart = 0;
	b->ticStop = 0;
	if (m_supports_cycle_counter)
	{
		EmitResetCycleCounters();
		EmitGetCycles(X1);
		MOVI2R(X0, (u64)&b->ticStart);
		STR(INDEX_UNSIGNED, X1, X0, 0);
	}
	else
	{
		MOVI2R(X1, (u64)QueryPerformanceCounter);
		MOVI2R(X0, (u64)&b->ticStart);
		BLR(X1);
	}
}
void JitArm64::EndTimeProfile(JitBlock* b)
{
	if (m_supports_cycle_counter)
	{
		EmitGetCycles(X2);
		MOVI2R(X0, (u64)&b->ticStart);
	}
	else
	{
		MOVI2R(X1, (u64)QueryPerformanceCounter);
		MOVI2R(X0, (u64)&b->ticStop);
		BLR(X1);

		MOVI2R(X0, (u64)&b->ticStart);
		LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
	}

	LDR(INDEX_UNSIGNED, X1, X0, 0); // Start
	LDR(INDEX_UNSIGNED, X3, X0, 16); // Counter
	SUB(X2, X2, X1);
	ADD(X3, X3, X2);
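Taken together, the two paths implement the same per-block stopwatch: the start value is written to b->ticStart, the stop value ends up in X2 (read directly from the cycle counter, or loaded from b->ticStop after the QueryPerformanceCounter call), and the difference is accumulated into the running total at offset 16, presumably stored back in the context that follows this hunk. In plain C++ the accounting reduces to the following sketch (assuming ticStart, ticStop and ticCounter are consecutive u64 fields of JitBlock, matching the 0/8/16 offsets used above; JitBlockTimes is a stand-in for illustration):

#include <cstdint>

struct JitBlockTimes  // stand-in for the relevant JitBlock fields
{
	uint64_t ticStart;    // offset 0
	uint64_t ticStop;     // offset 8
	uint64_t ticCounter;  // offset 16
};

static void EndTimeProfileEquivalent(JitBlockTimes* b, uint64_t stop)
{
	b->ticStop = stop;                          // X2
	b->ticCounter += b->ticStop - b->ticStart;  // SUB + ADD (+ store back)
}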


@@ -238,6 +238,12 @@ private:
	Arm64Gen::ARM64CodeBlock farcode;
	u8* nearcode; // Backed up when we switch to far code.

	// Do we support cycle counter profiling?
	bool m_supports_cycle_counter;

	void EmitResetCycleCounters();
	void EmitGetCycles(Arm64Gen::ARM64Reg reg);
	// Simple functions to switch between near and far code emitting
	void SwitchToFarCode()
	{