Profiling: measure time on POSIX systems using clock_gettime

This commit is contained in:
Gabriel Corona 2014-10-18 02:50:02 +02:00
parent 1c41acaddd
commit 641e820257
8 changed files with 86 additions and 35 deletions

View File

@ -14,6 +14,7 @@ set(SRCS BreakPoints.cpp
NandPaths.cpp NandPaths.cpp
Network.cpp Network.cpp
PcapFile.cpp PcapFile.cpp
PerformanceCounter.cpp
SettingsHandler.cpp SettingsHandler.cpp
SDCardUtil.cpp SDCardUtil.cpp
StringUtil.cpp StringUtil.cpp

View File

@ -0,0 +1,48 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#if !defined(_WIN32)
#include <cstdint>
#include <ctime>
#include <unistd.h>
#include "Common/CommonTypes.h"
#include "Common/PerformanceCounter.h"
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK>0
#define DOLPHIN_CLOCK CLOCK_MONOTONIC
#else
#define DOLPHIN_CLOCK CLOCK_REALTIME
#endif
#endif
bool QueryPerformanceCounter(u64* out)
{
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
timespec tp;
if (clock_gettime(DOLPHIN_CLOCK, &tp))
return false;
*out = (u64) tp.tv_nsec + (u64) 1000000000 * (u64) tp.tv_sec;
return true;
#else
*out = 0;
return false;
#endif
}
bool QueryPerformanceFrequency(u64* out)
{
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
*out = 1000000000;
return true;
#else
*out = 1;
return false;
#endif
}
#endif

View File

@ -0,0 +1,17 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#if !defined(_WIN32)
#include <cstdint>
#include "Common/CommonTypes.h"
typedef u64 LARGE_INTEGER;
bool QueryPerformanceCounter(u64* out);
bool QueryPerformanceFrequency(u64* lpFrequency);
#endif

View File

@ -575,13 +575,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{ {
MOV(64, R(RSCRATCH), Imm64((u64)&b->runCount)); MOV(64, R(RSCRATCH), Imm64((u64)&b->runCount));
ADD(32, MatR(RSCRATCH), Imm8(1)); ADD(32, MatR(RSCRATCH), Imm8(1));
#ifdef _WIN32
b->ticCounter = 0; b->ticCounter = 0;
b->ticStart = 0; b->ticStart = 0;
b->ticStop = 0; b->ticStop = 0;
#else
//TODO
#endif
// get start tic // get start tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart); PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart);
} }
@ -625,7 +621,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// get end tic // get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
// tic counter += (end tic - start tic) // tic counter += (end tic - start tic)
PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); PROFILER_UPDATE_TIME(b);
PROFILER_VPOP; PROFILER_VPOP;
} }
} }

View File

@ -397,7 +397,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
// get end tic // get end tic
PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop); PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStop);
// tic counter += (end tic - start tic) // tic counter += (end tic - start tic)
PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart); PROFILER_UPDATE_TIME(&b);
PROFILER_VPOP; PROFILER_VPOP;
} }
} }
@ -467,4 +467,3 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
FlushIcache(); FlushIcache();
return start; return start;
} }

View File

@ -51,13 +51,11 @@ struct JitBlock
}; };
std::vector<LinkData> linkData; std::vector<LinkData> linkData;
#ifdef _WIN32
// we don't really need to save start and stop // we don't really need to save start and stop
// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;) // TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
u64 ticStart; // for profiling - time. u64 ticStart; // for profiling - time.
u64 ticStop; // for profiling - time. u64 ticStop; // for profiling - time.
u64 ticCounter; // for profiling - time. u64 ticCounter; // for profiling - time.
#endif
#ifdef USE_VTUNE #ifdef USE_VTUNE
char blockName[32]; char blockName[32];

View File

@ -8,6 +8,8 @@
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
#else
#include "Common/PerformanceCounter.h"
#endif #endif
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
@ -140,26 +142,20 @@ namespace JitInterface
std::vector<BlockStat> stats; std::vector<BlockStat> stats;
stats.reserve(jit->GetBlockCache()->GetNumBlocks()); stats.reserve(jit->GetBlockCache()->GetNumBlocks());
u64 cost_sum = 0; u64 cost_sum = 0;
#ifdef _WIN32
u64 timecost_sum = 0; u64 timecost_sum = 0;
u64 countsPerSec; u64 countsPerSec;
QueryPerformanceFrequency((LARGE_INTEGER *)&countsPerSec); QueryPerformanceFrequency((LARGE_INTEGER*)&countsPerSec);
#endif
for (int i = 0; i < jit->GetBlockCache()->GetNumBlocks(); i++) for (int i = 0; i < jit->GetBlockCache()->GetNumBlocks(); i++)
{ {
const JitBlock *block = jit->GetBlockCache()->GetBlock(i); const JitBlock *block = jit->GetBlockCache()->GetBlock(i);
// Rough heuristic. Mem instructions should cost more. // Rough heuristic. Mem instructions should cost more.
u64 cost = block->originalSize * (block->runCount / 4); u64 cost = block->originalSize * (block->runCount / 4);
#ifdef _WIN32
u64 timecost = block->ticCounter; u64 timecost = block->ticCounter;
#endif
// Todo: tweak. // Todo: tweak.
if (block->runCount >= 1) if (block->runCount >= 1)
stats.push_back(BlockStat(i, cost)); stats.push_back(BlockStat(i, cost));
cost_sum += cost; cost_sum += cost;
#ifdef _WIN32
timecost_sum += timecost; timecost_sum += timecost;
#endif
} }
sort(stats.begin(), stats.end()); sort(stats.begin(), stats.end());
@ -177,16 +173,11 @@ namespace JitInterface
{ {
std::string name = g_symbolDB.GetDescription(block->originalAddress); std::string name = g_symbolDB.GetDescription(block->originalAddress);
double percent = 100.0 * (double)stat.cost / (double)cost_sum; double percent = 100.0 * (double)stat.cost / (double)cost_sum;
#ifdef _WIN32
double timePercent = 100.0 * (double)block->ticCounter / (double)timecost_sum; double timePercent = 100.0 * (double)block->ticCounter / (double)timecost_sum;
fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t%" PRIu64 "\t%.2lf\t%llf\t%lf\t%i\n", fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t%" PRIu64 "\t%.2f\t%.2f\t%.2f\t%i\n",
block->originalAddress, name.c_str(), stat.cost, block->originalAddress, name.c_str(), stat.cost,
block->ticCounter, percent, timePercent, block->ticCounter, percent, timePercent,
(double)block->ticCounter*1000.0/(double)countsPerSec, block->codeSize); (double)block->ticCounter*1000.0/(double)countsPerSec, block->codeSize);
#else
fprintf(f.GetHandle(), "%08x\t%s\t%" PRIu64 "\t???\t%.2lf\t???\t???\t%i\n",
block->originalAddress, name.c_str(), stat.cost, percent, block->codeSize);
#endif
} }
} }
} }

View File

@ -5,22 +5,26 @@
#pragma once #pragma once
#include <cstddef>
#include <string> #include <string>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#ifdef _WIN32 #include "Common/PerformanceCounter.h"
#if defined(_M_X86_64)
#define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) \ #define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) \
LEA(64, ABI_PARAM1, M(pt)); \ MOV(64, R(ABI_PARAM1), Imm64((u64) pt)); \
CALL(QueryPerformanceCounter) CALL((const void*) QueryPerformanceCounter)
// asm write : (u64) dt += t1-t0 // block->ticCounter += block->ticStop - block->ticStart
#define PROFILER_ADD_DIFF_LARGE_INTEGER(pdt, pt1, pt0) \ #define PROFILER_UPDATE_TIME(block) \
MOV(64, R(RSCRATCH), M(pt1)); \ MOV(64, R(RSCRATCH2), Imm64((u64) block)); \
SUB(64, R(RSCRATCH), M(pt0)); \ MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStop))); \
ADD(64, R(RSCRATCH), M(pdt)); \ SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticStart))); \
MOV(64, M(pdt), R(RSCRATCH)); ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter))); \
MOV(64, MDisp(RSCRATCH2, offsetof(struct JitBlock, ticCounter)), R(RSCRATCH));
#define PROFILER_VPUSH \ #define PROFILER_VPUSH \
BitSet32 registersInUse = CallerSavedRegistersInUse(); \ BitSet32 registersInUse = CallerSavedRegistersInUse(); \
@ -32,10 +36,7 @@
#else #else
#define PROFILER_QUERY_PERFORMANCE_COUNTER(pt) #define PROFILER_QUERY_PERFORMANCE_COUNTER(pt)
#define PROFILER_UPDATE_TIME(b)
// TODO: Implement generic ways to do this cleanly with all supported architectures
// asm write : (u64) dt += t1-t0
#define PROFILER_ADD_DIFF_LARGE_INTEGER(pdt, pt1, pt0)
#define PROFILER_VPUSH #define PROFILER_VPUSH
#define PROFILER_VPOP #define PROFILER_VPOP