mirror of https://github.com/PCSX2/pcsx2.git
ee:profiler: count EE memory access
This commit is contained in:
parent
0e1188565e
commit
5b08bda5b8
|
@ -183,23 +183,43 @@ static const char eeOpcodeName[][16] = {
|
|||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
struct eeProfiler {
|
||||
static const u32 memSpace = 1 << 19;
|
||||
|
||||
u64 opStats[static_cast<int>(eeOpcode::LAST)];
|
||||
u32 memStats[memSpace];
|
||||
u32 memStatsConst[memSpace];
|
||||
u64 memStatsSlow;
|
||||
u64 memStatsFast;
|
||||
u32 memMask;
|
||||
|
||||
// Resets every profiler counter and (re)installs the address-compaction mask.
// Each member is cleared individually; a whole-object memzero(*this) in
// addition to these would only zero the (large) counter arrays a second time.
void Reset() {
	memzero(opStats);
	memzero(memStats);
	memzero(memStatsConst);
	memStatsSlow = 0;
	memStatsFast = 0;
	// 19 bits are set in this mask, matching memSpace (1 << 19) counter slots.
	memMask = 0xF700FFF0;
	// The name table is expected to carry a '!'-prefixed entry at index LAST.
	pxAssert(eeOpcodeName[static_cast<int>(eeOpcode::LAST)][0] == '!');
}
|
||||
|
||||
// Emits code that increments the 64-bit counter opStats[opcode] by one.
// The counter is updated as two 32-bit halves: ADD 1 to the first dword,
// then ADC 0 to the second dword so the carry propagates.
// Exactly one ADD/ADC pair is emitted so each executed opcode is counted once.
void EmitOp(eeOpcode opcode) {
	u32* const counter = (u32*)&opStats[static_cast<int>(opcode)];
	xADD(ptr32[&counter[0]], 1);
	xADC(ptr32[&counter[1]], 0);
}
|
||||
|
||||
// Returns `part` expressed as a percentage of `total`.
// A zero `total` yields 0.0 rather than NaN/inf, so the report stays readable
// when a category has no samples (e.g. no counters were emitted at all).
double per(u64 part, u64 total) {
	if (total == 0)
		return 0.0;
	return (double) part / (double) total * 100.0;
}
|
||||
|
||||
void Print() {
|
||||
// Compute opcode stat
|
||||
u64 total = 0;
|
||||
std::vector< std::pair<u32, u32> > v;
|
||||
std::vector< std::pair<u32, u32> > vc;
|
||||
for(int i = 0; i < static_cast<int>(eeOpcode::LAST); i++) {
|
||||
total += opStats[i];
|
||||
v.push_back(std::make_pair(opStats[i], i));
|
||||
|
@ -213,8 +233,119 @@ struct eeProfiler {
|
|||
double stat = (double)count / (double)total * 100.0;
|
||||
DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]",
|
||||
eeOpcodeName[v[i].second], stat, (u32)count);
|
||||
if (stat < 0.01)
|
||||
break;
|
||||
}
|
||||
DevCon.WriteLn("Total = 0x%x_%x\n\n", (u32)(u64)(total>>32),(u32)total);
|
||||
//DevCon.WriteLn("Total = 0x%x_%x", (u32)(u64)(total>>32),(u32)total);
|
||||
|
||||
// Compute memory stat
|
||||
total = 0;
|
||||
u64 reg = 0;
|
||||
u64 gs = 0;
|
||||
u64 vu = 0;
|
||||
// FIXME: MAYBE count the scratch pad
|
||||
for (size_t i = 0; i < memSpace ; i++)
|
||||
total += memStats[i];
|
||||
|
||||
int ou = 32 * _1kb; // user segment (0x10000000)
|
||||
int ok = 352 * _1kb; // kernel segment (0xB0000000)
|
||||
for (int i = 0; i < 4 * _1kb; i++) reg += memStats[ou + 0 * _1kb + i] + memStats[ok + 0 * _1kb + i];
|
||||
for (int i = 0; i < 4 * _1kb; i++) gs += memStats[ou + 4 * _1kb + i] + memStats[ok + 4 * _1kb + i];
|
||||
for (int i = 0; i < 4 * _1kb; i++) vu += memStats[ou + 8 * _1kb + i] + memStats[ok + 8 * _1kb + i];
|
||||
|
||||
|
||||
u64 ram = total - reg - gs - vu;
|
||||
double ram_p = per(ram, total);
|
||||
double reg_p = per(reg, total);
|
||||
double gs_p = per(gs , total);
|
||||
double vu_p = per(vu , total);
|
||||
|
||||
// Compute const memory stat
|
||||
u64 total_const = 0;
|
||||
u64 reg_const = 0;
|
||||
for (size_t i = 0; i < memSpace ; i++)
|
||||
total_const += memStatsConst[i];
|
||||
|
||||
for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i];
|
||||
u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough
|
||||
|
||||
double ram_const_p = per(ram_const, ram);
|
||||
double reg_const_p = per(reg_const, reg);
|
||||
|
||||
DevCon.WriteLn("\nEE Memory Profiler:");
|
||||
DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total>>32),(u32)total);
|
||||
DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram>>32),(u32)ram, ram_p, ram_const_p);
|
||||
DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(reg>>32),(u32)reg, reg_p, reg_const_p);
|
||||
DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)( gs>>32),(u32) gs, gs_p);
|
||||
DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64) (vu>>32),(u32) vu, vu_p);
|
||||
|
||||
u64 total_ram = memStatsSlow + memStatsFast;
|
||||
DevCon.WriteLn("\n RAM Fast [%3.4f%%] RAM Slow [%3.4f%%]. Total 0x%08x_%08x [%3.4f%%]",
|
||||
per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram>>32),(u32)total_ram, per(total_ram, total));
|
||||
|
||||
v.clear();
|
||||
vc.clear();
|
||||
for (int i = 0; i < 4 * _1kb; i++) {
|
||||
u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i];
|
||||
u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c;
|
||||
if (reg)
|
||||
v.push_back(std::make_pair(reg, i * 16));
|
||||
if (reg_c)
|
||||
vc.push_back(std::make_pair(reg_c, i * 16));
|
||||
}
|
||||
std::sort (v.begin(), v.end());
|
||||
std::reverse(v.begin(), v.end());
|
||||
|
||||
std::sort (vc.begin(), vc.end());
|
||||
std::reverse(vc.begin(), vc.end());
|
||||
|
||||
DevCon.WriteLn("\nEE Reg Profiler:");
|
||||
for(u32 i = 0; i < v.size(); i++) {
|
||||
u64 count = v[i].first;
|
||||
double stat = (double)count / (double)(reg - reg_const) * 100.0;
|
||||
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
|
||||
v[i].second, stat, (u32)count);
|
||||
if (stat < 0.01)
|
||||
break;
|
||||
}
|
||||
|
||||
DevCon.WriteLn("\nEE Const Reg Profiler:");
|
||||
for(u32 i = 0; i < vc.size(); i++) {
|
||||
u64 count = vc[i].first;
|
||||
double stat = (double)count / (double)reg_const * 100.0;
|
||||
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
|
||||
vc[i].second, stat, (u32)count);
|
||||
if (stat < 0.01)
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Warning: this clobbers ebx
|
||||
void EmitMem() {
|
||||
// Compact the 4GB virtual address to a 512KB virtual address
|
||||
if (x86caps.hasBMI2) {
|
||||
xPEXT(ebx, ecx, ptr[&memMask]);
|
||||
xADD(ptr32[(ebx*4) + memStats], 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that counts one access to the compile-time-known address `add`.
// The slot index is computed here (not in emitted code) by compacting `add`
// through memMask; both the overall and the const-only counters are bumped.
// Nothing is emitted when BMI2 is unavailable.
void EmitConstMem(u32 add) {
	if (!x86caps.hasBMI2)
		return;

	const u32 slot = _pext_u32(add, memMask);
	xADD(ptr32[&memStats[slot]], 1);
	xADD(ptr32[&memStatsConst[slot]], 1);
}
|
||||
|
||||
void EmitSlowMem() {
|
||||
xADD(ptr32[(u32*)&memStatsSlow], 1);
|
||||
xADC(ptr32[(u32*)&memStatsSlow + 1], 0);
|
||||
}
|
||||
|
||||
void EmitFastMem() {
|
||||
xADD(ptr32[(u32*)&memStatsFast], 1);
|
||||
xADC(ptr32[(u32*)&memStatsFast + 1], 0);
|
||||
}
|
||||
};
|
||||
#else
|
||||
|
@ -222,6 +353,10 @@ struct eeProfiler {
|
|||
// #else variant of the profiler: empty stub, there is no state to reset.
__fi void Reset() {}
|
||||
// #else variant of the profiler: empty stub, `op` is ignored and no code is emitted.
__fi void EmitOp(eeOpcode op) {}
|
||||
// #else variant of the profiler: empty stub, nothing is printed.
__fi void Print() {}
|
||||
// #else variant of the profiler: empty stub, no code is emitted.
__fi void EmitMem() {}
|
||||
// #else variant of the profiler: empty stub, `add` is ignored and no code is emitted.
__fi void EmitConstMem(u32 add) {}
|
||||
// #else variant of the profiler: empty stub, no code is emitted.
__fi void EmitSlowMem() {}
|
||||
// #else variant of the profiler: empty stub, no code is emitted.
__fi void EmitFastMem() {}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -161,6 +161,9 @@ namespace vtlb_private
|
|||
//
|
||||
static uptr* DynGen_PrepRegs()
|
||||
{
|
||||
// Warning: EmitMem clobbers ebx (in case someone gets the very bad idea to move this code)
|
||||
EE::Profiler.EmitMem();
|
||||
|
||||
xMOV( eax, ecx );
|
||||
xSHR( eax, VTLB_PAGE_BITS );
|
||||
xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] );
|
||||
|
@ -370,6 +373,8 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
|
|||
// recompiler if the TLB is changed.
|
||||
void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
||||
{
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
|
||||
s32 ppf = addr_const + vmv_ptr;
|
||||
if( ppf >= 0 )
|
||||
|
@ -416,6 +421,8 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
|||
//
|
||||
void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
|
||||
{
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
|
||||
s32 ppf = addr_const + vmv_ptr;
|
||||
if( ppf >= 0 )
|
||||
|
@ -506,6 +513,8 @@ void vtlb_DynGenWrite(u32 sz)
|
|||
// recompiler if the TLB is changed.
|
||||
void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
||||
{
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
|
||||
s32 ppf = addr_const + vmv_ptr;
|
||||
if( ppf >= 0 )
|
||||
|
|
Loading…
Reference in New Issue