From 5b08bda5b80b257a1d21079ebdc63c22efaad52b Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 11 Dec 2015 20:26:09 +0100 Subject: [PATCH] ee:profiler: count EE memory access --- pcsx2/x86/R5900_Profiler.h | 143 +++++++++++++++++++++++++++++++++- pcsx2/x86/ix86-32/recVTLB.cpp | 9 +++ 2 files changed, 148 insertions(+), 4 deletions(-) diff --git a/pcsx2/x86/R5900_Profiler.h b/pcsx2/x86/R5900_Profiler.h index 3712215d48..8098cb4a9d 100644 --- a/pcsx2/x86/R5900_Profiler.h +++ b/pcsx2/x86/R5900_Profiler.h @@ -183,23 +183,43 @@ static const char eeOpcodeName[][16] = { #include #include +using namespace x86Emitter; + struct eeProfiler { + static const u32 memSpace = 1 << 19; + u64 opStats[static_cast(eeOpcode::LAST)]; + u32 memStats[memSpace]; + u32 memStatsConst[memSpace]; + u64 memStatsSlow; + u64 memStatsFast; + u32 memMask; void Reset() { - memzero(*this); + memzero(opStats); + memzero(memStats); + memzero(memStatsConst); + memStatsSlow = 0; + memStatsFast = 0; + memMask = 0xF700FFF0; pxAssert(eeOpcodeName[static_cast(eeOpcode::LAST)][0] == '!'); } void EmitOp(eeOpcode opcode) { int op = static_cast(opcode); - x86Emitter::xADD(x86Emitter::ptr32[&(((u32*)opStats)[op*2+0])], 1); - x86Emitter::xADC(x86Emitter::ptr32[&(((u32*)opStats)[op*2+1])], 0); + xADD(ptr32[&(((u32*)opStats)[op*2+0])], 1); + xADC(ptr32[&(((u32*)opStats)[op*2+1])], 0); + } + + double per(u64 part, u64 total) { + return (double) part / (double) total * 100.0; } void Print() { + // Compute opcode stat u64 total = 0; std::vector< std::pair > v; + std::vector< std::pair > vc; for(int i = 0; i < static_cast(eeOpcode::LAST); i++) { total += opStats[i]; v.push_back(std::make_pair(opStats[i], i)); @@ -213,8 +233,119 @@ struct eeProfiler { double stat = (double)count / (double)total * 100.0; DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]", eeOpcodeName[v[i].second], stat, (u32)count); + if (stat < 0.01) + break; } - DevCon.WriteLn("Total = 0x%x_%x\n\n", (u32)(u64)(total>>32),(u32)total); + //DevCon.WriteLn("Total = 0x%x_%x", (u32)(u64)(total>>32),(u32)total); + + // Compute memory stat + total = 0; + u64 reg = 0; + u64 gs = 0; + u64 vu = 0; + // FIXME: MAYBE count the scratch pad + for (size_t i = 0; i < memSpace ; i++) + total += memStats[i]; + + int ou = 32 * _1kb; // user segment (0x10000000) + int ok = 352 * _1kb; // kernel segment (0xB0000000) + for (int i = 0; i < 4 * _1kb; i++) reg += memStats[ou + 0 * _1kb + i] + memStats[ok + 0 * _1kb + i]; + for (int i = 0; i < 4 * _1kb; i++) gs += memStats[ou + 4 * _1kb + i] + memStats[ok + 4 * _1kb + i]; + for (int i = 0; i < 4 * _1kb; i++) vu += memStats[ou + 8 * _1kb + i] + memStats[ok + 8 * _1kb + i]; + + + u64 ram = total - reg - gs - vu; + double ram_p = per(ram, total); + double reg_p = per(reg, total); + double gs_p = per(gs , total); + double vu_p = per(vu , total); + + // Compute const memory stat + u64 total_const = 0; + u64 reg_const = 0; + for (size_t i = 0; i < memSpace ; i++) + total_const += memStatsConst[i]; + + for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i]; + u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough + + double ram_const_p = per(ram_const, ram); + double reg_const_p = per(reg_const, reg); + + DevCon.WriteLn("\nEE Memory Profiler:"); + DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total>>32),(u32)total); + DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram>>32),(u32)ram, ram_p, ram_const_p); + DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(reg>>32),(u32)reg, reg_p, reg_const_p); + DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)( gs>>32),(u32) gs, gs_p); + DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64) (vu>>32),(u32) vu, vu_p); + + u64 total_ram = memStatsSlow + memStatsFast; + DevCon.WriteLn("\n RAM Fast [%3.4f%%] RAM Slow [%3.4f%%]. Total 0x%08x_%08x [%3.4f%%]", + per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram>>32),(u32)total_ram, per(total_ram, total)); + + v.clear(); + vc.clear(); + for (int i = 0; i < 4 * _1kb; i++) { + u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i]; + u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c; + if (reg) + v.push_back(std::make_pair(reg, i * 16)); + if (reg_c) + vc.push_back(std::make_pair(reg_c, i * 16)); + } + std::sort (v.begin(), v.end()); + std::reverse(v.begin(), v.end()); + + std::sort (vc.begin(), vc.end()); + std::reverse(vc.begin(), vc.end()); + + DevCon.WriteLn("\nEE Reg Profiler:"); + for(u32 i = 0; i < v.size(); i++) { + u64 count = v[i].first; + double stat = (double)count / (double)(reg - reg_const) * 100.0; + DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", + v[i].second, stat, (u32)count); + if (stat < 0.01) + break; + } + + DevCon.WriteLn("\nEE Const Reg Profiler:"); + for(u32 i = 0; i < vc.size(); i++) { + u64 count = vc[i].first; + double stat = (double)count / (double)reg_const * 100.0; + DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", + vc[i].second, stat, (u32)count); + if (stat < 0.01) + break; + } + + } + + // Warning dirty ebx + void EmitMem() { + // Compact the 4GB virtual address to a 512KB virtual address + if (x86caps.hasBMI2) { + xPEXT(ebx, ecx, ptr[&memMask]); + xADD(ptr32[(ebx*4) + memStats], 1); + } + } + + void EmitConstMem(u32 add) { + if (x86caps.hasBMI2) { + u32 a = _pext_u32(add, memMask); + xADD(ptr32[a + memStats], 1); + xADD(ptr32[a + memStatsConst], 1); + } + } + + void EmitSlowMem() { + xADD(ptr32[(u32*)&memStatsSlow], 1); + xADC(ptr32[(u32*)&memStatsSlow + 1], 0); + } + + void EmitFastMem() { + xADD(ptr32[(u32*)&memStatsFast], 1); + xADC(ptr32[(u32*)&memStatsFast + 1], 0); } }; #else @@ -222,6 +353,10 @@ struct eeProfiler { __fi void Reset() {} __fi void EmitOp(eeOpcode op) {} __fi void Print() {} + __fi void EmitMem() {} + __fi void EmitConstMem(u32 add) {} + __fi void EmitSlowMem() {} + __fi void EmitFastMem() {} }; #endif diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index b37eccfe5c..aace596c2c 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -161,6 +161,9 @@ namespace vtlb_private // static uptr* DynGen_PrepRegs() { + // Warning dirty ebx (in case someone got the very bad idea to move this code) + EE::Profiler.EmitMem(); + xMOV( eax, ecx ); xSHR( eax, VTLB_PAGE_BITS ); xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] ); @@ -370,6 +373,8 @@ void vtlb_DynGenRead32(u32 bits, bool sign) // recompiler if the TLB is changed. void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) @@ -416,6 +421,8 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) // void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) @@ -506,6 +513,8 @@ void vtlb_DynGenWrite(u32 sz) // recompiler if the TLB is changed. void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 )