vtune: plug PCSX2 core + add missing profiling (VU/VIF/TLB)

Doesn't fully work yet; VTune still attributes samples to:
* Unknown stack frame
* Outside any known module

Potential root causes:
* Nvidia driver
* VU code: ebp is required for emulation, so there is likely no stack frame
This commit is contained in:
Gregory Hainaut 2016-11-25 17:46:52 +01:00
parent 4fca5f4d88
commit c9db1c6c4b
12 changed files with 91 additions and 6 deletions

View File

@ -114,6 +114,7 @@ for ARG in "$@"; do
echo "--clang-tidy : Do a clang-tidy analysis. Results can be found in build directory" echo "--clang-tidy : Do a clang-tidy analysis. Results can be found in build directory"
echo "--cppcheck : Do a cppcheck analysis. Results can be found in build directory" echo "--cppcheck : Do a cppcheck analysis. Results can be found in build directory"
echo "--coverity : Do a build for coverity" echo "--coverity : Do a build for coverity"
echo "--vtune : Plug GSdx with VTUNE"
exit 1 exit 1
esac esac

View File

@ -53,4 +53,5 @@ extern InfoVector any;
extern InfoVector ee; extern InfoVector ee;
extern InfoVector iop; extern InfoVector iop;
extern InfoVector vu; extern InfoVector vu;
extern InfoVector vif;
} }

View File

@ -107,5 +107,10 @@ set(UtilitiesFinalLibs
${wxWidgets_LIBRARIES} ${wxWidgets_LIBRARIES}
) )
# Optional Intel VTune JIT-profiling support for the Utilities library.
# Appends -DENABLE_VTUNE to UtilitiesFinalFlags and adds the VTune SDK headers
# to the include path. The SDK root is taken from the
# VTUNE_AMPLIFIER_XE_2016_DIR environment variable, which is read at
# configure time only.
if(USE_VTUNE)
    # An unset variable would silently turn the include path into "/include";
    # stop at configure time with an actionable message instead.
    if("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}" STREQUAL "")
        message(FATAL_ERROR
            "USE_VTUNE is enabled but the VTUNE_AMPLIFIER_XE_2016_DIR environment variable is not set")
    endif()
    list(APPEND UtilitiesFinalFlags -DENABLE_VTUNE)
    include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include")
endif()
add_pcsx2_lib(${Output} "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags}") add_pcsx2_lib(${Output} "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags}")
add_pcsx2_lib(${Output}_NO_TLS "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags} -DPCSX2_THREAD_LOCAL=0") add_pcsx2_lib(${Output}_NO_TLS "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags} -DPCSX2_THREAD_LOCAL=0")

View File

@ -17,9 +17,20 @@
#include "Perf.h" #include "Perf.h"
#ifdef __unix__
#include "unistd.h"
#endif
//#define ProfileWithPerf //#define ProfileWithPerf
#define MERGE_BLOCK_RESULT #define MERGE_BLOCK_RESULT
#ifdef ENABLE_VTUNE
#include "jitprofiling.h"
#ifdef _WIN32
#pragma comment(lib, "jitprofiling.lib")
#endif
#endif
namespace Perf namespace Perf
{ {
@ -28,9 +39,10 @@ InfoVector any("");
InfoVector ee("EE"); InfoVector ee("EE");
InfoVector iop("IOP"); InfoVector iop("IOP");
InfoVector vu("VU"); InfoVector vu("VU");
InfoVector vif("VIF");
// Perf is only supported on linux // Perf is only supported on linux
#if defined(__linux__) && defined(ProfileWithPerf) #if defined(__linux__) && (defined(ProfileWithPerf) || defined(ENABLE_VTUNE))
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Implementation of the Info object // Implementation of the Info object
@ -85,6 +97,25 @@ void InfoVector::map(uptr x86, u32 size, const char *symbol)
if (size < 8 * _1kb) if (size < 8 * _1kb)
m_v.emplace_back(x86, size, symbol); m_v.emplace_back(x86, size, symbol);
#endif #endif
#ifdef ENABLE_VTUNE
// mapping the full recompiler will blow up VTUNE
if (size < _16kb) {
fprintf(stderr, "map %s: %p size %d\n", symbol, (void*)x86, size);
std::string name = std::string(symbol);
iJIT_Method_Load ml;
memset(&ml, 0, sizeof(ml));
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
ml.method_load_address = (void*)x86;
ml.method_size = size;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
}
#endif
} }
void InfoVector::map(uptr x86, u32 size, u32 pc) void InfoVector::map(uptr x86, u32 size, u32 pc)
@ -92,6 +123,22 @@ void InfoVector::map(uptr x86, u32 size, u32 pc)
#ifndef MERGE_BLOCK_RESULT #ifndef MERGE_BLOCK_RESULT
m_v.emplace_back(x86, size, m_prefix, pc); m_v.emplace_back(x86, size, m_prefix, pc);
#endif #endif
#ifdef ENABLE_VTUNE
std::string name = std::string(m_prefix) + "_" + std::to_string(pc);
//fprintf(stderr, "map %s: %p size %d\n", name.c_str(), (void*)x86, size);
iJIT_Method_Load ml;
memset(&ml, 0, sizeof(ml));
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
ml.method_load_address = (void*)x86;
ml.method_size = size;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
#endif
} }
void InfoVector::reset() void InfoVector::reset()

View File

@ -681,6 +681,16 @@ foreach(res_file IN ITEMS
add_custom_command(OUTPUT "${res_bin}/${res_file}.h" COMMAND perl ${CMAKE_SOURCE_DIR}/linux_various/hex2h.pl "${res_src}/${res_file}.png" "${res_bin}/${res_file}" ) add_custom_command(OUTPUT "${res_bin}/${res_file}.h" COMMAND perl ${CMAKE_SOURCE_DIR}/linux_various/hex2h.pl "${res_src}/${res_file}.png" "${res_bin}/${res_file}" )
endforeach() endforeach()
# Optional Intel VTune JIT-profiling support for the PCSX2 core.
# Appends -DENABLE_VTUNE to pcsx2FinalFlags, adds the VTune SDK headers to the
# include path and appends the static jitprofiling library matching the target
# architecture to pcsx2FinalLibs. The SDK root is taken from the
# VTUNE_AMPLIFIER_XE_2016_DIR environment variable (read at configure time).
if(USE_VTUNE)
    set(pcsx2_vtune_root "$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}")
    # An unset variable would yield "/include" and "/lib64/libjitprofiling.a";
    # fail early with a clear message rather than at compile/link time.
    if("${pcsx2_vtune_root}" STREQUAL "")
        message(FATAL_ERROR
            "USE_VTUNE is enabled but the VTUNE_AMPLIFIER_XE_2016_DIR environment variable is not set")
    endif()

    list(APPEND pcsx2FinalFlags -DENABLE_VTUNE)
    include_directories("${pcsx2_vtune_root}/include")

    # The expansion is quoted so that an empty or unset architecture value
    # cannot collapse the condition into a malformed `if( MATCHES ...)`.
    if("${PCSX2_TARGET_ARCHITECTURES}" MATCHES "i386")
        set(pcsx2_vtune_libdir "lib32")
    else()
        set(pcsx2_vtune_libdir "lib64")
    endif()
    list(APPEND pcsx2FinalLibs "${pcsx2_vtune_root}/${pcsx2_vtune_libdir}/libjitprofiling.a")
endif()
# Suppress all the system-specific predefined macros outside the reserved namespace. # Suppress all the system-specific predefined macros outside the reserved namespace.
# Needed when stringifying macros. # Needed when stringifying macros.
# See issue: 1233 # See issue: 1233

View File

@ -170,7 +170,11 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
u8* retval = xGetPtr(); u8* retval = xGetPtr();
{ // Properly scope the frame prologue/epilogue { // Properly scope the frame prologue/epilogue
#ifdef ENABLE_VTUNE
xScopedStackFrame frame(true);
#else
xScopedStackFrame frame(IsDevBuild); xScopedStackFrame frame(IsDevBuild);
#endif
xJMP((void*)iopDispatcherReg); xJMP((void*)iopDispatcherReg);

View File

@ -393,7 +393,11 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
u8* retval = xGetAlignedCallTarget(); u8* retval = xGetAlignedCallTarget();
{ // Properly scope the frame prologue/epilogue { // Properly scope the frame prologue/epilogue
#ifdef ENABLE_VTUNE
xScopedStackFrame frame(true);
#else
xScopedStackFrame frame(IsDevBuild); xScopedStackFrame frame(IsDevBuild);
#endif
xJMP((void*)DispatcherReg); xJMP((void*)DispatcherReg);

View File

@ -21,6 +21,7 @@
#include "iCore.h" #include "iCore.h"
#include "iR5900.h" #include "iR5900.h"
#include "Utilities/Perf.h"
using namespace vtlb_private; using namespace vtlb_private;
using namespace x86Emitter; using namespace x86Emitter;
@ -333,6 +334,8 @@ void vtlb_dynarec_init()
} }
HostSys::MemProtectStatic( m_IndirectDispatchers, PageAccess_ExecOnly() ); HostSys::MemProtectStatic( m_IndirectDispatchers, PageAccess_ExecOnly() );
Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
} }
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////

View File

@ -35,6 +35,7 @@ using namespace x86Emitter;
#include "microVU_Misc.h" #include "microVU_Misc.h"
#include "microVU_IR.h" #include "microVU_IR.h"
#include "microVU_Profiler.h" #include "microVU_Profiler.h"
#include "Utilities/Perf.h"
struct microBlockLink { struct microBlockLink {
microBlock block; microBlock block;

View File

@ -535,7 +535,7 @@ void mVUSaveFlags(microVU& mVU,microFlagCycles &mFC, microFlagCycles &mFCBackup)
mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances
} }
void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
microFlagCycles mFC; microFlagCycles mFC;
u8* thisPtr = x86Ptr; u8* thisPtr = x86Ptr;
const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8); const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU.microMemSize / 8);
@ -597,7 +597,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
// Fix up vi15 const info for propagation through blocks // Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0; mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0; mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;
mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUsetFlags(mVU, mFC); // Sets Up Flag instances
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging... mVUdebugPrintBlocks(mVU, false); // Prints Start/End PC of blocks executed, for debugging...
@ -653,10 +653,13 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
} }
} }
if ((x == endCount) && (x!=1)) { Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); } if ((x == endCount) && (x!=1)) { Console.Error("microVU%d: Possible infinite compiling loop!", mVU.index); }
// E-bit End // E-bit End
mVUsetupRange(mVU, xPC-8, false); mVUsetupRange(mVU, xPC-8, false);
mVUendProgram(mVU, &mFC, 1); mVUendProgram(mVU, &mFC, 1);
Perf::vu.map((uptr)thisPtr, x86Ptr - thisPtr, startPC);
return thisPtr; return thisPtr;
} }

View File

@ -20,6 +20,7 @@
#include "PrecompiledHeader.h" #include "PrecompiledHeader.h"
#include "newVif_UnpackSSE.h" #include "newVif_UnpackSSE.h"
#include "MTVU.h" #include "MTVU.h"
#include "Utilities/Perf.h"
void dVifReserve(int idx) { void dVifReserve(int idx) {
if(!nVif[idx].recReserve) if(!nVif[idx].recReserve)
@ -352,6 +353,8 @@ _vifT __fi void dVifUnpack(const u8* data, bool isFill) {
v.block.startPtr = (uptr)xGetAlignedCallTarget(); v.block.startPtr = (uptr)xGetAlignedCallTarget();
v.vifBlocks->add(v.block); v.vifBlocks->add(v.block);
VifUnpackSSE_Dynarec(v, v.block).CompileRoutine(); VifUnpackSSE_Dynarec(v, v.block).CompileRoutine();
Perf::vif.map((uptr)v.recWritePtr, xGetPtr() - v.recWritePtr, v.block.upkType /* FIXME ideally a key*/);
nVif[idx].recWritePtr = xGetPtr(); nVif[idx].recWritePtr = xGetPtr();
dVifRecLimit(idx); dVifRecLimit(idx);

View File

@ -206,8 +206,11 @@ endif()
if(USE_VTUNE) if(USE_VTUNE)
set(GSdxFinalFlags ${GSdxFinalFlags} -DENABLE_VTUNE) set(GSdxFinalFlags ${GSdxFinalFlags} -DENABLE_VTUNE)
include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include") include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include")
set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a) if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386")
set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a) set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a)
else()
set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a)
endif()
endif() endif()
# Generate Glsl header file. Protect with REBUILD_SHADER to avoid build-dependency on PERL # Generate Glsl header file. Protect with REBUILD_SHADER to avoid build-dependency on PERL