Merge pull request #1690 from PCSX2/greg/vtune

Greg/vtune
This commit is contained in:
Gregory Hainaut 2016-12-10 11:25:58 +01:00 committed by GitHub
commit 40ac87c9bc
19 changed files with 130 additions and 210 deletions

View File

@ -33,6 +33,9 @@ Specifies the config folder. It applies to pcsx2 + plugins.
forces running of the First-time Wizard.
.TP
.B --profiling
eases running under a profiling tool such as VTune
.TP
.SH AUTO-RUN OPTIONS
.TP

View File

@ -303,6 +303,9 @@ set(COMMON_FLAG "-pipe -fvisibility=hidden -pthread -fno-builtin-strcmp -fno-bui
if (DISABLE_SVU)
set(COMMON_FLAG "${COMMON_FLAG} -DDISABLE_SVU")
endif()
# Opt-in VTune instrumentation: when USE_VTUNE is set, append the ENABLE_VTUNE
# preprocessor define to COMMON_FLAG.
if(USE_VTUNE)
set(COMMON_FLAG "${COMMON_FLAG} -DENABLE_VTUNE")
endif()
set(HARDENING_FLAG "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security")
# -Wno-attributes: "always_inline function might not be inlinable" <= real spam (thousands of warnings!!!)
# -Wno-missing-field-initializers: the standard allows initializing only the beginning of a struct/array in a static initializer. Just a silly warning.

33
cmake/FindVtune.cmake Normal file
View File

@ -0,0 +1,33 @@
# Find the JIT profiling API shipped with Intel's VTune Amplifier XE.
#
# Result variables:
#   VTUNE_FOUND         TRUE when both the header and the library were located
#   VTUNE_INCLUDE_DIRS  directory containing jitprofiling.h
#   VTUNE_LIBRARIES     full path to the static jitprofiling library
#
# The default /opt/intel install prefixes of the 2016-2018 releases are listed
# explicitly; CMake's standard search locations are consulted as well.

find_path(VTUNE_INCLUDE_DIRS NAMES jitprofiling.h PATHS
    /opt/intel/vtune_amplifier_xe_2018/include
    /opt/intel/vtune_amplifier_xe_2017/include
    /opt/intel/vtune_amplifier_xe_2016/include
)

# Pick the library sub-directory matching the target architecture.
# The variable is passed by name so that if() dereferences it itself: an empty
# or undefined PCSX2_TARGET_ARCHITECTURES then simply selects the 64-bit
# directory instead of expanding to a malformed if() condition.
if(PCSX2_TARGET_ARCHITECTURES MATCHES "i386")
    set(_vtune_lib_dir "lib32")
else()
    set(_vtune_lib_dir "lib64")
endif()

# The explicit "libjitprofiling.a" file name restricts the search to the
# static archive.
find_library(VTUNE_LIBRARIES NAMES libjitprofiling.a PATHS
    /opt/intel/vtune_amplifier_xe_2018/${_vtune_lib_dir}
    /opt/intel/vtune_amplifier_xe_2017/${_vtune_lib_dir}
    /opt/intel/vtune_amplifier_xe_2016/${_vtune_lib_dir}
)
unset(_vtune_lib_dir)

# handle the QUIETLY and REQUIRED arguments and set VTUNE_FOUND to TRUE if
# all listed variables are TRUE
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Vtune DEFAULT_MSG VTUNE_LIBRARIES VTUNE_INCLUDE_DIRS)

# Only cache entries can be marked as advanced; VTUNE_FOUND is a normal
# variable set by find_package_handle_standard_args(), so it is not listed.
mark_as_advanced(VTUNE_INCLUDE_DIRS VTUNE_LIBRARIES)

View File

@ -12,6 +12,7 @@ endif()
find_package(LibLZMA)
find_package(OpenGL)
find_package(PNG)
find_package(Vtune)
# The requirement of wxWidgets is checked in SelectPcsx2Plugins module
# Does not require the module (allow to compile non-wx plugins)
# Force the unicode build (the variable is only supported on cmake 2.8.3 and above)
@ -156,6 +157,10 @@ if(SDL_FOUND AND NOT SDL2_API)
include_directories(${SDL_INCLUDE_DIR})
endif()
# VTune support was explicitly requested: make sure the JIT profiling API was
# actually located before exposing its headers. Stopping here gives a clear
# diagnostic instead of a later failure on the missing header or on a
# VTUNE_LIBRARIES-NOTFOUND link entry.
if(USE_VTUNE)
    if(NOT VTUNE_FOUND)
        message(FATAL_ERROR "USE_VTUNE is enabled but the VTune JIT profiling API (jitprofiling.h / libjitprofiling.a) was not found")
    endif()
    include_directories(${VTUNE_INCLUDE_DIRS})
endif()
if(wxWidgets_FOUND)
include(${wxWidgets_USE_FILE})
endif()

View File

@ -215,63 +215,6 @@ protected:
virtual void ReprotectCommittedBlocks(const PageProtectionMode &newmode);
};
// --------------------------------------------------------------------------------------
// BaseVmReserveListener
// --------------------------------------------------------------------------------------
class BaseVmReserveListener : public VirtualMemoryReserve
{
DeclareNoncopyableObject(BaseVmReserveListener);
typedef VirtualMemoryReserve _parent;
protected:
EventListenerHelper_PageFault<BaseVmReserveListener> m_pagefault_listener;
// Incremental size by which the buffer grows (in pages)
uptr m_blocksize;
public:
BaseVmReserveListener(const wxString &name, size_t size = 0);
virtual ~BaseVmReserveListener() throw() {}
operator void *() { return m_baseptr; }
operator const void *() const { return m_baseptr; }
operator u8 *() { return (u8 *)m_baseptr; }
operator const u8 *() const { return (u8 *)m_baseptr; }
using _parent::operator[];
void OnPageFaultEvent(const PageFaultInfo &info, bool &handled);
virtual uptr SetBlockSize(uptr bytes)
{
m_blocksize = (bytes + __pagesize - 1) / __pagesize;
return m_blocksize * __pagesize;
}
virtual void Reset()
{
_parent::Reset();
}
protected:
// This function is called from OnPageFaultEvent after the address has been translated
// and confirmed to apply to this reserved area in question. OnPageFaultEvent contains
// a try/catch exception handler, which ensures "reasonable" error response behavior if
// this function throws exceptions.
//
// Important: This method is called from the context of an exception/signal handler. On
// Windows this isn't a big deal (most operations are ok). On Linux, however, logging
// and other facilities are probably not a good idea.
virtual void DoCommitAndProtect(uptr offset) = 0;
// This function is called for every committed block.
virtual void OnCommittedBlock(void *block) = 0;
virtual void CommitBlocks(uptr page, uint blocks);
};
#ifdef __POSIX__
#define PCSX2_PAGEFAULT_PROTECT

View File

@ -36,6 +36,7 @@ class InfoVector
{
std::vector<Info> m_v;
char m_prefix[20];
unsigned int m_vtune_id;
public:
InfoVector(const char *prefix);

View File

@ -107,10 +107,5 @@ set(UtilitiesFinalLibs
${wxWidgets_LIBRARIES}
)
if(USE_VTUNE)
set(UtilitiesFinalFlags ${UtilitiesFinalFlags} -DENABLE_VTUNE)
include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include")
endif()
add_pcsx2_lib(${Output} "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags}")
add_pcsx2_lib(${Output}_NO_TLS "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags} -DPCSX2_THREAD_LOCAL=0")

View File

@ -76,6 +76,11 @@ void Info::Print(FILE *fp)
// Constructs an InfoVector and stores the prefix used to label its entries.
//
// prefix - NUL-terminated label. At most sizeof(m_prefix) - 1 characters are
//          kept; longer prefixes are truncated.
InfoVector::InfoVector(const char *prefix)
{
	// strncpy() does not terminate the destination when the source fills it
	// completely. m_prefix is later read as a C string, so copy one byte less
	// and terminate explicitly.
	strncpy(m_prefix, prefix, sizeof(m_prefix) - 1);
	m_prefix[sizeof(m_prefix) - 1] = '\0';

#ifdef ENABLE_VTUNE
	// Reserve a single VTune method id for this vector.
	m_vtune_id = iJIT_GetNewMethodID();
#else
	m_vtune_id = 0;
#endif
}
void InfoVector::print(FILE *fp)
@ -90,18 +95,16 @@ void InfoVector::map(uptr x86, u32 size, const char *symbol)
// Dispatchers are on a page and must always be kept.
// Recompilers are much bigger (TODO check VIF) and are only
// useful when MERGE_BLOCK_RESULT is defined
#ifdef MERGE_BLOCK_RESULT
m_v.emplace_back(x86, size, symbol);
#if defined(ENABLE_VTUNE) || !defined(MERGE_BLOCK_RESULT)
u32 max_code_size = 16 * _1kb;
#else
if (size < 8 * _1kb)
m_v.emplace_back(x86, size, symbol);
u32 max_code_size = _1gb;
#endif
if (size < max_code_size) {
m_v.emplace_back(x86, size, symbol);
#ifdef ENABLE_VTUNE
// mapping the full recompiler will blow up VTUNE
if (size < _16kb) {
fprintf(stderr, "map %s: %p size %d\n", symbol, (void *)x86, size);
std::string name = std::string(symbol);
iJIT_Method_Load ml;
@ -114,9 +117,11 @@ void InfoVector::map(uptr x86, u32 size, const char *symbol)
ml.method_size = size;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
}
//fprintf(stderr, "mapF %s: %p size %dKB\n", ml.method_name, ml.method_load_address, ml.method_size / 1024u);
#endif
}
}
void InfoVector::map(uptr x86, u32 size, u32 pc)
{
@ -125,19 +130,24 @@ void InfoVector::map(uptr x86, u32 size, u32 pc)
#endif
#ifdef ENABLE_VTUNE
std::string name = std::string(m_prefix) + "_" + std::to_string(pc);
//fprintf(stderr, "map %s: %p size %d\n", name.c_str(), (void*)x86, size);
iJIT_Method_Load ml;
iJIT_Method_Load_V2 ml;
memset(&ml, 0, sizeof(ml));
#ifdef MERGE_BLOCK_RESULT
ml.method_id = m_vtune_id;
ml.method_name = m_prefix;
#else
std::string name = std::string(m_prefix) + "_" + std::to_string(pc);
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char *)name.c_str();
#endif
ml.method_load_address = (void *)x86;
ml.method_size = size;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2, &ml);
//fprintf(stderr, "mapB %s: %p size %d\n", ml.method_name, ml.method_load_address, ml.method_size);
#endif
}

View File

@ -257,80 +257,6 @@ bool VirtualMemoryReserve::TryResize(uint newsize)
return true;
}
// --------------------------------------------------------------------------------------
// BaseVmReserveListener (implementations)
// --------------------------------------------------------------------------------------
BaseVmReserveListener::BaseVmReserveListener(const wxString &name, size_t size)
: VirtualMemoryReserve(name, size)
, m_pagefault_listener(this)
{
m_blocksize = __pagesize;
}
void BaseVmReserveListener::CommitBlocks(uptr page, uint blocks)
{
const uptr blocksbytes = blocks * m_blocksize * __pagesize;
void *blockptr = (u8 *)m_baseptr + (page * __pagesize);
// Depending on the operating system, this call could fail if the system is low on either
// physical ram or virtual memory.
if (!HostSys::MmapCommitPtr(blockptr, blocksbytes, m_prot_mode)) {
throw Exception::OutOfMemory(m_name)
.SetDiagMsg(pxsFmt("An additional %u blocks @ 0x%08x were requested, but could not be committed!", blocks, blockptr));
}
u8 *init = (u8 *)blockptr;
u8 *endpos = init + blocksbytes;
for (; init < endpos; init += m_blocksize * __pagesize)
OnCommittedBlock(init);
m_pages_commited += m_blocksize * blocks;
}
void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo &info, bool &handled)
{
sptr offset = (info.addr - (uptr)m_baseptr) / __pagesize;
if ((offset < 0) || ((uptr)offset >= m_pages_reserved))
return;
if (!m_allow_writes) {
pxFailRel(pxsFmt(
L"Memory Protection Fault @ %ls (%s)\n"
L"Modification of this reserve has been disabled (m_allow_writes == false).",
pxsPtr(info.addr), WX_STR(m_name)));
return;
}
// Linux Note! the SIGNAL handler is very limited in what it can do, and not only can't
// we let the C++ exception try to unwind the stack, we may not be able to log it either.
// (but we might as well try -- kernel/posix rules says not to do it, but Linux kernel
// implementations seem to support it).
// Note also that logging the exception and/or issuing an assertion dialog are always
// possible if the thread handling the signal is not the main thread.
// In windows we can let exceptions bubble out of the page fault handler. SEH will more
// or less handle them in a semi-expected way, and might even avoid a GPF long enough
// for the system to log the error or something.
#ifndef __WXMSW__
try {
#endif
DoCommitAndProtect(offset);
handled = true;
#ifndef __WXMSW__
} catch (Exception::BaseException &ex) {
handled = false;
if (!wxThread::IsMain()) {
pxFailRel(ex.FormatDiagnosticMessage());
} else {
pxTrap();
}
}
#endif
}
// --------------------------------------------------------------------------------------
// PageProtectionMode (implementations)
// --------------------------------------------------------------------------------------

View File

@ -682,13 +682,7 @@ foreach(res_file IN ITEMS
endforeach()
if(USE_VTUNE)
set(pcsx2FinalFlags ${pcsx2FinalFlags} -DENABLE_VTUNE)
include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include")
if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386")
set(pcsx2FinalLibs ${pcsx2FinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a)
else()
set(pcsx2FinalLibs ${pcsx2FinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a)
endif()
set(pcsx2FinalLibs ${pcsx2FinalLibs} ${VTUNE_LIBRARIES})
endif()
# Suppress all the system-specific predefined macros outside the reserved namespace.

View File

@ -536,6 +536,24 @@ void BC0TL() {
}
void ERET() {
#ifdef ENABLE_VTUNE
// Allow to stop vtune in a predictable way to compare runs
// Of course, the limit will depend on the game.
const u32 million = 1000 * 1000;
static u32 vtune = 0;
vtune++;
// quick_exit vs exit: quick_exit won't call static storage destructor (OS will manage). It helps
// avoiding the race condition between threads destruction.
if (vtune > 30 * million) {
Console.WriteLn("VTUNE: quick_exit");
std::quick_exit(EXIT_SUCCESS);
} else if (!(vtune % million)) {
Console.WriteLn("VTUNE: ERET was called %uM times", vtune/million);
}
#endif
if (cpuRegs.CP0.n.Status.b.ERL) {
cpuRegs.pc = cpuRegs.CP0.n.ErrorEPC;
cpuRegs.CP0.n.Status.b.ERL = 0;

View File

@ -36,11 +36,9 @@
// Parameters:
// name - a nice long name that accurately describes the contents of this reserve.
RecompiledCodeReserve::RecompiledCodeReserve( const wxString& name, uint defCommit )
: BaseVmReserveListener( name )
: VirtualMemoryReserve( name, defCommit )
{
m_blocksize = (1024 * 128) / __pagesize;
m_prot_mode = PageAccess_Any();
m_def_commit = defCommit / __pagesize;
}
RecompiledCodeReserve::~RecompiledCodeReserve() throw()
@ -59,19 +57,13 @@ void RecompiledCodeReserve::_termProfiler()
{
}
uint RecompiledCodeReserve::_calcDefaultCommitInBlocks() const
{
return (m_def_commit + m_blocksize - 1) / m_blocksize;
}
void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds )
{
if (!_parent::Reserve(size, base, upper_bounds)) return NULL;
_registerProfiler();
// Pre-Allocate the first block (to reduce the number of segmentation fault
// in debugger)
DoCommitAndProtect(0);
Commit();
_registerProfiler();
return m_baseptr;
}
@ -80,11 +72,24 @@ void RecompiledCodeReserve::Reset()
{
_parent::Reset();
// Pre-Allocate the first block (to reduce the number of segmentation fault
// in debugger)
DoCommitAndProtect(0);
Commit();
}
// Commits the reserve through the parent class and, on dev builds, pre-fills
// the committed range with INT3 opcodes.
//
// Returns the status reported by the parent Commit().
bool RecompiledCodeReserve::Commit()
{
	const bool committed = _parent::Commit();

	// Only touch the memory when the commit succeeded: after a failed commit
	// the range may not be writable, and filling it would fault.
	if (IsDevBuild && committed && m_baseptr)
	{
		// Clear the recompiled code block to 0xcc (INT3) -- this helps disasm tools show
		// the assembly dump more cleanly. We don't clear the block on Release builds since
		// it can add a noticeable amount of overhead to large block recompilations.
		memset(m_baseptr, 0xCC, m_pages_commited * __pagesize);
	}

	return committed;
}
// Sets the abbreviated name used by the profiler. Name should be under 10 characters long.
// After a name has been set, a profiler source will be automatically registered and cleared
@ -96,23 +101,6 @@ RecompiledCodeReserve& RecompiledCodeReserve::SetProfilerName( const wxString& s
return *this;
}
void RecompiledCodeReserve::DoCommitAndProtect( uptr page )
{
CommitBlocks(page, (m_pages_commited || !m_def_commit) ? 1 : _calcDefaultCommitInBlocks() );
}
void RecompiledCodeReserve::OnCommittedBlock( void* block )
{
if (IsDevBuild)
{
// Clear the recompiled code block to 0xcc (INT3) -- this helps disasm tools show
// the assembly dump more cleanly. We don't clear the block on Release builds since
// it can add a noticeable amount of overhead to large block recompilations.
memset(block, 0xCC, m_blocksize * __pagesize);
}
}
// This error message is shared by R5900, R3000, and microVU recompilers. It is not used by the
// SuperVU recompiler, since it has its own customized message.
void RecompiledCodeReserve::ThrowIfNotOk() const

View File

@ -23,16 +23,11 @@
// A recompiled code reserve is a simple sequential-growth block of memory which is auto-
// cleared to INT 3 (0xcc) as needed.
//
class RecompiledCodeReserve : public BaseVmReserveListener
class RecompiledCodeReserve : public VirtualMemoryReserve
{
typedef BaseVmReserveListener _parent;
typedef VirtualMemoryReserve _parent;
protected:
// Specifies the number of blocks that should be committed automatically when the
// reserve is created. Typically this chunk is larger than the block size, and
// should be based on whatever typical overhead is needed for basic block use.
uint m_def_commit;
wxString m_profiler_name;
public:
@ -40,8 +35,8 @@ public:
virtual ~RecompiledCodeReserve() throw();
virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 );
virtual void OnCommittedBlock( void* block );
virtual void Reset();
virtual bool Commit();
virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname );
virtual RecompiledCodeReserve& SetProfilerName( const char* shortname )
@ -59,10 +54,7 @@ public:
protected:
void ResetProcessReserves() const;
void DoCommitAndProtect( uptr page );
void _registerProfiler();
void _termProfiler();
uint _calcDefaultCommitInBlocks() const;
};

View File

@ -324,6 +324,7 @@ public:
wxFileName VmSettingsFile;
bool DisableSpeedhacks;
bool ProfilingMode;
// Note that gamefixes in this array should only be honored if the
// "HasCustomGamefixes" boolean is also enabled.
@ -338,6 +339,7 @@ public:
DisableSpeedhacks = false;
ApplyCustomGamefixes = false;
GsWindowMode = GsWinMode_Unspecified;
ProfilingMode = false;
}
// Returns TRUE if either speedhacks or gamefixes are being overridden.

View File

@ -353,6 +353,12 @@ static void _ApplySettings( const Pcsx2Config& src, Pcsx2Config& fixup )
else if( !g_Conf->EnableGameFixes )
fixup.Gamefixes.DisableAll();
if( overrides.ProfilingMode )
{
fixup.GS.FrameLimitEnable = false;
fixup.GS.VsyncEnable = false;
}
wxString gameCRC;
wxString gameSerial;
wxString gamePatch;

View File

@ -249,6 +249,8 @@ void Pcsx2App::OnInitCmdLine( wxCmdLineParser& parser )
parser.AddSwitch( wxEmptyString,L"forcewiz", AddAppName(_("forces %s to start the First-time Wizard")) );
parser.AddSwitch( wxEmptyString,L"portable", _("enables portable mode operation (requires admin/root access)") );
parser.AddSwitch( wxEmptyString,L"profiling", _("update options to ease profiling (debug)") );
const PluginInfo* pi = tbl_PluginInfo; do {
parser.AddOption( wxEmptyString, pi->GetShortname().Lower(),
pxsFmt( _("specify the file to use as the %s plugin"), WX_STR(pi->GetShortname()) )
@ -282,6 +284,8 @@ bool Pcsx2App::ParseOverrides( wxCmdLineParser& parser )
Overrides.DisableSpeedhacks = parser.Found(L"nohacks");
Overrides.ProfilingMode = parser.Found(L"profiling");
if (parser.Found(L"gamefixes", &dest))
{
Overrides.ApplyCustomGamefixes = true;

View File

@ -157,6 +157,10 @@ void Panels::FramelimiterPanel::Apply()
}
appfps.SanityCheck();
// If the user has a command line override specified, we need to disable it
// so that their changes take effect
wxGetApp().Overrides.ProfilingMode = false;
}
// --------------------------------------------------------------------------------------

View File

@ -420,7 +420,6 @@ void VifUnpackSSE_Init()
nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb);
nVifUpkExec->SetProfilerName("iVIF-SSE");
nVifUpkExec->SetBlockSize( 1 );
nVifUpkExec->Reserve( _64kb );
nVifUpkExec->ThrowIfNotOk();

View File

@ -204,13 +204,7 @@ if(LIBLZMA_FOUND)
endif()
if(USE_VTUNE)
set(GSdxFinalFlags ${GSdxFinalFlags} -DENABLE_VTUNE)
include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include")
if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386")
set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a)
else()
set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a)
endif()
set(GSdxFinalLibs ${GSdxFinalLibs} ${VTUNE_LIBRARIES})
endif()
# Generate Glsl header file. Protect with REBUILD_SHADER to avoid build-dependency on PERL