From 0453e5cad8f6772ca84e1a4ab160e41443e965d9 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 4 Dec 2016 18:46:02 +0100 Subject: [PATCH 1/5] cmake: improve vtune integration Year is included in the path so search in order 2018/2017/2016 Not ideal but at least all logic is inside the FindVtune module --- cmake/BuildParameters.cmake | 3 +++ cmake/FindVtune.cmake | 33 +++++++++++++++++++++++++++++ cmake/SearchForStuff.cmake | 5 +++++ common/src/Utilities/CMakeLists.txt | 5 ----- pcsx2/CMakeLists.txt | 8 +------ plugins/GSdx/CMakeLists.txt | 8 +------ 6 files changed, 43 insertions(+), 19 deletions(-) create mode 100644 cmake/FindVtune.cmake diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index 29b9459c9d..13c89dcd31 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -303,6 +303,9 @@ set(COMMON_FLAG "-pipe -fvisibility=hidden -pthread -fno-builtin-strcmp -fno-bui if (DISABLE_SVU) set(COMMON_FLAG "${COMMON_FLAG} -DDISABLE_SVU") endif() +if(USE_VTUNE) + set(COMMON_FLAG "${COMMON_FLAG} -DENABLE_VTUNE") +endif() set(HARDENING_FLAG "-D_FORTIFY_SOURCE=2 -Wformat -Wformat-security") # -Wno-attributes: "always_inline function might not be inlinable" <= real spam (thousand of warnings!!!) # -Wno-missing-field-initializers: standard allow to init only the begin of struct/array in static init. Just a silly warning. 
diff --git a/cmake/FindVtune.cmake b/cmake/FindVtune.cmake new file mode 100644 index 0000000000..dc5c0a14a5 --- /dev/null +++ b/cmake/FindVtune.cmake @@ -0,0 +1,33 @@ +# Find Intel's VTUNE tool + +# VTUNE_FOUND found Vtune +# VTUNE_INCLUDE_DIRS include path to jitprofiling.h +# VTUNE_LIBRARIES path to vtune libs + +find_path(VTUNE_INCLUDE_DIRS NAMES jitprofiling.h PATHS + /opt/intel/vtune_amplifier_xe_2018/include + /opt/intel/vtune_amplifier_xe_2017/include + /opt/intel/vtune_amplifier_xe_2016/include + ) + +if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386") + find_library(VTUNE_LIBRARIES NAMES libjitprofiling.a PATHS + /opt/intel/vtune_amplifier_xe_2018/lib32 + /opt/intel/vtune_amplifier_xe_2017/lib32 + /opt/intel/vtune_amplifier_xe_2016/lib32 + ) +else() + find_library(VTUNE_LIBRARIES NAMES libjitprofiling.a PATHS + /opt/intel/vtune_amplifier_xe_2018/lib64 + /opt/intel/vtune_amplifier_xe_2017/lib64 + /opt/intel/vtune_amplifier_xe_2016/lib64 + ) +endif() + +# handle the QUIETLY and REQUIRED arguments and set VTUNE_FOUND to TRUE if +# all listed variables are TRUE +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Vtune DEFAULT_MSG VTUNE_LIBRARIES VTUNE_INCLUDE_DIRS) + +mark_as_advanced(VTUNE_FOUND VTUNE_INCLUDE_DIRS VTUNE_LIBRARIES) + diff --git a/cmake/SearchForStuff.cmake b/cmake/SearchForStuff.cmake index ec4f0e89fc..94c91fd7c8 100644 --- a/cmake/SearchForStuff.cmake +++ b/cmake/SearchForStuff.cmake @@ -12,6 +12,7 @@ endif() find_package(LibLZMA) find_package(OpenGL) find_package(PNG) +find_package(Vtune) # The requirement of wxWidgets is checked in SelectPcsx2Plugins module # Does not require the module (allow to compile non-wx plugins) # Force the unicode build (the variable is only supported on cmake 2.8.3 and above) @@ -156,6 +157,10 @@ if(SDL_FOUND AND NOT SDL2_API) include_directories(${SDL_INCLUDE_DIR}) endif() +if(USE_VTUNE AND VTUNE_FOUND) + include_directories(${VTUNE_INCLUDE_DIRS}) +endif() + if(wxWidgets_FOUND) 
include(${wxWidgets_USE_FILE}) endif() diff --git a/common/src/Utilities/CMakeLists.txt b/common/src/Utilities/CMakeLists.txt index 4def9fa953..86fc6bcca5 100644 --- a/common/src/Utilities/CMakeLists.txt +++ b/common/src/Utilities/CMakeLists.txt @@ -107,10 +107,5 @@ set(UtilitiesFinalLibs ${wxWidgets_LIBRARIES} ) -if(USE_VTUNE) - set(UtilitiesFinalFlags ${UtilitiesFinalFlags} -DENABLE_VTUNE) - include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include") -endif() - add_pcsx2_lib(${Output} "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags}") add_pcsx2_lib(${Output}_NO_TLS "${UtilitiesFinalSources}" "${UtilitiesFinalLibs}" "${UtilitiesFinalFlags} -DPCSX2_THREAD_LOCAL=0") diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index aa0e8dfbe6..f8c36b269a 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -682,13 +682,7 @@ foreach(res_file IN ITEMS endforeach() if(USE_VTUNE) - set(pcsx2FinalFlags ${pcsx2FinalFlags} -DENABLE_VTUNE) - include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include") - if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386") - set(pcsx2FinalLibs ${pcsx2FinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a) - else() - set(pcsx2FinalLibs ${pcsx2FinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a) - endif() + set(pcsx2FinalLibs ${pcsx2FinalLibs} ${VTUNE_LIBRARIES}) endif() # Suppress all the system-specific predefined macros outside the reserved namespace. 
diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index e98c5ac28c..886682eb84 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -204,13 +204,7 @@ if(LIBLZMA_FOUND) endif() if(USE_VTUNE) - set(GSdxFinalFlags ${GSdxFinalFlags} -DENABLE_VTUNE) - include_directories("$ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/include") - if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386") - set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib32/libjitprofiling.a) - else() - set(GSdxFinalLibs ${GSdxFinalLibs} $ENV{VTUNE_AMPLIFIER_XE_2016_DIR}/lib64/libjitprofiling.a) - endif() + set(GSdxFinalLibs ${GSdxFinalLibs} ${VTUNE_LIBRARIES}) endif() # Generate Glsl header file. Protect with REBUILD_SHADER to avoid build-dependency on PERL From 903d3595e5923d6a3a9c3493dff25b41eff0ba4e Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 4 Dec 2016 19:59:24 +0100 Subject: [PATCH 2/5] pcsx2: add a --profiling cli option Disable the frame limiter and vsync so that you can profile real work instead of the idle time between vsyncs ;) --- bin/docs/PCSX2.1 | 5 ++++- pcsx2/gui/App.h | 2 ++ pcsx2/gui/AppCoreThread.cpp | 6 ++++++ pcsx2/gui/AppInit.cpp | 4 ++++ pcsx2/gui/Panels/VideoPanel.cpp | 4 ++++ 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/bin/docs/PCSX2.1 b/bin/docs/PCSX2.1 index c39c2fc449..e6c2b40e32 100644 --- a/bin/docs/PCSX2.1 +++ b/bin/docs/PCSX2.1 @@ -33,6 +33,9 @@ Specifies the config folder. It applies to pcsx2 + plugins. forces running of the First-time Wizard. .TP +.B --profiling +eases profiling with a tool such as VTune (disables the frame limiter and vsync) +.TP .SH AUTO-RUN OPTIONS .TP @@ -76,7 +79,7 @@ Disables the quick boot feature, forcing you to sit through the PS2 startup spla Override for the CDVD plugin. .TP -.B --gs=[libpath] +.B --gs=[libpath] Override for the GS plugin. 
.TP diff --git a/pcsx2/gui/App.h b/pcsx2/gui/App.h index 0aac08b943..2c09285d30 100644 --- a/pcsx2/gui/App.h +++ b/pcsx2/gui/App.h @@ -324,6 +324,7 @@ public: wxFileName VmSettingsFile; bool DisableSpeedhacks; + bool ProfilingMode; // Note that gamefixes in this array should only be honored if the // "HasCustomGamefixes" boolean is also enabled. @@ -338,6 +339,7 @@ public: DisableSpeedhacks = false; ApplyCustomGamefixes = false; GsWindowMode = GsWinMode_Unspecified; + ProfilingMode = false; } // Returns TRUE if either speedhacks or gamefixes are being overridden. diff --git a/pcsx2/gui/AppCoreThread.cpp b/pcsx2/gui/AppCoreThread.cpp index ca0b0bda08..535459f399 100644 --- a/pcsx2/gui/AppCoreThread.cpp +++ b/pcsx2/gui/AppCoreThread.cpp @@ -353,6 +353,12 @@ static void _ApplySettings( const Pcsx2Config& src, Pcsx2Config& fixup ) else if( !g_Conf->EnableGameFixes ) fixup.Gamefixes.DisableAll(); + if( overrides.ProfilingMode ) + { + fixup.GS.FrameLimitEnable = false; + fixup.GS.VsyncEnable = false; + } + wxString gameCRC; wxString gameSerial; wxString gamePatch; diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index 4353b88980..af52fa81ca 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -249,6 +249,8 @@ void Pcsx2App::OnInitCmdLine( wxCmdLineParser& parser ) parser.AddSwitch( wxEmptyString,L"forcewiz", AddAppName(_("forces %s to start the First-time Wizard")) ); parser.AddSwitch( wxEmptyString,L"portable", _("enables portable mode operation (requires admin/root access)") ); + parser.AddSwitch( wxEmptyString,L"profiling", _("update options to ease profiling (debug)") ); + const PluginInfo* pi = tbl_PluginInfo; do { parser.AddOption( wxEmptyString, pi->GetShortname().Lower(), pxsFmt( _("specify the file to use as the %s plugin"), WX_STR(pi->GetShortname()) ) @@ -282,6 +284,8 @@ bool Pcsx2App::ParseOverrides( wxCmdLineParser& parser ) Overrides.DisableSpeedhacks = parser.Found(L"nohacks"); + Overrides.ProfilingMode = parser.Found(L"profiling"); 
+ if (parser.Found(L"gamefixes", &dest)) { Overrides.ApplyCustomGamefixes = true; diff --git a/pcsx2/gui/Panels/VideoPanel.cpp b/pcsx2/gui/Panels/VideoPanel.cpp index 22895747ac..b7e4acdfb0 100644 --- a/pcsx2/gui/Panels/VideoPanel.cpp +++ b/pcsx2/gui/Panels/VideoPanel.cpp @@ -157,6 +157,10 @@ void Panels::FramelimiterPanel::Apply() } appfps.SanityCheck(); + + // If the user has a command line override specified, we need to disable it + // so that their changes take effect + wxGetApp().Overrides.ProfilingMode = false; } // -------------------------------------------------------------------------------------- From b9369e7c00f73e3d9113e095dc8d46fbe3500569 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 4 Dec 2016 21:12:29 +0100 Subject: [PATCH 3/5] pcsx2: remove the reserve feature of recompiler memory Cons: * requires ~180MB of physical memory (virtual memory usage is unchanged, so it doesn't impact the 4GB limit). From Steam: 98.81% of users have at least 2GB of RAM and 83.62% have at least 4GB. That being said, it might not really increase RAM requirements, as the OS could put the new allocation in swap. 
Pro: * code is much easier * remove at least half of the signal listener * last but not least, it is way easier for profiler/debugger --- common/include/Utilities/PageFaultSource.h | 57 ----------------- common/src/Utilities/VirtualMemory.cpp | 74 ---------------------- pcsx2/System.cpp | 52 ++++++--------- pcsx2/System/RecTypes.h | 16 ++--- pcsx2/x86/newVif_UnpackSSE.cpp | 1 - 5 files changed, 24 insertions(+), 176 deletions(-) diff --git a/common/include/Utilities/PageFaultSource.h b/common/include/Utilities/PageFaultSource.h index 631e839a68..212d804462 100644 --- a/common/include/Utilities/PageFaultSource.h +++ b/common/include/Utilities/PageFaultSource.h @@ -215,63 +215,6 @@ protected: virtual void ReprotectCommittedBlocks(const PageProtectionMode &newmode); }; -// -------------------------------------------------------------------------------------- -// BaseVmReserveListener -// -------------------------------------------------------------------------------------- -class BaseVmReserveListener : public VirtualMemoryReserve -{ - DeclareNoncopyableObject(BaseVmReserveListener); - - typedef VirtualMemoryReserve _parent; - -protected: - EventListenerHelper_PageFault m_pagefault_listener; - - // Incremental size by which the buffer grows (in pages) - uptr m_blocksize; - -public: - BaseVmReserveListener(const wxString &name, size_t size = 0); - virtual ~BaseVmReserveListener() throw() {} - - operator void *() { return m_baseptr; } - operator const void *() const { return m_baseptr; } - - operator u8 *() { return (u8 *)m_baseptr; } - operator const u8 *() const { return (u8 *)m_baseptr; } - - using _parent::operator[]; - - void OnPageFaultEvent(const PageFaultInfo &info, bool &handled); - - virtual uptr SetBlockSize(uptr bytes) - { - m_blocksize = (bytes + __pagesize - 1) / __pagesize; - return m_blocksize * __pagesize; - } - - virtual void Reset() - { - _parent::Reset(); - } - -protected: - // This function is called from OnPageFaultEvent after the address has been 
translated - // and confirmed to apply to this reserved area in question. OnPageFaultEvent contains - // a try/catch exception handler, which ensures "reasonable" error response behavior if - // this function throws exceptions. - // - // Important: This method is called from the context of an exception/signal handler. On - // Windows this isn't a big deal (most operations are ok). On Linux, however, logging - // and other facilities are probably not a good idea. - virtual void DoCommitAndProtect(uptr offset) = 0; - - // This function is called for every committed block. - virtual void OnCommittedBlock(void *block) = 0; - - virtual void CommitBlocks(uptr page, uint blocks); -}; - #ifdef __POSIX__ #define PCSX2_PAGEFAULT_PROTECT diff --git a/common/src/Utilities/VirtualMemory.cpp b/common/src/Utilities/VirtualMemory.cpp index 44f47f2137..946ca6e9ca 100644 --- a/common/src/Utilities/VirtualMemory.cpp +++ b/common/src/Utilities/VirtualMemory.cpp @@ -257,80 +257,6 @@ bool VirtualMemoryReserve::TryResize(uint newsize) return true; } -// -------------------------------------------------------------------------------------- -// BaseVmReserveListener (implementations) -// -------------------------------------------------------------------------------------- - -BaseVmReserveListener::BaseVmReserveListener(const wxString &name, size_t size) - : VirtualMemoryReserve(name, size) - , m_pagefault_listener(this) -{ - m_blocksize = __pagesize; -} - -void BaseVmReserveListener::CommitBlocks(uptr page, uint blocks) -{ - const uptr blocksbytes = blocks * m_blocksize * __pagesize; - void *blockptr = (u8 *)m_baseptr + (page * __pagesize); - - // Depending on the operating system, this call could fail if the system is low on either - // physical ram or virtual memory. 
- if (!HostSys::MmapCommitPtr(blockptr, blocksbytes, m_prot_mode)) { - throw Exception::OutOfMemory(m_name) - .SetDiagMsg(pxsFmt("An additional %u blocks @ 0x%08x were requested, but could not be committed!", blocks, blockptr)); - } - - u8 *init = (u8 *)blockptr; - u8 *endpos = init + blocksbytes; - for (; init < endpos; init += m_blocksize * __pagesize) - OnCommittedBlock(init); - - m_pages_commited += m_blocksize * blocks; -} - -void BaseVmReserveListener::OnPageFaultEvent(const PageFaultInfo &info, bool &handled) -{ - sptr offset = (info.addr - (uptr)m_baseptr) / __pagesize; - if ((offset < 0) || ((uptr)offset >= m_pages_reserved)) - return; - - if (!m_allow_writes) { - pxFailRel(pxsFmt( - L"Memory Protection Fault @ %ls (%s)\n" - L"Modification of this reserve has been disabled (m_allow_writes == false).", - pxsPtr(info.addr), WX_STR(m_name))); - return; - } - -// Linux Note! the SIGNAL handler is very limited in what it can do, and not only can't -// we let the C++ exception try to unwind the stack, we may not be able to log it either. -// (but we might as well try -- kernel/posix rules says not to do it, but Linux kernel -// implementations seem to support it). -// Note also that logging the exception and/or issuing an assertion dialog are always -// possible if the thread handling the signal is not the main thread. - -// In windows we can let exceptions bubble out of the page fault handler. SEH will more -// or less handle them in a semi-expected way, and might even avoid a GPF long enough -// for the system to log the error or something. 
- -#ifndef __WXMSW__ - try { -#endif - DoCommitAndProtect(offset); - handled = true; - -#ifndef __WXMSW__ - } catch (Exception::BaseException &ex) { - handled = false; - if (!wxThread::IsMain()) { - pxFailRel(ex.FormatDiagnosticMessage()); - } else { - pxTrap(); - } - } -#endif -} - // -------------------------------------------------------------------------------------- // PageProtectionMode (implementations) // -------------------------------------------------------------------------------------- diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index a988f0a63f..7fdc857100 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -36,11 +36,9 @@ // Parameters: // name - a nice long name that accurately describes the contents of this reserve. RecompiledCodeReserve::RecompiledCodeReserve( const wxString& name, uint defCommit ) - : BaseVmReserveListener( name ) + : VirtualMemoryReserve( name, defCommit ) { - m_blocksize = (1024 * 128) / __pagesize; m_prot_mode = PageAccess_Any(); - m_def_commit = defCommit / __pagesize; } RecompiledCodeReserve::~RecompiledCodeReserve() throw() @@ -59,19 +57,13 @@ void RecompiledCodeReserve::_termProfiler() { } -uint RecompiledCodeReserve::_calcDefaultCommitInBlocks() const -{ - return (m_def_commit + m_blocksize - 1) / m_blocksize; -} - void* RecompiledCodeReserve::Reserve( size_t size, uptr base, uptr upper_bounds ) { if (!_parent::Reserve(size, base, upper_bounds)) return NULL; - _registerProfiler(); - // Pre-Allocate the first block (to reduce the number of segmentation fault - // in debugger) - DoCommitAndProtect(0); + Commit(); + + _registerProfiler(); return m_baseptr; } @@ -80,11 +72,24 @@ void RecompiledCodeReserve::Reset() { _parent::Reset(); - // Pre-Allocate the first block (to reduce the number of segmentation fault - // in debugger) - DoCommitAndProtect(0); + Commit(); } +bool RecompiledCodeReserve::Commit() +{ + bool status = _parent::Commit(); + + if (IsDevBuild && m_baseptr) + { + // Clear the recompiled code block to 
0xcc (INT3) -- this helps disasm tools show + // the assembly dump more cleanly. We don't clear the block on Release builds since + // it can add a noticeable amount of overhead to large block recompilations. + + memset(m_baseptr, 0xCC, m_pages_commited * __pagesize); + } + + return status; +} // Sets the abbreviated name used by the profiler. Name should be under 10 characters long. // After a name has been set, a profiler source will be automatically registered and cleared @@ -96,23 +101,6 @@ RecompiledCodeReserve& RecompiledCodeReserve::SetProfilerName( const wxString& s return *this; } -void RecompiledCodeReserve::DoCommitAndProtect( uptr page ) -{ - CommitBlocks(page, (m_pages_commited || !m_def_commit) ? 1 : _calcDefaultCommitInBlocks() ); -} - -void RecompiledCodeReserve::OnCommittedBlock( void* block ) -{ - if (IsDevBuild) - { - // Clear the recompiled code block to 0xcc (INT3) -- this helps disasm tools show - // the assembly dump more cleanly. We don't clear the block on Release builds since - // it can add a noticeable amount of overhead to large block recompilations. - - memset(block, 0xCC, m_blocksize * __pagesize); - } -} - // This error message is shared by R5900, R3000, and microVU recompilers. It is not used by the // SuperVU recompiler, since it has its own customized message. void RecompiledCodeReserve::ThrowIfNotOk() const diff --git a/pcsx2/System/RecTypes.h b/pcsx2/System/RecTypes.h index 91de2a6f73..906f282dc5 100644 --- a/pcsx2/System/RecTypes.h +++ b/pcsx2/System/RecTypes.h @@ -23,16 +23,11 @@ // A recompiled code reserve is a simple sequential-growth block of memory which is auto- // cleared to INT 3 (0xcc) as needed. // -class RecompiledCodeReserve : public BaseVmReserveListener +class RecompiledCodeReserve : public VirtualMemoryReserve { - typedef BaseVmReserveListener _parent; + typedef VirtualMemoryReserve _parent; protected: - // Specifies the number of blocks that should be committed automatically when the - // reserve is created. 
Typically this chunk is larger than the block size, and - // should be based on whatever typical overhead is needed for basic block use. - uint m_def_commit; - wxString m_profiler_name; public: @@ -40,15 +35,15 @@ public: virtual ~RecompiledCodeReserve() throw(); virtual void* Reserve( size_t size, uptr base=0, uptr upper_bounds=0 ); - virtual void OnCommittedBlock( void* block ); virtual void Reset(); + virtual bool Commit(); virtual RecompiledCodeReserve& SetProfilerName( const wxString& shortname ); virtual RecompiledCodeReserve& SetProfilerName( const char* shortname ) { return SetProfilerName( fromUTF8(shortname) ); } - + void ThrowIfNotOk() const; operator void*() { return m_baseptr; } @@ -59,10 +54,7 @@ public: protected: void ResetProcessReserves() const; - void DoCommitAndProtect( uptr page ); void _registerProfiler(); void _termProfiler(); - - uint _calcDefaultCommitInBlocks() const; }; diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index 179e87cc3a..e463443d0e 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -420,7 +420,6 @@ void VifUnpackSSE_Init() nVifUpkExec = new RecompiledCodeReserve(L"VIF SSE-optimized Unpacking Functions", _64kb); nVifUpkExec->SetProfilerName("iVIF-SSE"); - nVifUpkExec->SetBlockSize( 1 ); nVifUpkExec->Reserve( _64kb ); nVifUpkExec->ThrowIfNotOk(); From 031b6e63728b28672ee93773349050e3b87738a8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 4 Dec 2016 21:47:04 +0100 Subject: [PATCH 4/5] common: improve vtune merge support Mapping the full buffer is killer on Vtune (either crash or requires a huge processing time). Instead keep the same ID for code in the same buffers. I think all buffers are correctly mapped now but I still miss the frame pointer for VU code. 
--- common/include/Utilities/Perf.h | 1 + common/src/Utilities/Perf.cpp | 38 +++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/common/include/Utilities/Perf.h b/common/include/Utilities/Perf.h index c9ed9e636a..d6a3eecb0b 100644 --- a/common/include/Utilities/Perf.h +++ b/common/include/Utilities/Perf.h @@ -36,6 +36,7 @@ class InfoVector { std::vector m_v; char m_prefix[20]; + unsigned int m_vtune_id; public: InfoVector(const char *prefix); diff --git a/common/src/Utilities/Perf.cpp b/common/src/Utilities/Perf.cpp index a4ce02eb17..746fd3cee2 100644 --- a/common/src/Utilities/Perf.cpp +++ b/common/src/Utilities/Perf.cpp @@ -76,6 +76,11 @@ void Info::Print(FILE *fp) InfoVector::InfoVector(const char *prefix) { strncpy(m_prefix, prefix, sizeof(m_prefix)); +#ifdef ENABLE_VTUNE + m_vtune_id = iJIT_GetNewMethodID(); +#else + m_vtune_id = 0; +#endif } void InfoVector::print(FILE *fp) @@ -90,18 +95,16 @@ void InfoVector::map(uptr x86, u32 size, const char *symbol) // Dispatchers are on a page and must always be kept. 
// Recompilers are much bigger (TODO check VIF) and are only // useful when MERGE_BLOCK_RESULT is defined - -#ifdef MERGE_BLOCK_RESULT - m_v.emplace_back(x86, size, symbol); +#if defined(ENABLE_VTUNE) || !defined(MERGE_BLOCK_RESULT) + u32 max_code_size = 16 * _1kb; #else - if (size < 8 * _1kb) - m_v.emplace_back(x86, size, symbol); + u32 max_code_size = _1gb; #endif + if (size < max_code_size) { + m_v.emplace_back(x86, size, symbol); + #ifdef ENABLE_VTUNE - // mapping the full recompiler will blow up VTUNE - if (size < _16kb) { - fprintf(stderr, "map %s: %p size %d\n", symbol, (void *)x86, size); std::string name = std::string(symbol); iJIT_Method_Load ml; @@ -114,8 +117,10 @@ void InfoVector::map(uptr x86, u32 size, const char *symbol) ml.method_size = size; iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml); - } + +//fprintf(stderr, "mapF %s: %p size %dKB\n", ml.method_name, ml.method_load_address, ml.method_size / 1024u); #endif + } } void InfoVector::map(uptr x86, u32 size, u32 pc) @@ -125,19 +130,24 @@ void InfoVector::map(uptr x86, u32 size, u32 pc) #endif #ifdef ENABLE_VTUNE - std::string name = std::string(m_prefix) + "_" + std::to_string(pc); - //fprintf(stderr, "map %s: %p size %d\n", name.c_str(), (void*)x86, size); - - iJIT_Method_Load ml; + iJIT_Method_Load_V2 ml; memset(&ml, 0, sizeof(ml)); +#ifdef MERGE_BLOCK_RESULT + ml.method_id = m_vtune_id; + ml.method_name = m_prefix; +#else + std::string name = std::string(m_prefix) + "_" + std::to_string(pc); ml.method_id = iJIT_GetNewMethodID(); ml.method_name = (char *)name.c_str(); +#endif ml.method_load_address = (void *)x86; ml.method_size = size; - iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml); + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2, &ml); + +//fprintf(stderr, "mapB %s: %p size %d\n", ml.method_name, ml.method_load_address, ml.method_size); #endif } From 7f64f39c05bbdef5ccbe215a1d3f0d88a60e8fc7 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 8 Dec 2016 
09:53:00 +0100 Subject: [PATCH 5/5] vtune: count the number of ERET to trigger a quick exit The purpose is to stop vtune profiling in a predictable way. It allows comparing multiple runs. ERET is called on every syscall/interrupt return, so its count is proportional to the EE program execution. --- pcsx2/COP0.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 28330ce351..6898098725 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -536,6 +536,24 @@ void BC0TL() { } void ERET() { +#ifdef ENABLE_VTUNE + // Allow to stop vtune in a predictable way to compare runs + // Of course, the limit will depend on the game. + const u32 million = 1000 * 1000; + static u32 vtune = 0; + vtune++; + + // quick_exit vs exit: quick_exit won't call static storage destructor (OS will manage). It helps + // avoiding the race condition between threads destruction. + if (vtune > 30 * million) { + Console.WriteLn("VTUNE: quick_exit"); + std::quick_exit(EXIT_SUCCESS); + } else if (!(vtune % million)) { + Console.WriteLn("VTUNE: ERET was called %uM times", vtune/million); + } + +#endif + if (cpuRegs.CP0.n.Status.b.ERL) { cpuRegs.pc = cpuRegs.CP0.n.ErrorEPC; cpuRegs.CP0.n.Status.b.ERL = 0;