Perf: Support instruction-level profiling with jitdump on Linux

This commit is contained in:
Stenzek 2023-04-28 21:09:04 +10:00 committed by refractionpcsx2
parent b3697579c0
commit a5ed24ca88
14 changed files with 220 additions and 247 deletions

View File

@ -15,197 +15,211 @@
#include "common/Perf.h"
#include "common/Pcsx2Defs.h"
#ifdef __unix__
#include <unistd.h>
#endif
#include "common/Assertions.h"
#include "common/StringUtil.h"
#ifdef ENABLE_VTUNE
#include "jitprofiling.h"
#endif
#include <string> // std::string
#include <cstring> // strncpy
#include <algorithm> // std::remove_if
#include <array>
#include <cstring>
#ifdef __linux__
#include <atomic>
#include <ctime>
#include <mutex>
#include <elf.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#endif
//#define ProfileWithPerf
#define MERGE_BLOCK_RESULT
//#define ProfileWithPerfJitDump
#ifdef ENABLE_VTUNE
#ifdef _WIN32
#if defined(ENABLE_VTUNE) && defined(_WIN32)
#pragma comment(lib, "jitprofiling.lib")
#endif
#endif
namespace Perf
{
// Warning: these objects aren't thread-safe
InfoVector any("");
InfoVector ee("EE");
InfoVector iop("IOP");
InfoVector vu("VU");
InfoVector vif("VIF");
Group any("");
Group ee("EE");
Group iop("IOP");
Group vu0("VU0");
Group vu1("VU1");
Group vif("VIF");
// Perf map/jitdump output is only supported on Linux
#if defined(__linux__) && (defined(ProfileWithPerf) || defined(ENABLE_VTUNE))
////////////////////////////////////////////////////////////////////////////////
// Implementation of the Info object
////////////////////////////////////////////////////////////////////////////////
Info::Info(uptr x86, u32 size, const char* symbol)
: m_x86(x86)
, m_size(size)
, m_dynamic(false)
#if defined(__linux__) && defined(ProfileWithPerf)
static std::FILE* s_map_file = nullptr;
static bool s_map_file_opened = false;
static std::mutex s_mutex;
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
{
strncpy(m_symbol, symbol, sizeof(m_symbol));
}
std::unique_lock lock(s_mutex);
Info::Info(uptr x86, u32 size, const char* symbol, u32 pc)
: m_x86(x86)
, m_size(size)
, m_dynamic(true)
{
snprintf(m_symbol, sizeof(m_symbol), "%s_0x%08x", symbol, pc);
}
void Info::Print(FILE* fp)
{
	// Emits one /tmp/perf-<pid>.map line: "<start-addr> <size> <symbol>", all hex.
	// m_x86 is pointer-sized (uptr); passing it to "%x" truncates it to 32 bits
	// on 64-bit builds, producing bogus addresses in the map. Widen explicitly.
	fprintf(fp, "%zx %x %s\n", static_cast<size_t>(m_x86), m_size, m_symbol);
}
////////////////////////////////////////////////////////////////////////////////
// Implementation of the InfoVector object
////////////////////////////////////////////////////////////////////////////////
InfoVector::InfoVector(const char* prefix)
{
	// strncpy does not null-terminate when the source fills the whole buffer;
	// terminate explicitly so later "%s" formatting of m_prefix cannot read
	// past the end of the array.
	strncpy(m_prefix, prefix, sizeof(m_prefix) - 1);
	m_prefix[sizeof(m_prefix) - 1] = '\0';
#ifdef ENABLE_VTUNE
	// One VTune method id per group; reused when blocks are merged under a
	// single method (see the MERGE_BLOCK_RESULT path).
	m_vtune_id = iJIT_GetNewMethodID();
#else
	m_vtune_id = 0;
#endif
}
void InfoVector::print(FILE* fp)
{
for (auto&& it : m_v)
it.Print(fp);
}
void InfoVector::map(uptr x86, u32 size, const char* symbol)
{
// This function is typically used for dispatcher and recompiler.
// Dispatchers are on a page and must always be kept.
// Recompilers are much bigger (TODO check VIF) and are only
// useful when MERGE_BLOCK_RESULT is defined
#if defined(ENABLE_VTUNE) || !defined(MERGE_BLOCK_RESULT)
u32 max_code_size = 16 * _1kb;
#else
u32 max_code_size = _1gb;
#endif
if (size < max_code_size)
if (!s_map_file)
{
m_v.emplace_back(x86, size, symbol);
if (s_map_file_opened)
return;
#ifdef ENABLE_VTUNE
std::string name = std::string(symbol);
iJIT_Method_Load ml;
memset(&ml, 0, sizeof(ml));
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
ml.method_load_address = (void*)x86;
ml.method_size = size;
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
//fprintf(stderr, "mapF %s: %p size %dKB\n", ml.method_name, ml.method_load_address, ml.method_size / 1024u);
#endif
char file[256];
snprintf(file, std::size(file), "/tmp/perf-%d.map", getpid());
s_map_file = std::fopen(file, "wb");
s_map_file_opened = true;
if (!s_map_file)
return;
}
std::fprintf(s_map_file, "%" PRIx64 " %zx %s\n", static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), size, symbol);
std::fflush(s_map_file);
}
#elif defined(__linux__) && defined(ProfileWithPerfJitDump)
// Record ids from the linux perf jitdump specification; written into
// JITDUMP_RECORD_HEADER::id. Only JIT_CODE_LOAD is emitted below.
enum : u32
{
JIT_CODE_LOAD = 0,
JIT_CODE_MOVE = 1,
JIT_CODE_DEBUG_INFO = 2,
JIT_CODE_CLOSE = 3,
JIT_CODE_UNWINDING_INFO = 4
};
// On-disk layout of the jit-<pid>.dump file consumed by `perf inject --jit`.
// Records are packed: no implicit padding is allowed.
#pragma pack(push, 1)
struct JITDUMP_HEADER
{
u32 magic = 0x4A695444; // "JiTD"; readers use this to detect endianness
u32 version = 1;
u32 header_size = sizeof(JITDUMP_HEADER);
u32 elf_mach; // ELF machine id of the jitted code (set to EM_X86_64 below)
u32 pad1 = 0;
u32 pid;
u64 timestamp; // file creation time, same clock as record timestamps
u64 flags = 0;
};
// Common prefix of every record in the dump.
struct JITDUMP_RECORD_HEADER
{
u32 id; // one of the JIT_CODE_* values above
u32 total_size; // record size in bytes, including this header and payload
u64 timestamp;
};
// JIT_CODE_LOAD payload: the record is followed by the NUL-terminated symbol
// name and then a copy of the emitted native code bytes.
struct JITDUMP_CODE_LOAD
{
JITDUMP_RECORD_HEADER header;
u32 pid;
u32 tid;
u64 vma;
u64 code_addr;
u64 code_size;
u64 code_index; // monotonically increasing per-process record number
// name
};
#pragma pack(pop)
// Nanosecond timestamp for jitdump records. Uses CLOCK_MONOTONIC, which pairs
// with `perf record -k mono` (jitdump timestamps must match perf's clock).
static u64 JitDumpTimestamp()
{
	timespec now = {};
	clock_gettime(CLOCK_MONOTONIC, &now);
	const u64 whole_seconds_ns = static_cast<u64>(now.tv_sec) * 1000000000ULL;
	return whole_seconds_ns + static_cast<u64>(now.tv_nsec);
}
void InfoVector::map(uptr x86, u32 size, u32 pc)
static FILE* s_jitdump_file = nullptr;
static bool s_jitdump_file_opened = false;
static std::mutex s_jitdump_mutex;
static u32 s_jitdump_record_id;
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
{
#ifndef MERGE_BLOCK_RESULT
m_v.emplace_back(x86, size, m_prefix, pc);
#endif
const u32 namelen = std::strlen(symbol) + 1;
#ifdef ENABLE_VTUNE
iJIT_Method_Load_V2 ml;
std::unique_lock lock(s_jitdump_mutex);
if (!s_jitdump_file)
{
if (!s_jitdump_file_opened)
{
char file[256];
snprintf(file, std::size(file), "jit-%d.dump", getpid());
s_jitdump_file = fopen(file, "w+b");
s_jitdump_file_opened = true;
if (!s_jitdump_file)
return;
}
memset(&ml, 0, sizeof(ml));
void* perf_marker = mmap(nullptr, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(s_jitdump_file), 0);
pxAssertRel(perf_marker != MAP_FAILED, "Map perf marker");
#ifdef MERGE_BLOCK_RESULT
ml.method_id = m_vtune_id;
ml.method_name = m_prefix;
#else
std::string name = std::string(m_prefix) + "_" + std::to_string(pc);
JITDUMP_HEADER jh = {};
jh.elf_mach = EM_X86_64;
jh.pid = getpid();
jh.timestamp = JitDumpTimestamp();
std::fwrite(&jh, sizeof(jh), 1, s_jitdump_file);
}
JITDUMP_CODE_LOAD cl = {};
cl.header.id = JIT_CODE_LOAD;
cl.header.total_size = sizeof(cl) + namelen + static_cast<u32>(size);
cl.header.timestamp = JitDumpTimestamp();
cl.pid = getpid();
cl.tid = syscall(SYS_gettid);
cl.vma = 0;
cl.code_addr = static_cast<u64>(reinterpret_cast<uintptr_t>(ptr));
cl.code_size = static_cast<u64>(size);
cl.code_index = s_jitdump_record_id++;
std::fwrite(&cl, sizeof(cl), 1, s_jitdump_file);
std::fwrite(symbol, namelen, 1, s_jitdump_file);
std::fwrite(ptr, size, 1, s_jitdump_file);
std::fflush(s_jitdump_file);
}
#elif defined(ENABLE_VTUNE)
static void RegisterMethod(const void* ptr, size_t size, const char* symbol)
{
iJIT_Method_Load_V2 ml = {};
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
#endif
ml.method_load_address = (void*)x86;
ml.method_size = size;
ml.method_name = const_cast<char*>(symbol);
ml.method_load_address = ptr;
ml.method_size = static_cast<unsigned int>(size);
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2, &ml);
//fprintf(stderr, "mapB %s: %p size %d\n", ml.method_name, ml.method_load_address, ml.method_size);
#endif
}
void InfoVector::reset()
{
auto dynamic = std::remove_if(m_v.begin(), m_v.end(), [](Info i) { return i.m_dynamic; });
m_v.erase(dynamic, m_v.end());
}
////////////////////////////////////////////////////////////////////////////////
// Global function
////////////////////////////////////////////////////////////////////////////////
void dump()
{
char file[256];
snprintf(file, 250, "/tmp/perf-%d.map", getpid());
FILE* fp = fopen(file, "w");
any.print(fp);
ee.print(fp);
iop.print(fp);
vu.print(fp);
if (fp)
fclose(fp);
}
void dump_and_reset()
{
dump();
any.reset();
ee.reset();
iop.reset();
vu.reset();
}
#else
////////////////////////////////////////////////////////////////////////////////
// Dummy implementation
////////////////////////////////////////////////////////////////////////////////
InfoVector::InfoVector(const char* prefix)
: m_vtune_id(0)
// No profiling backend compiled in: registration is a no-op.
static void RegisterMethod(const void* ptr, size_t size, const char* method)
{
}
void InfoVector::map(uptr x86, u32 size, const char* symbol) {}
void InfoVector::map(uptr x86, u32 size, u32 pc) {}
void InfoVector::reset() {}
#endif
void dump() {}
void dump_and_reset() {}
#if (defined(__linux__) && (defined(ProfileWithPerf) || defined(ProfileWithPerfJitDump))) || defined(ENABLE_VTUNE)
void Group::Register(const void* ptr, size_t size, const char* symbol)
{
	// Prepend this group's prefix (when one is set) before handing the
	// symbol off to the active profiler backend.
	char full_symbol[128];
	if (!HasPrefix())
		StringUtil::Strlcpy(full_symbol, symbol, std::size(full_symbol));
	else
		std::snprintf(full_symbol, std::size(full_symbol), "%s_%s", m_prefix, symbol);
	RegisterMethod(ptr, size, full_symbol);
}
void Group::RegisterPC(const void* ptr, size_t size, u32 pc)
{
	// Symbol is "<prefix>_<pc>" when the group has a prefix, or just the
	// zero-padded hex PC otherwise.
	char full_symbol[128];
	const bool prefixed = HasPrefix();
	std::snprintf(full_symbol, std::size(full_symbol), "%s%s%08X",
		prefixed ? m_prefix : "", prefixed ? "_" : "", pc);
	RegisterMethod(ptr, size, full_symbol);
}
void Group::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key)
{
	// Symbol is "[<group>_]<prefix><key>" with the key rendered as 16 hex digits.
	char full_symbol[128];
	const bool grouped = HasPrefix();
	std::snprintf(full_symbol, std::size(full_symbol), "%s%s%s%016" PRIX64,
		grouped ? m_prefix : "", grouped ? "_" : "", prefix, key);
	RegisterMethod(ptr, size, full_symbol);
}
#else
// No profiling backend compiled in: all registrations compile to no-ops.
void Group::Register(const void* ptr, size_t size, const char* symbol) {}
void Group::RegisterPC(const void* ptr, size_t size, u32 pc) {}
void Group::RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key) {}
#endif
} // namespace Perf

View File

@ -21,42 +21,23 @@
namespace Perf
{
struct Info
class Group
{
uptr m_x86;
u32 m_size;
char m_symbol[20];
// The idea is to keep static zones that are set only
// once.
bool m_dynamic;
Info(uptr x86, u32 size, const char* symbol);
Info(uptr x86, u32 size, const char* symbol, u32 pc);
void Print(FILE* fp);
};
class InfoVector
{
std::vector<Info> m_v;
char m_prefix[20];
unsigned int m_vtune_id;
const char* m_prefix;
public:
InfoVector(const char* prefix);
constexpr Group(const char* prefix) : m_prefix(prefix) {}
bool HasPrefix() const { return (m_prefix && m_prefix[0]); }
void print(FILE* fp);
void map(uptr x86, u32 size, const char* symbol);
void map(uptr x86, u32 size, u32 pc);
void reset();
void Register(const void* ptr, size_t size, const char* symbol);
void RegisterPC(const void* ptr, size_t size, u32 pc);
void RegisterKey(const void* ptr, size_t size, const char* prefix, u64 key);
};
void dump();
void dump_and_reset();
extern InfoVector any;
extern InfoVector ee;
extern InfoVector iop;
extern InfoVector vu;
extern InfoVector vif;
extern Group any;
extern Group ee;
extern Group iop;
extern Group vu0;
extern Group vu1;
extern Group vif;
} // namespace Perf

View File

@ -17,6 +17,7 @@
#include "GSDrawScanlineCodeGenerator.all.h"
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GSVertexSW.h"
#include "common/Perf.h"
MULTI_ISA_UNSHARED_IMPL;
using namespace Xbyak;
@ -590,6 +591,8 @@ L("exit");
if (isYmm)
vzeroupper();
ret();
Perf::any.RegisterKey(actual.getCode(), actual.getSize(), "GSDrawScanline_", m_sel.key);
}
/// Inputs: a0=pixels, a1=left, a2[x64]=top, a3[x64]=v

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include "GSSetupPrimCodeGenerator.all.h"
#include "GSVertexSW.h"
#include "common/Perf.h"
MULTI_ISA_UNSHARED_IMPL;
using namespace Xbyak;
@ -147,6 +148,8 @@ void GSSetupPrimCodeGenerator2::Generate()
if (isYmm)
vzeroupper();
ret();
Perf::any.RegisterKey(actual.getCode(), actual.getSize(), "GSSetupPrim_", m_sel.key);
}
void GSSetupPrimCodeGenerator2::Depth_XMM()

View File

@ -307,14 +307,6 @@ RecompiledCodeReserve::~RecompiledCodeReserve()
Release();
}
void RecompiledCodeReserve::_registerProfiler()
{
if (m_profiler_name.empty() || !IsOk())
return;
Perf::any.map((uptr)m_baseptr, m_size, m_profiler_name.c_str());
}
void RecompiledCodeReserve::Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size)
{
// Anything passed to the memory allocator must be page aligned.
@ -329,7 +321,6 @@ void RecompiledCodeReserve::Assign(VirtualMemoryManagerPtr allocator, size_t off
}
VirtualMemoryReserve::Assign(std::move(allocator), base, size);
_registerProfiler();
}
void RecompiledCodeReserve::Reset()
@ -353,13 +344,3 @@ void RecompiledCodeReserve::ForbidModification()
{
HostSys::MemProtect(m_baseptr, m_size, PageProtectionMode().Read().Execute());
}
// Sets the abbreviated name used by the profiler. Name should be under 10 characters long.
// After a name has been set, a profiler source will be automatically registered and cleared
// in accordance with changes in the reserve area.
RecompiledCodeReserve& RecompiledCodeReserve::SetProfilerName(std::string name)
{
m_profiler_name = std::move(name);
_registerProfiler();
return *this;
}

View File

@ -154,9 +154,6 @@ class RecompiledCodeReserve : public VirtualMemoryReserve
{
typedef VirtualMemoryReserve _parent;
protected:
std::string m_profiler_name;
public:
RecompiledCodeReserve(std::string name);
~RecompiledCodeReserve();
@ -164,14 +161,9 @@ public:
void Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size);
void Reset();
RecompiledCodeReserve& SetProfilerName(std::string name);
void ForbidModification();
void AllowModification();
operator u8*() { return m_baseptr; }
operator const u8*() const { return m_baseptr; }
protected:
void _registerProfiler();
};

View File

@ -283,7 +283,7 @@ static void _DynGen_Dispatchers()
recBlocks.SetJITCompile(iopJITCompile);
Perf::any.map((uptr)&iopRecDispatchers, 4096, "IOP Dispatcher");
Perf::any.Register((void*)iopRecDispatchers, 4096, "IOP Dispatcher");
}
////////////////////////////////////////////////////
@ -896,7 +896,6 @@ static void recReserve()
return;
recMem = new RecompiledCodeReserve("R3000A Recompiler Cache");
recMem->SetProfilerName("IOPrec");
recMem->Assign(GetVmMemory().CodeMemory(), HostMemoryMap::IOPrecOffset, 32 * _1mb);
}
@ -940,8 +939,6 @@ void recResetIOP()
{
DevCon.WriteLn("iR3000A Recompiler reset.");
Perf::iop.reset();
recAlloc();
recMem->Reset();
@ -1005,9 +1002,6 @@ static void recShutdown()
safe_free(s_pInstCache);
s_nInstCacheSize = 0;
// FIXME Warning thread unsafe
Perf::dump();
}
static void iopClearRecLUT(BASEBLOCK* base, int count)
@ -1768,7 +1762,7 @@ StartRecomp:
pxAssert(xGetPtr() - recPtr < _64kb);
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
Perf::iop.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
Perf::iop.RegisterPC((void*)s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
recPtr = xGetPtr();

View File

@ -514,7 +514,7 @@ static void _DynGen_Dispatchers()
recBlocks.SetJITCompile(JITCompile);
Perf::any.map((uptr)&eeRecDispatchers, 4096, "EE Dispatcher");
Perf::any.Register((void*)eeRecDispatchers, 4096, "EE Dispatcher");
}
@ -533,7 +533,6 @@ static void recReserve()
return;
recMem = new RecompiledCodeReserve("R5900 Recompiler Cache");
recMem->SetProfilerName("EErec");
recMem->Assign(GetVmMemory().CodeMemory(), HostMemoryMap::EErecOffset, 64 * _1mb);
}
@ -616,8 +615,6 @@ static void recResetRaw()
{
Console.WriteLn(Color_StrongBlack, "EE/iR5900-32 Recompiler Reset");
Perf::ee.reset();
EE::Profiler.Reset();
recAlloc();
@ -655,9 +652,6 @@ static void recShutdown()
safe_free(s_pInstCache);
s_nInstCacheSize = 0;
// FIXME Warning thread unsafe
Perf::dump();
}
void recStep()
@ -743,9 +737,6 @@ static void recExecute()
eeCpuExecuting = false;
// FIXME Warning thread unsafe
Perf::dump();
EE::Profiler.Print();
}
@ -2678,7 +2669,7 @@ StartRecomp:
iDumpBlock(s_pCurBlockEx->startpc, s_pCurBlockEx->size*4, s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size);
}
#endif
Perf::ee.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
Perf::ee.RegisterPC((void*)s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
recPtr = xGetPtr();

View File

@ -388,7 +388,7 @@ void vtlb_dynarec_init()
HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ExecOnly());
Perf::any.map((uptr)m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
Perf::any.Register(m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
}
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -31,7 +31,6 @@ alignas(__pagesize) static u8 vu1_RecDispatchers[mVUdispCacheSize];
void mVUreserveCache(microVU& mVU)
{
mVU.cache_reserve = new RecompiledCodeReserve(StringUtil::StdStringFromFormat("Micro VU%u Recompiler Cache", mVU.index));
mVU.cache_reserve->SetProfilerName(StringUtil::StdStringFromFormat("mVU%urec", mVU.index));
const size_t alloc_offset = mVU.index ? HostMemoryMap::mVU0recOffset : HostMemoryMap::mVU1recOffset;
mVU.cache_reserve->Assign(GetVmMemory().CodeMemory(), alloc_offset, mVU.cacheSize * _1mb);
@ -128,11 +127,6 @@ void mVUreset(microVU& mVU, bool resetReserve)
}
HostSys::MemProtect(mVU.dispCache, mVUdispCacheSize, PageAccess_ExecOnly());
if (mVU.index)
Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU1 Dispatcher");
else
Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher");
}
// Free Allocated Resources

View File

@ -997,7 +997,13 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
perf_and_return:
Perf::vu.map((uptr)thisPtr, x86Ptr - thisPtr, startPC);
if (mVU.regs().start_pc == startPC)
{
if (mVU.index)
Perf::vu1.RegisterPC(thisPtr, static_cast<u32>(x86Ptr - thisPtr), startPC);
else
Perf::vu0.RegisterPC(thisPtr, static_cast<u32>(x86Ptr - thisPtr), startPC);
}
return thisPtr;
}

View File

@ -94,6 +94,9 @@ void mVUdispatcherAB(mV)
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
Perf::any.Register(mVU.startFunct, static_cast<u32>(xGetPtr() - mVU.startFunct),
mVU.index ? "VU1StartFunc" : "VU0StartFunc");
}
// Generates the code for resuming/exit xgkick
@ -134,6 +137,9 @@ void mVUdispatcherCD(mV)
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
Perf::any.Register(mVU.startFunctXG, static_cast<u32>(xGetPtr() - mVU.startFunctXG),
mVU.index ? "VU1StartFuncXG" : "VU0StartFuncXG");
}
void mvuGenerateWaitMTVU(mV)
@ -211,6 +217,9 @@ void mvuGenerateWaitMTVU(mV)
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
Perf::any.Register(mVU.waitMTVU, static_cast<u32>(xGetPtr() - mVU.waitMTVU),
mVU.index ? "VU1WaitMTVU" : "VU0WaitMTVU");
}
void mvuGenerateCopyPipelineState(mV)
@ -263,6 +272,9 @@ void mvuGenerateCopyPipelineState(mV)
pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");
Perf::any.Register(mVU.copyPLState, static_cast<u32>(xGetPtr() - mVU.copyPLState),
mVU.index ? "VU1CopyPLState" : "VU0CopyPLState");
}
//------------------------------------------------------------------

View File

@ -361,7 +361,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
VifUnpackSSE_Dynarec(v, block).CompileRoutine();
Perf::vif.map((uptr)v.recWritePtr, xGetPtr() - v.recWritePtr, block.upkType /* FIXME ideally a key*/);
Perf::vif.RegisterPC(v.recWritePtr, xGetPtr() - v.recWritePtr, block.upkType /* FIXME ideally a key*/);
v.recWritePtr = xGetPtr();
return &block;

View File

@ -15,6 +15,7 @@
#include "PrecompiledHeader.h"
#include "newVif_UnpackSSE.h"
#include "common/Perf.h"
#include "fmt/core.h"
#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
@ -346,7 +347,6 @@ void VifUnpackSSE_Init()
DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
nVifUpkExec = new RecompiledCodeReserve("VIF SSE-optimized Unpacking Functions");
nVifUpkExec->SetProfilerName("iVIF-SSE");
nVifUpkExec->Assign(GetVmMemory().CodeMemory(), HostMemoryMap::VIFUnpackRecOffset, _1mb);
xSetPtr(*nVifUpkExec);
@ -365,6 +365,8 @@ void VifUnpackSSE_Init()
nVifUpkExec->GetPtr(),
(uint)(xGetPtr() - nVifUpkExec->GetPtr())
);
Perf::any.Register(nVifUpkExec->GetPtr(), xGetPtr() - nVifUpkExec->GetPtr(), "VIF Unpack");
}
void VifUnpackSSE_Destroy()