mirror of https://github.com/PCSX2/pcsx2.git
x86: Move dispatchers to recompiler code space
parent 5f11ba0445
commit 377746f155
@@ -140,12 +140,6 @@ namespace HostSys
extern void MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode);

template <uint size>
void MemProtectStatic(u8 (&arr)[size], const PageProtectionMode& mode)
{
MemProtect(arr, size, mode);
}

extern std::string GetFileMappingName(const char* prefix);
extern void* CreateSharedMemory(const char* name, size_t size);
extern void DestroySharedMemory(void* ptr);
@@ -32,10 +32,10 @@ namespace x86Emitter
// Special form for calling functions. This form automatically resolves the
// correct displacement based on the size of the instruction being generated.
void operator()(void* func) const
void operator()(const void* func) const
{
if (isJmp)
xJccKnownTarget(Jcc_Unconditional, (void*)(uptr)func, false); // double cast to/from (uptr) needed to appease GCC
xJccKnownTarget(Jcc_Unconditional, (const void*)(uptr)func, false); // double cast to/from (uptr) needed to appease GCC
else
{
// calls are relative to the instruction after this one, and length is

@@ -58,32 +58,32 @@ namespace x86Emitter
// FIXME: current 64 bits is mostly a copy/past potentially it would require to push/pop
// some registers. But I think it is enough to handle the first call.

void operator()(void* f, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const;
void operator()(const void* f, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const;

void operator()(void* f, u32 a1, const xRegister32& a2) const;
void operator()(void* f, const xIndirect32& a1) const;
void operator()(void* f, u32 a1, u32 a2) const;
void operator()(void* f, void* a1) const;
void operator()(const void* f, u32 a1, const xRegister32& a2) const;
void operator()(const void* f, const xIndirect32& a1) const;
void operator()(const void* f, u32 a1, u32 a2) const;
void operator()(const void* f, void* a1) const;

void operator()(void* f, const xRegisterLong& a1, const xRegisterLong& a2 = xEmptyReg) const;
void operator()(void* f, u32 a1, const xRegisterLong& a2) const;
void operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2 = xEmptyReg) const;
void operator()(const void* f, u32 a1, const xRegisterLong& a2) const;

template <typename T>
__fi void operator()(T* func, u32 a1, const xRegisterLong& a2 = xEmptyReg) const
{
(*this)((void*)func, a1, a2);
(*this)((const void*)func, a1, a2);
}

template <typename T>
__fi void operator()(T* func, const xIndirect32& a1) const
{
(*this)((void*)func, a1);
(*this)((const void*)func, a1);
}

template <typename T>
__fi void operator()(T* func, u32 a1, u32 a2) const
{
(*this)((void*)func, a1, a2);
(*this)((const void*)func, a1, a2);
}

void operator()(const xIndirectNative& f, const xRegisterLong& a1 = xEmptyReg, const xRegisterLong& a2 = xEmptyReg) const;
@@ -78,7 +78,7 @@ namespace x86Emitter
}
}

void xImpl_FastCall::operator()(void* f, const xRegister32& a1, const xRegister32& a2) const
void xImpl_FastCall::operator()(const void* f, const xRegister32& a1, const xRegister32& a2) const
{
prepareRegsForFastcall(a1, a2);
uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;

@@ -93,7 +93,7 @@ namespace x86Emitter
}
}

void xImpl_FastCall::operator()(void* f, const xRegisterLong& a1, const xRegisterLong& a2) const
void xImpl_FastCall::operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2) const
{
prepareRegsForFastcall(a1, a2);
uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;

@@ -108,7 +108,7 @@ namespace x86Emitter
}
}

void xImpl_FastCall::operator()(void* f, u32 a1, const xRegisterLong& a2) const
void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegisterLong& a2) const
{
if (!a2.IsEmpty())
{

@@ -118,13 +118,13 @@ namespace x86Emitter
(*this)(f, arg1reg, arg2reg);
}

void xImpl_FastCall::operator()(void* f, void* a1) const
void xImpl_FastCall::operator()(const void* f, void* a1) const
{
xLEA(arg1reg, ptr[a1]);
(*this)(f, arg1reg, arg2reg);
}

void xImpl_FastCall::operator()(void* f, u32 a1, const xRegister32& a2) const
void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegister32& a2) const
{
if (!a2.IsEmpty())
{

@@ -134,13 +134,13 @@ namespace x86Emitter
(*this)(f, arg1regd, arg2regd);
}

void xImpl_FastCall::operator()(void* f, const xIndirect32& a1) const
void xImpl_FastCall::operator()(const void* f, const xIndirect32& a1) const
{
xMOV(arg1regd, a1);
(*this)(f, arg1regd);
}

void xImpl_FastCall::operator()(void* f, u32 a1, u32 a2) const
void xImpl_FastCall::operator()(const void* f, u32 a1, u32 a2) const
{
xMOV(arg1regd, a1);
xMOV(arg2regd, a2);
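Note on the displacement math in the hunks above: a direct x86 CALL/JMP with a rel32 operand is five bytes long, and the CPU resolves the 32-bit offset relative to the instruction that follows it, which is why the code measures from xGetPtr() + 5. A minimal sketch of the reachability idea, using an illustrative helper name that is not part of the emitter's real API:

    #include <cstdint>

    // Sketch only: can a rel32 call reach 'target' from the current emit position?
    // 'emitPos' stands in for xGetPtr(); the check mirrors the disp computation above.
    static bool CanUseRel32(const unsigned char* emitPos, const void* target)
    {
        // rel32 is applied to the address of the *next* instruction (emitPos + 5).
        std::intptr_t disp = reinterpret_cast<std::intptr_t>(target) -
                             reinterpret_cast<std::intptr_t>(emitPos + 5);
        return disp == static_cast<std::int32_t>(disp); // fits in the signed 32-bit field
    }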
@@ -144,13 +144,6 @@ emitterT void x86SetJ32A(u32* j32)
x86SetJ32(j32);
}

////////////////////////////////////////////////////
emitterT void x86Align(int bytes)
{
// forward align
x86Ptr = (u8*)(((uptr)x86Ptr + bytes - 1) & ~(bytes - 1));
}

/********************/
/* IX86 instructions */
/********************/
@@ -29,15 +29,12 @@
//------------------------------------------------------------------
// legacy jump/align functions
//------------------------------------------------------------------
ATTR_DEP extern void x86SetPtr(u8* ptr);
ATTR_DEP extern void x86SetJ8(u8* j8);
ATTR_DEP extern void x86SetJ8A(u8* j8);
ATTR_DEP extern void x86SetJ16(u16* j16);
ATTR_DEP extern void x86SetJ16A(u16* j16);
ATTR_DEP extern void x86SetJ32(u32* j32);
ATTR_DEP extern void x86SetJ32A(u32* j32);
ATTR_DEP extern void x86Align(int bytes);
ATTR_DEP extern void x86AlignExecutable(int align);
//------------------------------------------------------------------

////////////////////////////////////
@@ -38,7 +38,6 @@ GSDrawScanline::GSDrawScanline()
: m_sp_map("GSSetupPrim")
, m_ds_map("GSDrawScanline")
{
GSCodeReserve::GetInstance().AllowModification();
GSCodeReserve::GetInstance().Reset();
}

@@ -46,8 +45,6 @@ GSDrawScanline::~GSDrawScanline()
{
if (const size_t used = GSCodeReserve::GetInstance().GetMemoryUsed(); used > 0)
DevCon.WriteLn("SW JIT generated %zu bytes of code", used);

GSCodeReserve::GetInstance().ForbidModification();
}

void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local)
@@ -334,13 +334,3 @@ void RecompiledCodeReserve::Reset()
std::memset(m_baseptr, 0xCC, m_size);
}
}

void RecompiledCodeReserve::AllowModification()
{
HostSys::MemProtect(m_baseptr, m_size, PageAccess_Any());
}

void RecompiledCodeReserve::ForbidModification()
{
HostSys::MemProtect(m_baseptr, m_size, PageProtectionMode().Read().Execute());
}

@@ -161,9 +161,6 @@ public:
void Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size);
void Reset();

void ForbidModification();
void AllowModification();

operator u8*() { return m_baseptr; }
operator const u8*() const { return m_baseptr; }
};
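AllowModification()/ForbidModification() are the mechanism the dispatchers move onto in this commit: rather than flipping page protection on per-file static arrays with MemProtectStatic(), the whole recompiler reserve is made writable while code is regenerated and locked back to read+execute afterwards, as the GSDrawScanline constructor/destructor above already do. A hedged sketch of that call pattern, assuming a reserve object named codeReserve (illustrative; not a specific call site in this commit):

    // Illustrative only: regenerate code inside a RecompiledCodeReserve.
    codeReserve.AllowModification();   // RW so the reserve can be cleared and emitted into
    codeReserve.Reset();               // fill with 0xCC and rewind
    xSetPtr(codeReserve);              // point the emitter at the reserve base
    // ... emit dispatchers and recompiled blocks here ...
    codeReserve.ForbidModification();  // back to read+execute for normal operation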
@@ -1245,9 +1245,6 @@ void vtlb_Init()
// The LUT is only used for 1 game so we allocate it only when the gamefix is enabled (save 4MB)
if (EmuConfig.Gamefixes.GoemonTlbHack)
vtlb_Alloc_Ppmap();

extern void vtlb_dynarec_init();
vtlb_dynarec_init();
}

// vtlb_Reset -- Performs a COP0-level reset of the PS2's TLB.

@@ -124,6 +124,8 @@ extern int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAlloc
extern void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg);
extern void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg);

extern void vtlb_DynGenDispatchers();

// --------------------------------------------------------------------------------------
// VtlbMemoryReserve
// --------------------------------------------------------------------------------------
@@ -165,9 +165,9 @@ public:
{
}

void SetJITCompile(void (*recompiler_)())
void SetJITCompile(const void *recompiler_)
{
recompiler = (uptr)recompiler_;
recompiler = reinterpret_cast<uptr>(recompiler_);
}

BASEBLOCKEX* New(u32 startpc, uptr fnptr);
@@ -171,19 +171,14 @@ static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* forma
// Dynamically Compiled Dispatchers - R3000A style
// =====================================================================================================

static void iopRecRecompile(const u32 startpc);
static void iopRecRecompile(u32 startpc);

// Recompiled code buffer for EE recompiler dispatchers!
alignas(__pagesize) static u8 iopRecDispatchers[__pagesize];

typedef void DynGenFunc();

static DynGenFunc* iopDispatcherEvent = NULL;
static DynGenFunc* iopDispatcherReg = NULL;
static DynGenFunc* iopJITCompile = NULL;
static DynGenFunc* iopJITCompileInBlock = NULL;
static DynGenFunc* iopEnterRecompiledCode = NULL;
static DynGenFunc* iopExitRecompiledCode = NULL;
static const void* iopDispatcherEvent = nullptr;
static const void* iopDispatcherReg = nullptr;
static const void* iopJITCompile = nullptr;
static const void* iopJITCompileInBlock = nullptr;
static const void* iopEnterRecompiledCode = nullptr;
static const void* iopExitRecompiledCode = nullptr;

static void recEventTest()
{

@@ -192,7 +187,7 @@ static void recEventTest()
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
static const void* _DynGen_JITCompile()
{
pxAssertMsg(iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks.");

@@ -206,18 +201,18 @@ static DynGenFunc* _DynGen_JITCompile()
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

return (DynGenFunc*)retval;
return retval;
}

static DynGenFunc* _DynGen_JITCompileInBlock()
static const void* _DynGen_JITCompileInBlock()
{
u8* retval = xGetPtr();
xJMP((void*)iopJITCompile);
return (DynGenFunc*)retval;
return retval;
}

// called when jumping to variable pc address
static DynGenFunc* _DynGen_DispatcherReg()
static const void* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr();

@@ -227,13 +222,13 @@ static DynGenFunc* _DynGen_DispatcherReg()
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

return (DynGenFunc*)retval;
return retval;
}

// --------------------------------------------------------------------------------------
// EnterRecompiledCode - dynamic compilation stub!
// --------------------------------------------------------------------------------------
static DynGenFunc* _DynGen_EnterRecompiledCode()
static const void* _DynGen_EnterRecompiledCode()
{
// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
// allocating any room on the stack for it (which is important since the IOP's entry

@@ -251,27 +246,21 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
xJMP((void*)iopDispatcherReg);

// Save an exit point
iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
iopExitRecompiledCode = xGetPtr();
}

xRET();

return (DynGenFunc*)retval;
return retval;
}

static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ReadWrite());

// clear the buffer to 0xcc (easier debugging).
memset(iopRecDispatchers, 0xcc, __pagesize);

xSetPtr(iopRecDispatchers);
const u8* start = xGetAlignedCallTarget();

// Place the EventTest and DispatcherReg stuff at the top, because they get called the
// most and stand to benefit from strong alignment and direct referencing.
iopDispatcherEvent = (DynGenFunc*)xGetPtr();
iopDispatcherEvent = xGetPtr();
xFastCall((void*)recEventTest);
iopDispatcherReg = _DynGen_DispatcherReg();

@@ -279,11 +268,9 @@ static void _DynGen_Dispatchers()
iopJITCompileInBlock = _DynGen_JITCompileInBlock();
iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();

HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ExecOnly());

recBlocks.SetJITCompile(iopJITCompile);

Perf::any.Register((void*)iopRecDispatchers, 4096, "IOP Dispatcher");
Perf::any.Register(start, xGetPtr() - start, "IOP Dispatcher");
}

////////////////////////////////////////////////////
@@ -931,8 +918,6 @@ static void recAlloc()
if (!s_pInstCache)
pxFailRel("Failed to allocate R3000 InstCache array.");
}

_DynGen_Dispatchers();
}

void recResetIOP()

@@ -941,6 +926,9 @@ void recResetIOP()
recAlloc();
recMem->Reset();
xSetPtr(*recMem);
_DynGen_Dispatchers();
recPtr = xGetPtr();

iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) / 4)));

@@ -990,7 +978,6 @@ void recResetIOP()
recBlocks.Reset();
g_psxMaxRecMem = 0;

recPtr = *recMem;
psxbranch = 0;
}

@@ -1036,7 +1023,7 @@ static __noinline s32 recExecuteBlock(s32 eeCycles)
// mov edx,dword ptr [iopCycleEE (832A84h)]
// lea eax,[edx+ecx]

iopEnterRecompiledCode();
((void(*)())iopEnterRecompiledCode)();

return psxRegs.iopBreak + psxRegs.iopCycleEE;
}

@@ -1579,9 +1566,8 @@ static void iopRecRecompile(const u32 startpc)
recResetIOP();
}

x86SetPtr(recPtr);
x86Align(16);
recPtr = x86Ptr;
xSetPtr(recPtr);
recPtr = xGetAlignedCallTarget();

s_pCurBlock = PSX_GETBLOCK(startpc);
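Because the dispatcher entry points are now stored as plain const void* addresses inside the shared code reserve rather than typed DynGenFunc* pointers, call sites cast back to a function type at the point of use, exactly as ((void(*)())iopEnterRecompiledCode)(); does in the hunk above. A small self-contained illustration of that pattern (the names below are placeholders, not the emulator's):

    using DispatcherFn = void (*)();

    // Illustrative only: invoke a code pointer captured from the emitter.
    // Valid only when the bytes behind 'entry' really are executable code
    // with the plain C calling convention, as the generated dispatchers are.
    inline void CallDispatcher(const void* entry)
    {
        ((DispatcherFn)entry)(); // C-style cast, mirroring the call sites above
    }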
@@ -360,19 +360,14 @@ static void recRecompile(const u32 startpc);
static void dyna_block_discard(u32 start, u32 sz);
static void dyna_page_reset(u32 start, u32 sz);

// Recompiled code buffer for EE recompiler dispatchers!
alignas(__pagesize) static u8 eeRecDispatchers[__pagesize];

typedef void DynGenFunc();

static DynGenFunc* DispatcherEvent = NULL;
static DynGenFunc* DispatcherReg = NULL;
static DynGenFunc* JITCompile = NULL;
static DynGenFunc* JITCompileInBlock = NULL;
static DynGenFunc* EnterRecompiledCode = NULL;
static DynGenFunc* ExitRecompiledCode = NULL;
static DynGenFunc* DispatchBlockDiscard = NULL;
static DynGenFunc* DispatchPageReset = NULL;
static const void* DispatcherEvent = nullptr;
static const void* DispatcherReg = nullptr;
static const void* JITCompile = nullptr;
static const void* JITCompileInBlock = nullptr;
static const void* EnterRecompiledCode = nullptr;
static const void* ExitRecompiledCode = nullptr;
static const void* DispatchBlockDiscard = nullptr;
static const void* DispatchPageReset = nullptr;

static void recEventTest()
{

@@ -387,13 +382,13 @@ static void recEventTest()
// The address for all cleared blocks. It recompiles the current pc and then
// dispatches to the recompiled block address.
static DynGenFunc* _DynGen_JITCompile()
static const void* _DynGen_JITCompile()
{
pxAssertMsg(DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks.");

u8* retval = xGetAlignedCallTarget();

xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc]);
xFastCall((const void*)recRecompile, ptr32[&cpuRegs.pc]);

// C equivalent:
// u32 addr = cpuRegs.pc;

@@ -405,18 +400,18 @@ static DynGenFunc* _DynGen_JITCompile()
xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

return (DynGenFunc*)retval;
return retval;
}

static DynGenFunc* _DynGen_JITCompileInBlock()
static const void* _DynGen_JITCompileInBlock()
{
u8* retval = xGetAlignedCallTarget();
xJMP((void*)JITCompile);
return (DynGenFunc*)retval;
xJMP(JITCompile);
return retval;
}

// called when jumping to variable pc address
static DynGenFunc* _DynGen_DispatcherReg()
static const void* _DynGen_DispatcherReg()
{
u8* retval = xGetPtr(); // fallthrough target, can't align it!

@@ -430,19 +425,19 @@ static DynGenFunc* _DynGen_DispatcherReg()
xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

return (DynGenFunc*)retval;
return retval;
}

static DynGenFunc* _DynGen_DispatcherEvent()
static const void* _DynGen_DispatcherEvent()
{
u8* retval = xGetPtr();

xFastCall((void*)recEventTest);
xFastCall((const void*)recEventTest);

return (DynGenFunc*)retval;
return retval;
}

static DynGenFunc* _DynGen_EnterRecompiledCode()
static const void* _DynGen_EnterRecompiledCode()
{
pxAssertDev(DispatcherReg != NULL, "Dynamically generated dispatchers are required prior to generating EnterRecompiledCode!");
@@ -461,39 +456,33 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
xJMP((void*)DispatcherReg);

// Save an exit point
ExitRecompiledCode = (DynGenFunc*)xGetPtr();
ExitRecompiledCode = xGetPtr();
}

xRET();

return (DynGenFunc*)retval;
return retval;
}

static DynGenFunc* _DynGen_DispatchBlockDiscard()
static const void* _DynGen_DispatchBlockDiscard()
{
u8* retval = xGetPtr();
xFastCall((void*)dyna_block_discard);
xJMP((void*)ExitRecompiledCode);
return (DynGenFunc*)retval;
xFastCall((const void*)dyna_block_discard);
xJMP((const void*)ExitRecompiledCode);
return retval;
}

static DynGenFunc* _DynGen_DispatchPageReset()
static const void* _DynGen_DispatchPageReset()
{
u8* retval = xGetPtr();
xFastCall((void*)dyna_page_reset);
xJMP((void*)ExitRecompiledCode);
return (DynGenFunc*)retval;
xFastCall((const void*)dyna_page_reset);
xJMP((const void*)ExitRecompiledCode);
return retval;
}

static void _DynGen_Dispatchers()
{
// In case init gets called multiple times:
HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ReadWrite());

// clear the buffer to 0xcc (easier debugging).
memset(eeRecDispatchers, 0xcc, __pagesize);

xSetPtr(eeRecDispatchers);
const u8* start = xGetAlignedCallTarget();

// Place the EventTest and DispatcherReg stuff at the top, because they get called the
// most and stand to benefit from strong alignment and direct referencing.

@@ -506,11 +495,9 @@ static void _DynGen_Dispatchers()
DispatchBlockDiscard = _DynGen_DispatchBlockDiscard();
DispatchPageReset = _DynGen_DispatchPageReset();

HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ExecOnly());

recBlocks.SetJITCompile(JITCompile);

Perf::any.Register((void*)eeRecDispatchers, 4096, "EE Dispatcher");
Perf::any.Register(start, static_cast<u32>(xGetPtr() - start), "EE Dispatcher");
}

@@ -597,10 +584,6 @@ static void recAlloc()
if (!s_pInstCache)
pxFailRel("Failed to allocate R5900 InstCache array");
}

// No errors.. Proceed with initialization:

_DynGen_Dispatchers();
}

alignas(16) static u16 manual_page[Ps2MemSize::MainRam >> 12];
@@ -616,6 +599,11 @@ static void recResetRaw()
recAlloc();

recMem->Reset();
xSetPtr(*recMem);
_DynGen_Dispatchers();
vtlb_DynGenDispatchers();
recPtr = xGetPtr();

ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize);
memset(recRAMCopy, 0, Ps2MemSize::MainRam);

@@ -628,10 +616,6 @@ static void recResetRaw()
mmap_ResetBlockTracking();
vtlb_ClearLoadStoreInfo();

x86SetPtr(*recMem);

recPtr = *recMem;

g_branch = 0;
g_resetEeScalingStats = true;
}

@@ -644,7 +628,7 @@ static void recShutdown()
recBlocks.Reset();

recRAM = recROM = recROM1 = recROM2 = NULL;
recRAM = recROM = recROM1 = recROM2 = nullptr;

safe_free(s_pInstCache);
s_nInstCacheSize = 0;

@@ -720,13 +704,7 @@ static void recExecute()
if (!fastjmp_set(&m_SetJmp_StateCheck))
{
eeCpuExecuting = true;

// Important! Most of the console logging and such has cancel points in it. This is great
// in Windows, where SEH lets us safely kill a thread from anywhere we want. This is bad
// in Linux, which cannot have a C++ exception cross the recompiler. Hence the changing
// of the cancelstate here!

EnterRecompiledCode();
((void(*)())EnterRecompiledCode)();

// Generally unreachable code here ...
}
@@ -1636,11 +1614,17 @@ void recMemcheck(u32 op, u32 bits, bool store)
// Preserve ecx (address) and edx (address+size) because we aren't breaking
// out of this loops iteration and dynarecMemLogcheck will clobber them
// Also keep 16 byte stack alignment
if(!(checks[i].result & MEMCHECK_BREAK))
if (!(checks[i].result & MEMCHECK_BREAK))
{
xPUSH(eax); xPUSH(ebx); xPUSH(ecx); xPUSH(edx);
xPUSH(eax);
xPUSH(ebx);
xPUSH(ecx);
xPUSH(edx);
xFastCall((void*)dynarecMemLogcheck, ecx, edx);
xPOP(edx); xPOP(ecx); xPOP(ebx); xPOP(eax);
xPOP(edx);
xPOP(ecx);
xPOP(ebx);
xPOP(eax);
}
else
{

@@ -1926,7 +1910,7 @@ void recompileNextInstruction(bool delayslot, bool swapped_delay_slot)
std::string disasm = "";
disR5900Fasm(disasm, memRead32(i), i, false);
Console.Warning("%x %s%08X %s", i, i == pc - 4 ? "*" : i == p ? "=" :
" ",
" ",
memRead32(i), disasm.c_str());
}
break;

@@ -1952,7 +1936,7 @@ void recompileNextInstruction(bool delayslot, bool swapped_delay_slot)
disasm = "";
disR5900Fasm(disasm, memRead32(i), i, false);
Console.Warning("%x %s%08X %s", i, i == pc - 4 ? "*" : i == p ? "=" :
" ",
" ",
memRead32(i), disasm.c_str());
}
break;
@@ -239,13 +239,9 @@ namespace vtlb_private
}
} // namespace vtlb_private

// ------------------------------------------------------------------------
// allocate one page for our naked indirect dispatcher function.
// this *must* be a full page, since we'll give it execution permission later.
// If it were smaller than a page we'd end up allowing execution rights on some
// other vars additionally (bad!).
//
alignas(__pagesize) static u8 m_IndirectDispatchers[__pagesize];
static constexpr u32 INDIRECT_DISPATCHER_SIZE = 32;
static constexpr u32 INDIRECT_DISPATCHERS_SIZE = 2 * 5 * 2 * INDIRECT_DISPATCHER_SIZE;
static u8* m_IndirectDispatchers = nullptr;

// ------------------------------------------------------------------------
// mode - 0 for read, 1 for write!

@@ -255,16 +251,8 @@ static u8* GetIndirectDispatcherPtr(int mode, int operandsize, int sign = 0)
{
assert(mode || operandsize >= 3 ? !sign : true);

// Each dispatcher is aligned to 64 bytes. The actual dispatchers are only like
// 20-some bytes each, but 64 byte alignment on functions that are called
// more frequently than a hot sex hotline at 1:15am is probably a good thing.

// 7*64? 5 widths with two sign extension modes for 8 and 16 bit reads

// Gregory: a 32 bytes alignment is likely enough and more cache friendly
const int A = 32;

return &m_IndirectDispatchers[(mode * (8 * A)) + (sign * 5 * A) + (operandsize * A)];
return &m_IndirectDispatchers[(mode * (8 * INDIRECT_DISPATCHER_SIZE)) + (sign * 5 * INDIRECT_DISPATCHER_SIZE) +
(operandsize * INDIRECT_DISPATCHER_SIZE)];
}

// ------------------------------------------------------------------------

@@ -359,18 +347,12 @@ static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the
// process will be ignored.
//
void vtlb_dynarec_init()
void vtlb_DynGenDispatchers()
{
static bool hasBeenCalled = false;
if (hasBeenCalled)
return;
hasBeenCalled = true;

// In case init gets called multiple times:
HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ReadWrite());
m_IndirectDispatchers = xGetAlignedCallTarget();

// clear the buffer to 0xcc (easier debugging).
memset(m_IndirectDispatchers, 0xcc, __pagesize);
std::memset(m_IndirectDispatchers, 0xcc, INDIRECT_DISPATCHERS_SIZE);

for (int mode = 0; mode < 2; ++mode)
{

@@ -385,9 +367,9 @@ void vtlb_dynarec_init()
}
}

HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ExecOnly());
Perf::any.Register(m_IndirectDispatchers, INDIRECT_DISPATCHERS_SIZE, "TLB Dispatcher");

Perf::any.Register(m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
xSetPtr(m_IndirectDispatchers + INDIRECT_DISPATCHERS_SIZE);
}

//////////////////////////////////////////////////////////////////////////////////////////
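To make the index arithmetic above concrete: every dispatcher stub occupies a fixed 32-byte slot, reads use slots 0-6 (five zero-extended widths plus the sign-extended 8/16-bit variants), and writes start at slot 8, so the largest offset used stays well inside the 2 * 5 * 2 * 32 = 640 bytes reserved by INDIRECT_DISPATCHERS_SIZE. A small worked sketch (constants copied from the hunk; the helper name is only for illustration):

    // Worked example of the slot layout behind GetIndirectDispatcherPtr().
    constexpr unsigned kSlotBytes  = 32;              // INDIRECT_DISPATCHER_SIZE
    constexpr unsigned kTableBytes = 2 * 5 * 2 * 32;  // INDIRECT_DISPATCHERS_SIZE = 640

    constexpr unsigned dispatcherOffset(int mode, int operandsize, int sign = 0)
    {
        // mode: 0 = read, 1 = write (writes begin after the 8 read slots)
        // sign: 1 selects the sign-extended 8/16-bit read variants
        // operandsize: 0..4 for 8/16/32/64/128-bit accesses
        return (mode * 8 + sign * 5 + operandsize) * kSlotBytes;
    }

    static_assert(dispatcherOffset(1, 4) + kSlotBytes <= kTableBytes,
                  "the largest write stub fits inside the reserved area");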
@@ -25,8 +25,6 @@
//------------------------------------------------------------------
// Micro VU - Main Functions
//------------------------------------------------------------------
alignas(__pagesize) static u8 vu0_RecDispatchers[mVUdispCacheSize];
alignas(__pagesize) static u8 vu1_RecDispatchers[mVUdispCacheSize];

void mVUreserveCache(microVU& mVU)
{

@@ -49,18 +47,12 @@ void mVUinit(microVU& mVU, uint vuIndex)
mVU.progSize = (mVU.index ? 0x4000 : 0x1000) / 4;
mVU.progMemMask = mVU.progSize-1;
mVU.cacheSize = mVUcacheReserve;
mVU.cache = NULL;
mVU.dispCache = NULL;
mVU.startFunct = NULL;
mVU.exitFunct = NULL;
mVU.cache = nullptr;
mVU.startFunct = nullptr;
mVU.exitFunct = nullptr;

mVUreserveCache(mVU);

if (vuIndex)
mVU.dispCache = vu1_RecDispatchers;
else
mVU.dispCache = vu0_RecDispatchers;

mVU.regAlloc.reset(new microRegAlloc(mVU.index));
}

@@ -82,15 +74,12 @@ void mVUreset(microVU& mVU, bool resetReserve)
if (resetReserve)
mVU.cache_reserve->Reset();

HostSys::MemProtect(mVU.dispCache, mVUdispCacheSize, PageAccess_ReadWrite());
memset(mVU.dispCache, 0xcc, mVUdispCacheSize);

x86SetPtr(mVU.dispCache);
xSetPtr(mVU.cache);
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);
mvuGenerateWaitMTVU(mVU);
mvuGenerateCopyPipelineState(mVU);
mVUemitSearch();
mVUGenerateWaitMTVU(mVU);
mVUGenerateCopyPipelineState(mVU);
mVUGenerateCompareState(mVU);

mVU.regs().nextBlockCycles = 0;
memset(&mVU.prog.lpState, 0, sizeof(mVU.prog.lpState));

@@ -104,10 +93,9 @@ void mVUreset(microVU& mVU, bool resetReserve)
mVU.prog.curFrame = 0;

// Setup Dynarec Cache Limits for Each Program
u8* z = mVU.cache;
mVU.prog.x86start = z;
mVU.prog.x86ptr = z;
mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);
mVU.prog.x86start = xGetAlignedCallTarget();
mVU.prog.x86ptr = mVU.prog.x86start;
mVU.prog.x86end = mVU.cache + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);

for (u32 i = 0; i < (mVU.progSize / 2); i++)
{

@@ -125,8 +113,6 @@ void mVUreset(microVU& mVU, bool resetReserve)
mVU.prog.quick[i].block = NULL;
mVU.prog.quick[i].prog = NULL;
}

HostSys::MemProtect(mVU.dispCache, mVUdispCacheSize, PageAccess_ExecOnly());
}

// Free Allocated Resources
@@ -37,6 +37,8 @@ using namespace x86Emitter;
#include "microVU_Profiler.h"
#include "common/Perf.h"

class microBlockManager;

struct microBlockLink
{
microBlock block;

@@ -49,135 +51,6 @@ struct microBlockLinkRef
u64 quick;
};

class microBlockManager
{
private:
microBlockLink *qBlockList, *qBlockEnd; // Quick Search
microBlockLink *fBlockList, *fBlockEnd; // Full Search
std::vector<microBlockLinkRef> quickLookup;
int qListI, fListI;

public:
inline int getFullListCount() const { return fListI; }
microBlockManager()
{
qListI = fListI = 0;
qBlockEnd = qBlockList = nullptr;
fBlockEnd = fBlockList = nullptr;
}
~microBlockManager() { reset(); }
void reset()
{
for (microBlockLink* linkI = qBlockList; linkI != nullptr;)
{
microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next;
_aligned_free(freeI);
}
for (microBlockLink* linkI = fBlockList; linkI != nullptr;)
{
microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next;
_aligned_free(freeI);
}
qListI = fListI = 0;
qBlockEnd = qBlockList = nullptr;
fBlockEnd = fBlockList = nullptr;
quickLookup.clear();
};
microBlock* add(microBlock* pBlock)
{
microBlock* thisBlock = search(&pBlock->pState);
if (!thisBlock)
{
u8 fullCmp = pBlock->pState.needExactMatch;
if (fullCmp)
fListI++;
else
qListI++;

microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 32);
newBlock->block.jumpCache = nullptr;
newBlock->next = nullptr;

if (blockEnd)
{
blockEnd->next = newBlock;
blockEnd = newBlock;
}
else
{
blockEnd = blockList = newBlock;
}

std::memcpy(&newBlock->block, pBlock, sizeof(microBlock));
thisBlock = &newBlock->block;

quickLookup.push_back({&newBlock->block, pBlock->pState.quick64[0]});
}
return thisBlock;
}
__ri microBlock* search(microRegInfo* pState)
{
if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State)
{
microBlockLink* prevI = nullptr;
for (microBlockLink* linkI = fBlockList; linkI != nullptr; prevI = linkI, linkI = linkI->next)
{
if (mVUquickSearch(pState, &linkI->block.pState, sizeof(microRegInfo)))
{
if (linkI != fBlockList)
{
prevI->next = linkI->next;
linkI->next = fBlockList;
fBlockList = linkI;
}

return &linkI->block;
}
}
}
else // Can do Simple Search (Only Matches the Important Pipeline Stuff)
{
const u64 quick64 = pState->quick64[0];
for (const microBlockLinkRef& ref : quickLookup)
{
if (ref.quick != quick64) continue;
if (doConstProp && (ref.pBlock->pState.vi15 != pState->vi15)) continue;
if (doConstProp && (ref.pBlock->pState.vi15v != pState->vi15v)) continue;
return ref.pBlock;
}
}
return nullptr;
}
void printInfo(int pc, bool printQuick)
{
int listI = printQuick ? qListI : fListI;
if (listI < 7)
return;
microBlockLink* linkI = printQuick ? qBlockList : fBlockList;
for (int i = 0; i <= listI; i++)
{
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].x + (linkI->block.pState.VF[j].y << 8) + (linkI->block.pState.VF[j].z << 16) + (linkI->block.pState.VF[j].w << 24);
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
DevCon.WriteLn(Color_Green,
"[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]",
pc, i, crc, linkI->block.pState.q,
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
linkI->block.pState.blockType, viCRC, vfCRC);
linkI = linkI->next;
}
}
};

struct microRange
{
s32 start; // Start PC (The opcode the block starts at)
@@ -246,11 +119,11 @@ struct microVU

RecompiledCodeReserve* cache_reserve;

u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
u8* dispCache; // Dispatchers Cache (where startFunct and exitFunct are written to)
u8* startFunct; // Function Ptr to the recompiler dispatcher (start)
u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
u8* compareStateF;// Function Ptr to search which compares all state.
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
u8* copyPLState; // Ptr to function to copy pipeline state into microVU
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick

@@ -275,6 +148,139 @@ struct microVU
{
return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs();
}

__fi u32 compareState(microRegInfo* lhs, microRegInfo* rhs) const {
return reinterpret_cast<u32(*)(void*, void*)>(compareStateF)(lhs, rhs);
}
};

class microBlockManager
{
private:
microBlockLink *qBlockList, *qBlockEnd; // Quick Search
microBlockLink *fBlockList, *fBlockEnd; // Full Search
std::vector<microBlockLinkRef> quickLookup;
int qListI, fListI;

public:
inline int getFullListCount() const { return fListI; }
microBlockManager()
{
qListI = fListI = 0;
qBlockEnd = qBlockList = nullptr;
fBlockEnd = fBlockList = nullptr;
}
~microBlockManager() { reset(); }
void reset()
{
for (microBlockLink* linkI = qBlockList; linkI != nullptr;)
{
microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next;
_aligned_free(freeI);
}
for (microBlockLink* linkI = fBlockList; linkI != nullptr;)
{
microBlockLink* freeI = linkI;
safe_delete_array(linkI->block.jumpCache);
linkI = linkI->next;
_aligned_free(freeI);
}
qListI = fListI = 0;
qBlockEnd = qBlockList = nullptr;
fBlockEnd = fBlockList = nullptr;
quickLookup.clear();
};
microBlock* add(microVU& mVU, microBlock* pBlock)
{
microBlock* thisBlock = search(mVU, &pBlock->pState);
if (!thisBlock)
{
u8 fullCmp = pBlock->pState.needExactMatch;
if (fullCmp)
fListI++;
else
qListI++;

microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 32);
newBlock->block.jumpCache = nullptr;
newBlock->next = nullptr;

if (blockEnd)
{
blockEnd->next = newBlock;
blockEnd = newBlock;
}
else
{
blockEnd = blockList = newBlock;
}

std::memcpy(&newBlock->block, pBlock, sizeof(microBlock));
thisBlock = &newBlock->block;

quickLookup.push_back({&newBlock->block, pBlock->pState.quick64[0]});
}
return thisBlock;
}
__ri microBlock* search(microVU& mVU, microRegInfo* pState)
{
if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State)
{
microBlockLink* prevI = nullptr;
for (microBlockLink* linkI = fBlockList; linkI != nullptr; prevI = linkI, linkI = linkI->next)
{
if (mVU.compareState(pState, &linkI->block.pState) == 0)
{
if (linkI != fBlockList)
{
prevI->next = linkI->next;
linkI->next = fBlockList;
fBlockList = linkI;
}

return &linkI->block;
}
}
}
else // Can do Simple Search (Only Matches the Important Pipeline Stuff)
{
const u64 quick64 = pState->quick64[0];
for (const microBlockLinkRef& ref : quickLookup)
{
if (ref.quick != quick64) continue;
if (doConstProp && (ref.pBlock->pState.vi15 != pState->vi15)) continue;
if (doConstProp && (ref.pBlock->pState.vi15v != pState->vi15v)) continue;
return ref.pBlock;
}
}
return nullptr;
}
void printInfo(int pc, bool printQuick)
{
int listI = printQuick ? qListI : fListI;
if (listI < 7)
return;
microBlockLink* linkI = printQuick ? qBlockList : fBlockList;
for (int i = 0; i <= listI; i++)
{
u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].x + (linkI->block.pState.VF[j].y << 8) + (linkI->block.pState.VF[j].z << 16) + (linkI->block.pState.VF[j].w << 24);
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
DevCon.WriteLn(Color_Green,
"[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]",
pc, i, crc, linkI->block.pState.q,
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
linkI->block.pState.blockType, viCRC, vfCRC);
linkI = linkI->next;
}
}
};

// microVU rec structs
@@ -290,7 +290,7 @@ void normBranchCompile(microVU& mVU, u32 branchPC)
{
microBlock* pBlock;
blockCreate(branchPC / 8);
pBlock = mVUblocks[branchPC / 8]->search((microRegInfo*)&mVUregs);
pBlock = mVUblocks[branchPC / 8]->search(mVU, (microRegInfo*)&mVUregs);
if (pBlock)
xJMP(pBlock->x86ptrStart);
else

@@ -540,7 +540,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
microBlock* bBlock;
incPC2(1); // Check if Branch Non-Taken Side has already been recompiled
blockCreate(iPC / 2);
bBlock = mVUblocks[iPC / 2]->search((microRegInfo*)&mVUregs);
bBlock = mVUblocks[iPC / 2]->search(mVU, (microRegInfo*)&mVUregs);
incPC2(-1);
if (bBlock) // Branch non-taken has already been compiled
{
@@ -531,7 +531,7 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
}
mVUblock.x86ptrStart = thisPtr;
mVUpBlock = mVUblocks[mVUstartPC / 2]->add(&mVUblock); // Add this block to block manager
mVUpBlock = mVUblocks[mVUstartPC / 2]->add(mVU, &mVUblock); // Add this block to block manager
mVUregs.needExactMatch = (mVUpBlock->pState.blockType) ? 7 : 0; // ToDo: Fix 1-Op block flag linking (MGS2:Demo/Sly Cooper)
mVUregs.blockType = 0;
mVUregs.viBackUp = 0;

@@ -988,7 +988,7 @@ perf_and_return:
// Returns the entry point of the block (compiles it if not found)
__fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState)
{
microBlock* pBlock = block->search((microRegInfo*)pState);
microBlock* pBlock = block->search(mVU, (microRegInfo*)pState);
if (pBlock)
return pBlock->x86ptrStart;
else
@@ -31,7 +31,7 @@ static bool mvuNeedsFPCRUpdate(mV)
// Generates the code for entering/exit recompiled blocks
void mVUdispatcherAB(mV)
{
mVU.startFunct = x86Ptr;
mVU.startFunct = xGetAlignedCallTarget();

{
xScopedStackFrame frame(false, true);

@@ -92,9 +92,6 @@ void mVUdispatcherAB(mV)

xRET();

pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");

Perf::any.Register(mVU.startFunct, static_cast<u32>(xGetPtr() - mVU.startFunct),
mVU.index ? "VU1StartFunc" : "VU0StartFunc");
}

@@ -102,7 +99,7 @@ void mVUdispatcherAB(mV)
// Generates the code for resuming/exit xgkick
void mVUdispatcherCD(mV)
{
mVU.startFunctXG = x86Ptr;
mVU.startFunctXG = xGetAlignedCallTarget();

{
xScopedStackFrame frame(false, true);

@@ -135,17 +132,13 @@ void mVUdispatcherCD(mV)

xRET();

pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");

Perf::any.Register(mVU.startFunctXG, static_cast<u32>(xGetPtr() - mVU.startFunctXG),
mVU.index ? "VU1StartFuncXG" : "VU0StartFuncXG");
}

void mvuGenerateWaitMTVU(mV)
static void mVUGenerateWaitMTVU(mV)
{
xAlignCallTarget();
mVU.waitMTVU = x86Ptr;
mVU.waitMTVU = xGetAlignedCallTarget();

int num_xmms = 0, num_gprs = 0;

@@ -215,17 +208,13 @@ void mvuGenerateWaitMTVU(mV)

xRET();

pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");

Perf::any.Register(mVU.waitMTVU, static_cast<u32>(xGetPtr() - mVU.waitMTVU),
mVU.index ? "VU1WaitMTVU" : "VU0WaitMTVU");
}

void mvuGenerateCopyPipelineState(mV)
static void mVUGenerateCopyPipelineState(mV)
{
xAlignCallTarget();
mVU.copyPLState = x86Ptr;
mVU.copyPLState = xGetAlignedCallTarget();

if (x86caps.hasAVX2)
{

@@ -258,13 +247,76 @@ void mvuGenerateCopyPipelineState(mV)

xRET();

pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
"microVU: Dispatcher generation exceeded reserved cache area!");

Perf::any.Register(mVU.copyPLState, static_cast<u32>(xGetPtr() - mVU.copyPLState),
mVU.index ? "VU1CopyPLState" : "VU0CopyPLState");
}

//------------------------------------------------------------------
// Micro VU - Custom Quick Search
//------------------------------------------------------------------

// Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
static void mVUGenerateCompareState(mV)
{
mVU.compareStateF = xGetAlignedCallTarget();

if (!x86caps.hasAVX2)
{
xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
xPAND (xmm0, xmm1);

xMOVMSKPS(eax, xmm0);
xXOR (eax, 0xf);
xForwardJNZ8 exitPoint;

xMOVAPS (xmm0, ptr32[arg1reg + 0x20]);
xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x30]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
xPAND (xmm0, xmm1);

xMOVAPS (xmm1, ptr32[arg1reg + 0x40]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x40]);
xMOVAPS (xmm2, ptr32[arg1reg + 0x50]);
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x50]);
xPAND (xmm1, xmm2);
xPAND (xmm0, xmm1);

xMOVMSKPS(eax, xmm0);
xXOR(eax, 0xf);

exitPoint.SetTarget();
}
else
{
// We have to use unaligned loads here, because the blocks are only 16 byte aligned.
xVMOVUPS(ymm0, ptr[arg1reg]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
xVPMOVMSKB(eax, ymm0);
xXOR(eax, 0xffffffff);
xForwardJNZ8 exitPoint;

xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
xVPAND(ymm0, ymm0, ymm1);

xVPMOVMSKB(eax, ymm0);
xNOT(eax);

exitPoint.SetTarget();
xVZEROUPPER();
}

xRET();
}

//------------------------------------------------------------------
// Execution Functions
//------------------------------------------------------------------
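The mVUGenerateCompareState() stub added above is essentially a vectorized comparison over the pipeline state: it loads the two microRegInfo blocks 16 (SSE) or 32 (AVX2) bytes at a time across offsets 0x00-0x5F, and leaves zero in eax only when every lane matched, which is why microBlockManager::search() tests mVU.compareState(...) == 0. A rough scalar equivalent for reference (the exact non-zero value differs from the real stub, which effectively returns a mismatch mask, but callers only check for zero):

    #include <cstdint>
    #include <cstring>

    // Reference-only equivalent of the generated compare stub: returns 0 when the
    // first 0x60 bytes of the two pipeline-state blocks are identical.
    static std::uint32_t CompareStateReference(const void* lhs, const void* rhs)
    {
        return std::memcmp(lhs, rhs, 0x60) == 0 ? 0u : 1u;
    }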
@@ -190,18 +190,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
#define opCase3 if (opCase == 3) // I Opcodes
#define opCase4 if (opCase == 4) // Q Opcodes

//------------------------------------------------------------------
// Define mVUquickSearch
//------------------------------------------------------------------
alignas(__pagesize) extern u8 mVUsearchXMM[__pagesize];
typedef u32 (*mVUCall)(void*, void*);
#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0)
#define mVUemitSearch() \
{ \
mVUcustomSearch(); \
}
//------------------------------------------------------------------

// Misc Macros...
#define mVUcurProg mVU.prog.cur[0]
#define mVUblocks mVU.prog.cur->block
@@ -606,73 +606,3 @@ void SSE_DIVSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, co
{
clampOp(xDIV.SS, false);
}

//------------------------------------------------------------------
// Micro VU - Custom Quick Search
//------------------------------------------------------------------

alignas(__pagesize) u8 mVUsearchXMM[__pagesize];

// Generates a custom optimized block-search function
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
void mVUcustomSearch()
{
HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ReadWrite());
memset(mVUsearchXMM, 0xcc, __pagesize);
xSetPtr(mVUsearchXMM);

if (!x86caps.hasAVX2)
{
xMOVAPS (xmm0, ptr32[arg1reg]);
xPCMP.EQD(xmm0, ptr32[arg2reg]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x10]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
xPAND (xmm0, xmm1);

xMOVMSKPS(eax, xmm0);
xXOR (eax, 0xf);
xForwardJNZ8 exitPoint;

xMOVAPS (xmm0, ptr32[arg1reg + 0x20]);
xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
xMOVAPS (xmm1, ptr32[arg1reg + 0x30]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
xPAND (xmm0, xmm1);

xMOVAPS (xmm1, ptr32[arg1reg + 0x40]);
xPCMP.EQD(xmm1, ptr32[arg2reg + 0x40]);
xMOVAPS (xmm2, ptr32[arg1reg + 0x50]);
xPCMP.EQD(xmm2, ptr32[arg2reg + 0x50]);
xPAND (xmm1, xmm2);
xPAND (xmm0, xmm1);

xMOVMSKPS(eax, xmm0);
xXOR(eax, 0xf);

exitPoint.SetTarget();
}
else
{
// We have to use unaligned loads here, because the blocks are only 16 byte aligned.
xVMOVUPS(ymm0, ptr[arg1reg]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
xVPMOVMSKB(eax, ymm0);
xXOR(eax, 0xffffffff);
xForwardJNZ8 exitPoint;

xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
xVPAND(ymm0, ymm0, ymm1);

xVPMOVMSKB(eax, ymm0);
xNOT(eax);

exitPoint.SetTarget();
xVZEROUPPER();
}

xRET();
HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ExecOnly());
}
@@ -390,8 +390,6 @@ void VifUnpackSSE_Init()
for (int c = 0; c < 4; c++)
nVifGen(a, b, c);

nVifUpkExec->ForbidModification();

DevCon.WriteLn("Unpack function generation complete. Generated function statistics:");
DevCon.Indent().WriteLn(
"Reserved buffer : %u bytes @ 0x%016" PRIXPTR "\n"