mirror of https://github.com/PCSX2/pcsx2.git
x86: Move dispatchers to recompiler code space
This commit is contained in:
parent 5f11ba0445
commit 377746f155
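Taken together, the hunks below retire the per-subsystem static dispatcher pages (IOP, EE, VTLB, microVU, GS software renderer) and instead emit those dispatchers directly into each recompiler's RecompiledCodeReserve. The following before/after sketch condenses the pattern visible in the removed and added lines — identifiers are borrowed from the diff, but this is a condensed illustration, not the literal PCSX2 code:

// BEFORE: dispatchers lived in a page-aligned static buffer whose protection
// was toggled around emission (condensed from the removed lines below).
alignas(__pagesize) static u8 recDispatchers[__pagesize];

void GenerateDispatchersOld()
{
	HostSys::MemProtectStatic(recDispatchers, PageAccess_ReadWrite()); // make writable
	memset(recDispatchers, 0xcc, __pagesize);                          // int3 fill for easier debugging
	xSetPtr(recDispatchers);                                           // emit into the static page
	// ... emit the dispatcher stubs ...
	HostSys::MemProtectStatic(recDispatchers, PageAccess_ExecOnly());  // lock before execution
}

// AFTER: dispatchers are emitted at the front of the recompiler's code
// reserve, share its protection, and are regenerated on every reset.
void GenerateDispatchersNew()
{
	xSetPtr(*recMem);      // start of the RecompiledCodeReserve
	_DynGen_Dispatchers(); // emit the stubs into the reserve
	recPtr = xGetPtr();    // blocks are recompiled after the dispatchers
}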
@@ -140,12 +140,6 @@ namespace HostSys
 	extern void MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode);
-
-	template <uint size>
-	void MemProtectStatic(u8 (&arr)[size], const PageProtectionMode& mode)
-	{
-		MemProtect(arr, size, mode);
-	}

 	extern std::string GetFileMappingName(const char* prefix);
 	extern void* CreateSharedMemory(const char* name, size_t size);
 	extern void DestroySharedMemory(void* ptr);
@@ -32,10 +32,10 @@ namespace x86Emitter
 	// Special form for calling functions. This form automatically resolves the
 	// correct displacement based on the size of the instruction being generated.
-	void operator()(void* func) const
+	void operator()(const void* func) const
 	{
 		if (isJmp)
-			xJccKnownTarget(Jcc_Unconditional, (void*)(uptr)func, false); // double cast to/from (uptr) needed to appease GCC
+			xJccKnownTarget(Jcc_Unconditional, (const void*)(uptr)func, false); // double cast to/from (uptr) needed to appease GCC
 		else
 		{
 			// calls are relative to the instruction after this one, and length is
@@ -58,32 +58,32 @@ namespace x86Emitter
 	// FIXME: current 64 bits is mostly a copy/past potentially it would require to push/pop
 	// some registers. But I think it is enough to handle the first call.

-	void operator()(void* f, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const;
+	void operator()(const void* f, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const;

-	void operator()(void* f, u32 a1, const xRegister32& a2) const;
+	void operator()(const void* f, u32 a1, const xRegister32& a2) const;
-	void operator()(void* f, const xIndirect32& a1) const;
+	void operator()(const void* f, const xIndirect32& a1) const;
-	void operator()(void* f, u32 a1, u32 a2) const;
+	void operator()(const void* f, u32 a1, u32 a2) const;
-	void operator()(void* f, void* a1) const;
+	void operator()(const void* f, void* a1) const;

-	void operator()(void* f, const xRegisterLong& a1, const xRegisterLong& a2 = xEmptyReg) const;
+	void operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2 = xEmptyReg) const;
-	void operator()(void* f, u32 a1, const xRegisterLong& a2) const;
+	void operator()(const void* f, u32 a1, const xRegisterLong& a2) const;

 	template <typename T>
 	__fi void operator()(T* func, u32 a1, const xRegisterLong& a2 = xEmptyReg) const
 	{
-		(*this)((void*)func, a1, a2);
+		(*this)((const void*)func, a1, a2);
 	}

 	template <typename T>
 	__fi void operator()(T* func, const xIndirect32& a1) const
 	{
-		(*this)((void*)func, a1);
+		(*this)((const void*)func, a1);
 	}

 	template <typename T>
 	__fi void operator()(T* func, u32 a1, u32 a2) const
 	{
-		(*this)((void*)func, a1, a2);
+		(*this)((const void*)func, a1, a2);
 	}

 	void operator()(const xIndirectNative& f, const xRegisterLong& a1 = xEmptyReg, const xRegisterLong& a2 = xEmptyReg) const;
@@ -78,7 +78,7 @@ namespace x86Emitter
 	}
 	}

-	void xImpl_FastCall::operator()(void* f, const xRegister32& a1, const xRegister32& a2) const
+	void xImpl_FastCall::operator()(const void* f, const xRegister32& a1, const xRegister32& a2) const
 	{
 		prepareRegsForFastcall(a1, a2);
 		uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
@@ -93,7 +93,7 @@ namespace x86Emitter
 	}
 	}

-	void xImpl_FastCall::operator()(void* f, const xRegisterLong& a1, const xRegisterLong& a2) const
+	void xImpl_FastCall::operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2) const
 	{
 		prepareRegsForFastcall(a1, a2);
 		uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
@@ -108,7 +108,7 @@ namespace x86Emitter
 	}
 	}

-	void xImpl_FastCall::operator()(void* f, u32 a1, const xRegisterLong& a2) const
+	void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegisterLong& a2) const
 	{
 		if (!a2.IsEmpty())
 		{
@@ -118,13 +118,13 @@ namespace x86Emitter
 		(*this)(f, arg1reg, arg2reg);
 	}

-	void xImpl_FastCall::operator()(void* f, void* a1) const
+	void xImpl_FastCall::operator()(const void* f, void* a1) const
 	{
 		xLEA(arg1reg, ptr[a1]);
 		(*this)(f, arg1reg, arg2reg);
 	}

-	void xImpl_FastCall::operator()(void* f, u32 a1, const xRegister32& a2) const
+	void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegister32& a2) const
 	{
 		if (!a2.IsEmpty())
 		{
@@ -134,13 +134,13 @@ namespace x86Emitter
 		(*this)(f, arg1regd, arg2regd);
 	}

-	void xImpl_FastCall::operator()(void* f, const xIndirect32& a1) const
+	void xImpl_FastCall::operator()(const void* f, const xIndirect32& a1) const
 	{
 		xMOV(arg1regd, a1);
 		(*this)(f, arg1regd);
 	}

-	void xImpl_FastCall::operator()(void* f, u32 a1, u32 a2) const
+	void xImpl_FastCall::operator()(const void* f, u32 a1, u32 a2) const
 	{
 		xMOV(arg1regd, a1);
 		xMOV(arg2regd, a2);
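The `disp` computation above is the standard rel32 arithmetic for a 5-byte `CALL rel32`: the displacement is encoded relative to the end of the instruction, i.e. the current emit pointer plus 5. (The source computes the difference in the opposite direction; either way, the real question is whether the result fits in a signed 32 bits.) A minimal sketch of that check, assuming the emitter falls back to an indirect call when the target is out of range — the fallback detail is an assumption here, not shown in the diff:

#include <cstdint>

// Sketch: can a direct CALL rel32 reach `target` from code being emitted at
// `emitPtr`? rel32 is relative to the *end* of the 5-byte call instruction.
bool CanEmitDirectCall(std::uint8_t* emitPtr, const void* target)
{
	std::intptr_t rel = (std::intptr_t)target - ((std::intptr_t)emitPtr + 5);
	if (rel == (std::int32_t)rel)
		return true; // fits: emit E8 <rel32>
	// Otherwise the address must be materialized in a register and called
	// indirectly (FF /2) -- assumed fallback, not part of this diff.
	return false;
}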
@@ -144,13 +144,6 @@ emitterT void x86SetJ32A(u32* j32)
 	x86SetJ32(j32);
 }

-////////////////////////////////////////////////////
-emitterT void x86Align(int bytes)
-{
-	// forward align
-	x86Ptr = (u8*)(((uptr)x86Ptr + bytes - 1) & ~(bytes - 1));
-}
-
 /********************/
 /* IX86 instructions */
 /********************/
@@ -29,15 +29,12 @@
 //------------------------------------------------------------------
 // legacy jump/align functions
 //------------------------------------------------------------------
-ATTR_DEP extern void x86SetPtr(u8* ptr);
 ATTR_DEP extern void x86SetJ8(u8* j8);
 ATTR_DEP extern void x86SetJ8A(u8* j8);
 ATTR_DEP extern void x86SetJ16(u16* j16);
 ATTR_DEP extern void x86SetJ16A(u16* j16);
 ATTR_DEP extern void x86SetJ32(u32* j32);
 ATTR_DEP extern void x86SetJ32A(u32* j32);
-ATTR_DEP extern void x86Align(int bytes);
-ATTR_DEP extern void x86AlignExecutable(int align);
 //------------------------------------------------------------------

 ////////////////////////////////////
@@ -38,7 +38,6 @@ GSDrawScanline::GSDrawScanline()
 	: m_sp_map("GSSetupPrim")
 	, m_ds_map("GSDrawScanline")
 {
-	GSCodeReserve::GetInstance().AllowModification();
 	GSCodeReserve::GetInstance().Reset();
 }
@@ -46,8 +45,6 @@ GSDrawScanline::~GSDrawScanline()
 {
 	if (const size_t used = GSCodeReserve::GetInstance().GetMemoryUsed(); used > 0)
 		DevCon.WriteLn("SW JIT generated %zu bytes of code", used);
-
-	GSCodeReserve::GetInstance().ForbidModification();
 }

 void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local)
@@ -334,13 +334,3 @@ void RecompiledCodeReserve::Reset()
 		std::memset(m_baseptr, 0xCC, m_size);
 	}
 }
-
-void RecompiledCodeReserve::AllowModification()
-{
-	HostSys::MemProtect(m_baseptr, m_size, PageAccess_Any());
-}
-
-void RecompiledCodeReserve::ForbidModification()
-{
-	HostSys::MemProtect(m_baseptr, m_size, PageProtectionMode().Read().Execute());
-}
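With AllowModification/ForbidModification gone, nothing toggles the reserve between read-write and read-execute anymore. For readers unfamiliar with the fluent protection-mode API in the removed lines, a hedged sketch of how it plausibly reads (assuming the setters simply flag bits and return *this — an assumption, not the actual header):

// Hedged sketch of the fluent protection-mode builder used above.
struct PageProtectionModeSketch
{
	bool m_read = false, m_write = false, m_exec = false;
	PageProtectionModeSketch& Read()    { m_read = true;  return *this; }
	PageProtectionModeSketch& Write()   { m_write = true; return *this; }
	PageProtectionModeSketch& Execute() { m_exec = true;  return *this; }
};
// PageAccess_Any() would then correspond to Read().Write().Execute() (RWX),
// while the removed ForbidModification() dropped back to Read().Execute() (RX).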
@@ -161,9 +161,6 @@ public:
 	void Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size);
 	void Reset();

-	void ForbidModification();
-	void AllowModification();
-
 	operator u8*() { return m_baseptr; }
 	operator const u8*() const { return m_baseptr; }
 };
@@ -1245,9 +1245,6 @@ void vtlb_Init()
 	// The LUT is only used for 1 game so we allocate it only when the gamefix is enabled (save 4MB)
 	if (EmuConfig.Gamefixes.GoemonTlbHack)
 		vtlb_Alloc_Ppmap();
-
-	extern void vtlb_dynarec_init();
-	vtlb_dynarec_init();
 }

 // vtlb_Reset -- Performs a COP0-level reset of the PS2's TLB.
@@ -124,6 +124,8 @@ extern int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAlloc
 extern void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg);
 extern void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg);

+extern void vtlb_DynGenDispatchers();
+
 // --------------------------------------------------------------------------------------
 // VtlbMemoryReserve
 // --------------------------------------------------------------------------------------
@@ -165,9 +165,9 @@ public:
 	{
 	}

-	void SetJITCompile(void (*recompiler_)())
+	void SetJITCompile(const void *recompiler_)
 	{
-		recompiler = (uptr)recompiler_;
+		recompiler = reinterpret_cast<uptr>(recompiler_);
 	}

 	BASEBLOCKEX* New(u32 startpc, uptr fnptr);
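Generated stubs no longer have a meaningful C++ function type, so the block manager now takes an opaque const void* and stores it as an integer. A hedged sketch of the storage-and-call pattern — invoking such a pointer requires an explicit cast back to a function type, exactly as the diff does elsewhere with ((void(*)())ptr)(); names here are illustrative:

#include <cstdint>
using uptr = std::uintptr_t;

// Sketch: storing and invoking an opaque pointer to generated code,
// mirroring SetJITCompile/recompiler above.
struct BlockManagerSketch
{
	uptr recompiler = 0;

	void SetJITCompile(const void* stub) { recompiler = reinterpret_cast<uptr>(stub); }

	void CallStub() const
	{
		// Casting an integer/data pointer to a function pointer is
		// implementation-defined in ISO C++, but required and reliable on
		// the platforms a JIT targets.
		reinterpret_cast<void (*)()>(recompiler)();
	}
};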
@@ -171,19 +171,14 @@ static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* forma
 // Dynamically Compiled Dispatchers - R3000A style
 // =====================================================================================================

-static void iopRecRecompile(const u32 startpc);
+static void iopRecRecompile(u32 startpc);

-// Recompiled code buffer for EE recompiler dispatchers!
-alignas(__pagesize) static u8 iopRecDispatchers[__pagesize];
-
-typedef void DynGenFunc();
-
-static DynGenFunc* iopDispatcherEvent = NULL;
-static DynGenFunc* iopDispatcherReg = NULL;
-static DynGenFunc* iopJITCompile = NULL;
-static DynGenFunc* iopJITCompileInBlock = NULL;
-static DynGenFunc* iopEnterRecompiledCode = NULL;
-static DynGenFunc* iopExitRecompiledCode = NULL;
+static const void* iopDispatcherEvent = nullptr;
+static const void* iopDispatcherReg = nullptr;
+static const void* iopJITCompile = nullptr;
+static const void* iopJITCompileInBlock = nullptr;
+static const void* iopEnterRecompiledCode = nullptr;
+static const void* iopExitRecompiledCode = nullptr;

 static void recEventTest()
 {
@@ -192,7 +187,7 @@ static void recEventTest()

 // The address for all cleared blocks. It recompiles the current pc and then
 // dispatches to the recompiled block address.
-static DynGenFunc* _DynGen_JITCompile()
+static const void* _DynGen_JITCompile()
 {
 	pxAssertMsg(iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks.");

@@ -206,18 +201,18 @@ static DynGenFunc* _DynGen_JITCompile()
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
 	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_JITCompileInBlock()
+static const void* _DynGen_JITCompileInBlock()
 {
 	u8* retval = xGetPtr();
 	xJMP((void*)iopJITCompile);
-	return (DynGenFunc*)retval;
+	return retval;
 }

 // called when jumping to variable pc address
-static DynGenFunc* _DynGen_DispatcherReg()
+static const void* _DynGen_DispatcherReg()
 {
 	u8* retval = xGetPtr();

@@ -227,13 +222,13 @@ static DynGenFunc* _DynGen_DispatcherReg()
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
 	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

-	return (DynGenFunc*)retval;
+	return retval;
 }

 // --------------------------------------------------------------------------------------
 // EnterRecompiledCode - dynamic compilation stub!
 // --------------------------------------------------------------------------------------
-static DynGenFunc* _DynGen_EnterRecompiledCode()
+static const void* _DynGen_EnterRecompiledCode()
 {
 	// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
 	// allocating any room on the stack for it (which is important since the IOP's entry
@@ -251,27 +246,21 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
 		xJMP((void*)iopDispatcherReg);

 		// Save an exit point
-		iopExitRecompiledCode = (DynGenFunc*)xGetPtr();
+		iopExitRecompiledCode = xGetPtr();
 	}

 	xRET();

-	return (DynGenFunc*)retval;
+	return retval;
 }

 static void _DynGen_Dispatchers()
 {
-	// In case init gets called multiple times:
-	HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ReadWrite());
-
-	// clear the buffer to 0xcc (easier debugging).
-	memset(iopRecDispatchers, 0xcc, __pagesize);
-
-	xSetPtr(iopRecDispatchers);
+	const u8* start = xGetAlignedCallTarget();

 	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
 	// most and stand to benefit from strong alignment and direct referencing.
-	iopDispatcherEvent = (DynGenFunc*)xGetPtr();
+	iopDispatcherEvent = xGetPtr();
 	xFastCall((void*)recEventTest);
 	iopDispatcherReg = _DynGen_DispatcherReg();
@@ -279,11 +268,9 @@ static void _DynGen_Dispatchers()
 	iopJITCompileInBlock = _DynGen_JITCompileInBlock();
 	iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();

-	HostSys::MemProtectStatic(iopRecDispatchers, PageAccess_ExecOnly());
-
 	recBlocks.SetJITCompile(iopJITCompile);

-	Perf::any.Register((void*)iopRecDispatchers, 4096, "IOP Dispatcher");
+	Perf::any.Register(start, xGetPtr() - start, "IOP Dispatcher");
 }

 ////////////////////////////////////////////////////
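Worth noting: the Perf registration now reports the measured size of what was actually emitted rather than a hard-coded 4096-byte page. The idiom, condensed from the lines above (EmitAllDispatcherStubs is a hypothetical stand-in for the chain of _DynGen_* calls):

// Measure-then-register idiom used by _DynGen_Dispatchers().
const u8* start = xGetAlignedCallTarget(); // align, then remember where emission begins
EmitAllDispatcherStubs();                  // hypothetical stand-in for the _DynGen_* calls
Perf::any.Register(start, xGetPtr() - start, "IOP Dispatcher"); // exact emitted size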
@@ -931,8 +918,6 @@ static void recAlloc()
 	if (!s_pInstCache)
 		pxFailRel("Failed to allocate R3000 InstCache array.");
 	}
-
-	_DynGen_Dispatchers();
 }

 void recResetIOP()
@@ -941,6 +926,9 @@ void recResetIOP()

 	recAlloc();
 	recMem->Reset();
+	xSetPtr(*recMem);
+	_DynGen_Dispatchers();
+	recPtr = xGetPtr();

 	iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
 		(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) / 4)));
@@ -990,7 +978,6 @@ void recResetIOP()
 	recBlocks.Reset();
 	g_psxMaxRecMem = 0;

-	recPtr = *recMem;
 	psxbranch = 0;
 }
@@ -1036,7 +1023,7 @@ static __noinline s32 recExecuteBlock(s32 eeCycles)
 	// mov edx,dword ptr [iopCycleEE (832A84h)]
 	// lea eax,[edx+ecx]

-	iopEnterRecompiledCode();
+	((void(*)())iopEnterRecompiledCode)();

 	return psxRegs.iopBreak + psxRegs.iopCycleEE;
 }
@@ -1579,9 +1566,8 @@ static void iopRecRecompile(const u32 startpc)
 		recResetIOP();
 	}

-	x86SetPtr(recPtr);
-	x86Align(16);
-	recPtr = x86Ptr;
+	xSetPtr(recPtr);
+	recPtr = xGetAlignedCallTarget();

 	s_pCurBlock = PSX_GETBLOCK(startpc);
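xGetAlignedCallTarget() replaces the legacy x86Align(16)/x86Ptr pair here. A hedged sketch of what such a helper plausibly does — advance the emit cursor to the next 16-byte boundary and return the aligned address; the 0xCC padding byte is an assumption (int3 makes accidental execution of the gap fault loudly):

#include <cstdint>
using u8 = std::uint8_t;
using uptr = std::uintptr_t;

static u8* x86Ptr; // emit cursor (stand-in for the emitter's global)

// Hedged sketch of an "aligned call target" helper in emitter style.
u8* GetAlignedCallTarget()
{
	while (((uptr)x86Ptr & 15) != 0)
		*x86Ptr++ = 0xCC; // pad to a 16-byte boundary
	return x86Ptr;        // callers treat this as the function entry point
}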
@@ -360,19 +360,14 @@ static void recRecompile(const u32 startpc);
 static void dyna_block_discard(u32 start, u32 sz);
 static void dyna_page_reset(u32 start, u32 sz);

-// Recompiled code buffer for EE recompiler dispatchers!
-alignas(__pagesize) static u8 eeRecDispatchers[__pagesize];
-
-typedef void DynGenFunc();
-
-static DynGenFunc* DispatcherEvent = NULL;
-static DynGenFunc* DispatcherReg = NULL;
-static DynGenFunc* JITCompile = NULL;
-static DynGenFunc* JITCompileInBlock = NULL;
-static DynGenFunc* EnterRecompiledCode = NULL;
-static DynGenFunc* ExitRecompiledCode = NULL;
-static DynGenFunc* DispatchBlockDiscard = NULL;
-static DynGenFunc* DispatchPageReset = NULL;
+static const void* DispatcherEvent = nullptr;
+static const void* DispatcherReg = nullptr;
+static const void* JITCompile = nullptr;
+static const void* JITCompileInBlock = nullptr;
+static const void* EnterRecompiledCode = nullptr;
+static const void* ExitRecompiledCode = nullptr;
+static const void* DispatchBlockDiscard = nullptr;
+static const void* DispatchPageReset = nullptr;

 static void recEventTest()
 {
@@ -387,13 +382,13 @@ static void recEventTest()

 // The address for all cleared blocks. It recompiles the current pc and then
 // dispatches to the recompiled block address.
-static DynGenFunc* _DynGen_JITCompile()
+static const void* _DynGen_JITCompile()
 {
 	pxAssertMsg(DispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks.");

 	u8* retval = xGetAlignedCallTarget();

-	xFastCall((void*)recRecompile, ptr32[&cpuRegs.pc]);
+	xFastCall((const void*)recRecompile, ptr32[&cpuRegs.pc]);

 	// C equivalent:
 	// u32 addr = cpuRegs.pc;
@@ -405,18 +400,18 @@ static DynGenFunc* _DynGen_JITCompile()
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
 	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_JITCompileInBlock()
+static const void* _DynGen_JITCompileInBlock()
 {
 	u8* retval = xGetAlignedCallTarget();
-	xJMP((void*)JITCompile);
+	xJMP(JITCompile);
-	return (DynGenFunc*)retval;
+	return retval;
 }

 // called when jumping to variable pc address
-static DynGenFunc* _DynGen_DispatcherReg()
+static const void* _DynGen_DispatcherReg()
 {
 	u8* retval = xGetPtr(); // fallthrough target, can't align it!

@@ -430,19 +425,19 @@ static DynGenFunc* _DynGen_DispatcherReg()
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
 	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);

-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_DispatcherEvent()
+static const void* _DynGen_DispatcherEvent()
 {
 	u8* retval = xGetPtr();

-	xFastCall((void*)recEventTest);
+	xFastCall((const void*)recEventTest);

-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_EnterRecompiledCode()
+static const void* _DynGen_EnterRecompiledCode()
 {
 	pxAssertDev(DispatcherReg != NULL, "Dynamically generated dispatchers are required prior to generating EnterRecompiledCode!");

@@ -461,39 +456,33 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
 		xJMP((void*)DispatcherReg);

 		// Save an exit point
-		ExitRecompiledCode = (DynGenFunc*)xGetPtr();
+		ExitRecompiledCode = xGetPtr();
 	}

 	xRET();

-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_DispatchBlockDiscard()
+static const void* _DynGen_DispatchBlockDiscard()
 {
 	u8* retval = xGetPtr();
-	xFastCall((void*)dyna_block_discard);
-	xJMP((void*)ExitRecompiledCode);
+	xFastCall((const void*)dyna_block_discard);
+	xJMP((const void*)ExitRecompiledCode);
-	return (DynGenFunc*)retval;
+	return retval;
 }

-static DynGenFunc* _DynGen_DispatchPageReset()
+static const void* _DynGen_DispatchPageReset()
 {
 	u8* retval = xGetPtr();
-	xFastCall((void*)dyna_page_reset);
-	xJMP((void*)ExitRecompiledCode);
+	xFastCall((const void*)dyna_page_reset);
+	xJMP((const void*)ExitRecompiledCode);
-	return (DynGenFunc*)retval;
+	return retval;
 }

 static void _DynGen_Dispatchers()
 {
-	// In case init gets called multiple times:
-	HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ReadWrite());
-
-	// clear the buffer to 0xcc (easier debugging).
-	memset(eeRecDispatchers, 0xcc, __pagesize);
-
-	xSetPtr(eeRecDispatchers);
+	const u8* start = xGetAlignedCallTarget();

 	// Place the EventTest and DispatcherReg stuff at the top, because they get called the
 	// most and stand to benefit from strong alignment and direct referencing.
@@ -506,11 +495,9 @@ static void _DynGen_Dispatchers()
 	DispatchBlockDiscard = _DynGen_DispatchBlockDiscard();
 	DispatchPageReset = _DynGen_DispatchPageReset();

-	HostSys::MemProtectStatic(eeRecDispatchers, PageAccess_ExecOnly());
-
 	recBlocks.SetJITCompile(JITCompile);

-	Perf::any.Register((void*)eeRecDispatchers, 4096, "EE Dispatcher");
+	Perf::any.Register(start, static_cast<u32>(xGetPtr() - start), "EE Dispatcher");
 }

@@ -597,10 +584,6 @@ static void recAlloc()
 	if (!s_pInstCache)
 		pxFailRel("Failed to allocate R5900 InstCache array");
 	}
-
-	// No errors.. Proceed with initialization:
-
-	_DynGen_Dispatchers();
 }

 alignas(16) static u16 manual_page[Ps2MemSize::MainRam >> 12];
@@ -616,6 +599,11 @@ static void recResetRaw()
 	recAlloc();

 	recMem->Reset();
+	xSetPtr(*recMem);
+	_DynGen_Dispatchers();
+	vtlb_DynGenDispatchers();
+	recPtr = xGetPtr();

 	ClearRecLUT((BASEBLOCK*)recLutReserve_RAM, recLutSize);
 	memset(recRAMCopy, 0, Ps2MemSize::MainRam);
@@ -628,10 +616,6 @@ static void recResetRaw()
 	mmap_ResetBlockTracking();
 	vtlb_ClearLoadStoreInfo();

-	x86SetPtr(*recMem);
-
-	recPtr = *recMem;
-
 	g_branch = 0;
 	g_resetEeScalingStats = true;
 }
@@ -644,7 +628,7 @@ static void recShutdown()

 	recBlocks.Reset();

-	recRAM = recROM = recROM1 = recROM2 = NULL;
+	recRAM = recROM = recROM1 = recROM2 = nullptr;

 	safe_free(s_pInstCache);
 	s_nInstCacheSize = 0;
@@ -720,13 +704,7 @@ static void recExecute()
 	if (!fastjmp_set(&m_SetJmp_StateCheck))
 	{
 		eeCpuExecuting = true;
-
-		// Important! Most of the console logging and such has cancel points in it. This is great
-		// in Windows, where SEH lets us safely kill a thread from anywhere we want. This is bad
-		// in Linux, which cannot have a C++ exception cross the recompiler. Hence the changing
-		// of the cancelstate here!
-
-		EnterRecompiledCode();
+		((void(*)())EnterRecompiledCode)();

 		// Generally unreachable code here ...
 	}
@@ -1636,11 +1614,17 @@ void recMemcheck(u32 op, u32 bits, bool store)
 			// Preserve ecx (address) and edx (address+size) because we aren't breaking
 			// out of this loops iteration and dynarecMemLogcheck will clobber them
 			// Also keep 16 byte stack alignment
-			if(!(checks[i].result & MEMCHECK_BREAK))
+			if (!(checks[i].result & MEMCHECK_BREAK))
 			{
-				xPUSH(eax); xPUSH(ebx); xPUSH(ecx); xPUSH(edx);
+				xPUSH(eax);
+				xPUSH(ebx);
+				xPUSH(ecx);
+				xPUSH(edx);
 				xFastCall((void*)dynarecMemLogcheck, ecx, edx);
-				xPOP(edx); xPOP(ecx); xPOP(ebx); xPOP(eax);
+				xPOP(edx);
+				xPOP(ecx);
+				xPOP(ebx);
+				xPOP(eax);
 			}
 			else
 			{
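The four pushes here do double duty: besides preserving clobbered registers, each PUSH of a 64-bit register moves RSP by 8 bytes, so four of them subtract 32 — a multiple of 16 — leaving the stack aligned for the call, exactly as the "keep 16 byte stack alignment" comment demands. The pops must mirror in reverse (LIFO) order. The invariant, spelled out:

// Stack-alignment arithmetic behind the push/pop quartet above (x86-64).
constexpr int kPushSize = 8; // each PUSH r64 moves RSP by 8 bytes
constexpr int kPushes   = 4; // eax/ebx/ecx/edx (their 64-bit homes)
static_assert((kPushes * kPushSize) % 16 == 0,
              "an even number of pushes keeps the 16-byte call alignment");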
@@ -1926,7 +1910,7 @@ void recompileNextInstruction(bool delayslot, bool swapped_delay_slot)
 				std::string disasm = "";
 				disR5900Fasm(disasm, memRead32(i), i, false);
 				Console.Warning("%x %s%08X %s", i, i == pc - 4 ? "*" : i == p ? "=" :
 					" ",
 					memRead32(i), disasm.c_str());
 			}
 			break;
@@ -1952,7 +1936,7 @@ void recompileNextInstruction(bool delayslot, bool swapped_delay_slot)
 				disasm = "";
 				disR5900Fasm(disasm, memRead32(i), i, false);
 				Console.Warning("%x %s%08X %s", i, i == pc - 4 ? "*" : i == p ? "=" :
 					" ",
 					memRead32(i), disasm.c_str());
 			}
 			break;
@@ -239,13 +239,9 @@ namespace vtlb_private
 	}
 } // namespace vtlb_private

-// ------------------------------------------------------------------------
-// allocate one page for our naked indirect dispatcher function.
-// this *must* be a full page, since we'll give it execution permission later.
-// If it were smaller than a page we'd end up allowing execution rights on some
-// other vars additionally (bad!).
-//
-alignas(__pagesize) static u8 m_IndirectDispatchers[__pagesize];
+static constexpr u32 INDIRECT_DISPATCHER_SIZE = 32;
+static constexpr u32 INDIRECT_DISPATCHERS_SIZE = 2 * 5 * 2 * INDIRECT_DISPATCHER_SIZE;
+static u8* m_IndirectDispatchers = nullptr;

 // ------------------------------------------------------------------------
 // mode - 0 for read, 1 for write!
@@ -255,16 +251,8 @@ static u8* GetIndirectDispatcherPtr(int mode, int operandsize, int sign = 0)
 {
 	assert(mode || operandsize >= 3 ? !sign : true);

-	// Each dispatcher is aligned to 64 bytes. The actual dispatchers are only like
-	// 20-some bytes each, but 64 byte alignment on functions that are called
-	// more frequently than a hot sex hotline at 1:15am is probably a good thing.
-
-	// 7*64? 5 widths with two sign extension modes for 8 and 16 bit reads
-
-	// Gregory: a 32 bytes alignment is likely enough and more cache friendly
-	const int A = 32;
-
-	return &m_IndirectDispatchers[(mode * (8 * A)) + (sign * 5 * A) + (operandsize * A)];
+	return &m_IndirectDispatchers[(mode * (8 * INDIRECT_DISPATCHER_SIZE)) + (sign * 5 * INDIRECT_DISPATCHER_SIZE) +
+		(operandsize * INDIRECT_DISPATCHER_SIZE)];
 }

 // ------------------------------------------------------------------------
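The new constants make the old magic numbers legible: 2 access modes × 5 operand widths × 2 sign variants × 32 bytes per slot reserves 640 bytes, and each dispatcher lands at slot index mode*8 + sign*5 + operandsize. A few evaluated offsets (my arithmetic, not from the diff):

// Worked offsets for GetIndirectDispatcherPtr's indexing formula
// (slot = mode*8 + sign*5 + operandsize, 32 bytes per slot):
static_assert(2 * 5 * 2 * 32 == 640, "total bytes reserved for all dispatcher stubs");

// read  (mode=0), unsigned, 8-bit   (operandsize=0): (0*8 + 0*5 + 0) * 32 =   0
// read  (mode=0), signed,   16-bit  (operandsize=1): (0*8 + 1*5 + 1) * 32 = 192
// write (mode=1),           128-bit (operandsize=4): (1*8 + 0*5 + 4) * 32 = 384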
@@ -359,18 +347,12 @@ static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
 // One-time initialization procedure. Multiple subsequent calls during the lifespan of the
 // process will be ignored.
 //
-void vtlb_dynarec_init()
+void vtlb_DynGenDispatchers()
 {
-	static bool hasBeenCalled = false;
-	if (hasBeenCalled)
-		return;
-	hasBeenCalled = true;
-
-	// In case init gets called multiple times:
-	HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ReadWrite());
+	m_IndirectDispatchers = xGetAlignedCallTarget();

 	// clear the buffer to 0xcc (easier debugging).
-	memset(m_IndirectDispatchers, 0xcc, __pagesize);
+	std::memset(m_IndirectDispatchers, 0xcc, INDIRECT_DISPATCHERS_SIZE);

 	for (int mode = 0; mode < 2; ++mode)
 	{
@@ -385,9 +367,9 @@ void vtlb_dynarec_init()
 		}
 	}

-	HostSys::MemProtectStatic(m_IndirectDispatchers, PageAccess_ExecOnly());
-
-	Perf::any.Register(m_IndirectDispatchers, __pagesize, "TLB Dispatcher");
+	Perf::any.Register(m_IndirectDispatchers, INDIRECT_DISPATCHERS_SIZE, "TLB Dispatcher");
+
+	xSetPtr(m_IndirectDispatchers + INDIRECT_DISPATCHERS_SIZE);
 }

 //////////////////////////////////////////////////////////////////////////////////////////
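One subtlety here: the stubs are emitted at fixed offsets inside the reserved window via GetIndirectDispatcherPtr(), so the closing xSetPtr() is what actually claims the window — it parks the emit cursor past the end of the region so later recompilation cannot overwrite the dispatchers. A hedged sketch of the pattern (kSlots, kSlotSize, and EmitOneDispatcher are stand-ins; xGetAlignedCallTarget/xSetPtr are the emitter calls seen in the diff):

constexpr int kSlots = 20;    // 2 modes * 5 widths * 2 sign variants
constexpr int kSlotSize = 32; // INDIRECT_DISPATCHER_SIZE

void GenerateDispatchers()
{
	u8* base = xGetAlignedCallTarget();   // claim an aligned window in the reserve
	for (int slot = 0; slot < kSlots; slot++)
	{
		xSetPtr(base + slot * kSlotSize); // hypothetical per-slot seek
		EmitOneDispatcher(slot);          // stub must stay within kSlotSize bytes
	}
	xSetPtr(base + kSlots * kSlotSize);   // park the cursor past the window
}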
@@ -25,8 +25,6 @@
 //------------------------------------------------------------------
 // Micro VU - Main Functions
 //------------------------------------------------------------------
-alignas(__pagesize) static u8 vu0_RecDispatchers[mVUdispCacheSize];
-alignas(__pagesize) static u8 vu1_RecDispatchers[mVUdispCacheSize];

 void mVUreserveCache(microVU& mVU)
 {
@@ -49,18 +47,12 @@ void mVUinit(microVU& mVU, uint vuIndex)
 	mVU.progSize = (mVU.index ? 0x4000 : 0x1000) / 4;
 	mVU.progMemMask = mVU.progSize-1;
 	mVU.cacheSize = mVUcacheReserve;
-	mVU.cache = NULL;
-	mVU.dispCache = NULL;
-	mVU.startFunct = NULL;
-	mVU.exitFunct = NULL;
+	mVU.cache = nullptr;
+	mVU.startFunct = nullptr;
+	mVU.exitFunct = nullptr;

 	mVUreserveCache(mVU);

-	if (vuIndex)
-		mVU.dispCache = vu1_RecDispatchers;
-	else
-		mVU.dispCache = vu0_RecDispatchers;
-
 	mVU.regAlloc.reset(new microRegAlloc(mVU.index));
 }
@@ -82,15 +74,12 @@ void mVUreset(microVU& mVU, bool resetReserve)
 	if (resetReserve)
 		mVU.cache_reserve->Reset();

-	HostSys::MemProtect(mVU.dispCache, mVUdispCacheSize, PageAccess_ReadWrite());
-	memset(mVU.dispCache, 0xcc, mVUdispCacheSize);
-
-	x86SetPtr(mVU.dispCache);
+	xSetPtr(mVU.cache);
 	mVUdispatcherAB(mVU);
 	mVUdispatcherCD(mVU);
-	mvuGenerateWaitMTVU(mVU);
-	mvuGenerateCopyPipelineState(mVU);
-	mVUemitSearch();
+	mVUGenerateWaitMTVU(mVU);
+	mVUGenerateCopyPipelineState(mVU);
+	mVUGenerateCompareState(mVU);

 	mVU.regs().nextBlockCycles = 0;
 	memset(&mVU.prog.lpState, 0, sizeof(mVU.prog.lpState));
@@ -104,10 +93,9 @@ void mVUreset(microVU& mVU, bool resetReserve)
 	mVU.prog.curFrame = 0;

 	// Setup Dynarec Cache Limits for Each Program
-	u8* z = mVU.cache;
-	mVU.prog.x86start = z;
-	mVU.prog.x86ptr = z;
-	mVU.prog.x86end = z + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);
+	mVU.prog.x86start = xGetAlignedCallTarget();
+	mVU.prog.x86ptr = mVU.prog.x86start;
+	mVU.prog.x86end = mVU.cache + ((mVU.cacheSize - mVUcacheSafeZone) * _1mb);

 	for (u32 i = 0; i < (mVU.progSize / 2); i++)
 	{
@@ -125,8 +113,6 @@ void mVUreset(microVU& mVU, bool resetReserve)
 		mVU.prog.quick[i].block = NULL;
 		mVU.prog.quick[i].prog = NULL;
 	}
-
-	HostSys::MemProtect(mVU.dispCache, mVUdispCacheSize, PageAccess_ExecOnly());
 }

 // Free Allocated Resources
@@ -37,6 +37,8 @@ using namespace x86Emitter;
 #include "microVU_Profiler.h"
 #include "common/Perf.h"

+class microBlockManager;
+
 struct microBlockLink
 {
 	microBlock block;
@@ -49,135 +51,6 @@ struct microBlockLinkRef
 	u64 quick;
 };

-class microBlockManager
-{
-private:
-	microBlockLink *qBlockList, *qBlockEnd; // Quick Search
-	microBlockLink *fBlockList, *fBlockEnd; // Full Search
-	std::vector<microBlockLinkRef> quickLookup;
-	int qListI, fListI;
-
-public:
-	inline int getFullListCount() const { return fListI; }
-	microBlockManager()
-	{
-		qListI = fListI = 0;
-		qBlockEnd = qBlockList = nullptr;
-		fBlockEnd = fBlockList = nullptr;
-	}
-	~microBlockManager() { reset(); }
-	void reset()
-	{
-		for (microBlockLink* linkI = qBlockList; linkI != nullptr;)
-		{
-			microBlockLink* freeI = linkI;
-			safe_delete_array(linkI->block.jumpCache);
-			linkI = linkI->next;
-			_aligned_free(freeI);
-		}
-		for (microBlockLink* linkI = fBlockList; linkI != nullptr;)
-		{
-			microBlockLink* freeI = linkI;
-			safe_delete_array(linkI->block.jumpCache);
-			linkI = linkI->next;
-			_aligned_free(freeI);
-		}
-		qListI = fListI = 0;
-		qBlockEnd = qBlockList = nullptr;
-		fBlockEnd = fBlockList = nullptr;
-		quickLookup.clear();
-	};
-	microBlock* add(microBlock* pBlock)
-	{
-		microBlock* thisBlock = search(&pBlock->pState);
-		if (!thisBlock)
-		{
-			u8 fullCmp = pBlock->pState.needExactMatch;
-			if (fullCmp)
-				fListI++;
-			else
-				qListI++;
-
-			microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
-			microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
-			microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 32);
-			newBlock->block.jumpCache = nullptr;
-			newBlock->next = nullptr;
-
-			if (blockEnd)
-			{
-				blockEnd->next = newBlock;
-				blockEnd = newBlock;
-			}
-			else
-			{
-				blockEnd = blockList = newBlock;
-			}
-
-			std::memcpy(&newBlock->block, pBlock, sizeof(microBlock));
-			thisBlock = &newBlock->block;
-
-			quickLookup.push_back({&newBlock->block, pBlock->pState.quick64[0]});
-		}
-		return thisBlock;
-	}
-	__ri microBlock* search(microRegInfo* pState)
-	{
-		if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State)
-		{
-			microBlockLink* prevI = nullptr;
-			for (microBlockLink* linkI = fBlockList; linkI != nullptr; prevI = linkI, linkI = linkI->next)
-			{
-				if (mVUquickSearch(pState, &linkI->block.pState, sizeof(microRegInfo)))
-				{
-					if (linkI != fBlockList)
-					{
-						prevI->next = linkI->next;
-						linkI->next = fBlockList;
-						fBlockList = linkI;
-					}
-
-					return &linkI->block;
-				}
-			}
-		}
-		else // Can do Simple Search (Only Matches the Important Pipeline Stuff)
-		{
-			const u64 quick64 = pState->quick64[0];
-			for (const microBlockLinkRef& ref : quickLookup)
-			{
-				if (ref.quick != quick64) continue;
-				if (doConstProp && (ref.pBlock->pState.vi15 != pState->vi15)) continue;
-				if (doConstProp && (ref.pBlock->pState.vi15v != pState->vi15v)) continue;
-				return ref.pBlock;
-			}
-		}
-		return nullptr;
-	}
-	void printInfo(int pc, bool printQuick)
-	{
-		int listI = printQuick ? qListI : fListI;
-		if (listI < 7)
-			return;
-		microBlockLink* linkI = printQuick ? qBlockList : fBlockList;
-		for (int i = 0; i <= listI; i++)
-		{
-			u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
-			for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
-			for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].x + (linkI->block.pState.VF[j].y << 8) + (linkI->block.pState.VF[j].z << 16) + (linkI->block.pState.VF[j].w << 24);
-			for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
-			DevCon.WriteLn(Color_Green,
-				"[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
-				"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]",
-				pc, i, crc, linkI->block.pState.q,
-				linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
-				linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
-				linkI->block.pState.blockType, viCRC, vfCRC);
-			linkI = linkI->next;
-		}
-	}
-};

 struct microRange
 {
 	s32 start; // Start PC (The opcode the block starts at)
@@ -246,11 +119,11 @@ struct microVU

 	RecompiledCodeReserve* cache_reserve;
 	u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
-	u8* dispCache; // Dispatchers Cache (where startFunct and exitFunct are written to)
 	u8* startFunct; // Function Ptr to the recompiler dispatcher (start)
 	u8* exitFunct; // Function Ptr to the recompiler dispatcher (exit)
 	u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
 	u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
+	u8* compareStateF;// Function Ptr to search which compares all state.
 	u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
 	u8* copyPLState; // Ptr to function to copy pipeline state into microVU
 	u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
@@ -275,6 +148,139 @@ struct microVU
 	{
 		return (index && THREAD_VU1) ? vu1Thread.vifRegs : regs().GetVifRegs();
 	}

+	__fi u32 compareState(microRegInfo* lhs, microRegInfo* rhs) const {
+		return reinterpret_cast<u32(*)(void*, void*)>(compareStateF)(lhs, rhs);
+	}
+};
+
+class microBlockManager
+{
+private:
+	microBlockLink *qBlockList, *qBlockEnd; // Quick Search
+	microBlockLink *fBlockList, *fBlockEnd; // Full Search
+	std::vector<microBlockLinkRef> quickLookup;
+	int qListI, fListI;
+
+public:
+	inline int getFullListCount() const { return fListI; }
+	microBlockManager()
+	{
+		qListI = fListI = 0;
+		qBlockEnd = qBlockList = nullptr;
+		fBlockEnd = fBlockList = nullptr;
+	}
+	~microBlockManager() { reset(); }
+	void reset()
+	{
+		for (microBlockLink* linkI = qBlockList; linkI != nullptr;)
+		{
+			microBlockLink* freeI = linkI;
+			safe_delete_array(linkI->block.jumpCache);
+			linkI = linkI->next;
+			_aligned_free(freeI);
+		}
+		for (microBlockLink* linkI = fBlockList; linkI != nullptr;)
+		{
+			microBlockLink* freeI = linkI;
+			safe_delete_array(linkI->block.jumpCache);
+			linkI = linkI->next;
+			_aligned_free(freeI);
+		}
+		qListI = fListI = 0;
+		qBlockEnd = qBlockList = nullptr;
+		fBlockEnd = fBlockList = nullptr;
+		quickLookup.clear();
+	};
+	microBlock* add(microVU& mVU, microBlock* pBlock)
+	{
+		microBlock* thisBlock = search(mVU, &pBlock->pState);
+		if (!thisBlock)
+		{
+			u8 fullCmp = pBlock->pState.needExactMatch;
+			if (fullCmp)
+				fListI++;
+			else
+				qListI++;
+
+			microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
+			microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
+			microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 32);
+			newBlock->block.jumpCache = nullptr;
+			newBlock->next = nullptr;
+
+			if (blockEnd)
+			{
+				blockEnd->next = newBlock;
+				blockEnd = newBlock;
+			}
+			else
+			{
+				blockEnd = blockList = newBlock;
+			}
+
+			std::memcpy(&newBlock->block, pBlock, sizeof(microBlock));
+			thisBlock = &newBlock->block;
+
+			quickLookup.push_back({&newBlock->block, pBlock->pState.quick64[0]});
+		}
+		return thisBlock;
+	}
+	__ri microBlock* search(microVU& mVU, microRegInfo* pState)
+	{
+		if (pState->needExactMatch) // Needs Detailed Search (Exact Match of Pipeline State)
+		{
+			microBlockLink* prevI = nullptr;
+			for (microBlockLink* linkI = fBlockList; linkI != nullptr; prevI = linkI, linkI = linkI->next)
+			{
+				if (mVU.compareState(pState, &linkI->block.pState) == 0)
+				{
+					if (linkI != fBlockList)
+					{
+						prevI->next = linkI->next;
+						linkI->next = fBlockList;
+						fBlockList = linkI;
+					}
+
+					return &linkI->block;
+				}
+			}
+		}
+		else // Can do Simple Search (Only Matches the Important Pipeline Stuff)
+		{
+			const u64 quick64 = pState->quick64[0];
+			for (const microBlockLinkRef& ref : quickLookup)
+			{
+				if (ref.quick != quick64) continue;
+				if (doConstProp && (ref.pBlock->pState.vi15 != pState->vi15)) continue;
+				if (doConstProp && (ref.pBlock->pState.vi15v != pState->vi15v)) continue;
+				return ref.pBlock;
+			}
+		}
+		return nullptr;
+	}
+	void printInfo(int pc, bool printQuick)
+	{
+		int listI = printQuick ? qListI : fListI;
+		if (listI < 7)
+			return;
+		microBlockLink* linkI = printQuick ? qBlockList : fBlockList;
+		for (int i = 0; i <= listI; i++)
+		{
+			u32 viCRC = 0, vfCRC = 0, crc = 0, z = sizeof(microRegInfo) / 4;
+			for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
+			for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].x + (linkI->block.pState.VF[j].y << 8) + (linkI->block.pState.VF[j].z << 16) + (linkI->block.pState.VF[j].w << 24);
+			for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
+			DevCon.WriteLn(Color_Green,
+				"[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
+				"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]",
+				pc, i, crc, linkI->block.pState.q,
+				linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
+				linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
+				linkI->block.pState.blockType, viCRC, vfCRC);
+			linkI = linkI->next;
+		}
+	}
 };

 // microVU rec structs
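Alongside the move, the block manager stops calling the mVUquickSearch helper and instead goes through the per-VU generated comparison routine: compareStateF points at emitted code, and compareState() casts it to u32(*)(void*, void*) before calling. A return of 0 means the two pipeline-state blobs match, memcmp-style, which is why search() tests == 0. A hedged C++ stand-in for what the generated routine presumably computes (the real one is emitted x86, likely vectorized):

#include <cstring>

// Reference semantics for the generated compareStateF routine:
// "0 means equal", like memcmp over the whole microRegInfo blob.
u32 CompareStateReference(const microRegInfo* lhs, const microRegInfo* rhs)
{
	return std::memcmp(lhs, rhs, sizeof(microRegInfo)) == 0 ? 0 : 1;
}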
@@ -290,7 +290,7 @@ void normBranchCompile(microVU& mVU, u32 branchPC)
 {
 	microBlock* pBlock;
 	blockCreate(branchPC / 8);
-	pBlock = mVUblocks[branchPC / 8]->search((microRegInfo*)&mVUregs);
+	pBlock = mVUblocks[branchPC / 8]->search(mVU, (microRegInfo*)&mVUregs);
 	if (pBlock)
 		xJMP(pBlock->x86ptrStart);
 	else
@@ -540,7 +540,7 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc)
 	microBlock* bBlock;
 	incPC2(1); // Check if Branch Non-Taken Side has already been recompiled
 	blockCreate(iPC / 2);
-	bBlock = mVUblocks[iPC / 2]->search((microRegInfo*)&mVUregs);
+	bBlock = mVUblocks[iPC / 2]->search(mVU, (microRegInfo*)&mVUregs);
 	incPC2(-1);
 	if (bBlock) // Branch non-taken has already been compiled
 	{
@@ -531,7 +531,7 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr)
 		memcpy((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
 	}
 	mVUblock.x86ptrStart = thisPtr;
-	mVUpBlock = mVUblocks[mVUstartPC / 2]->add(&mVUblock); // Add this block to block manager
+	mVUpBlock = mVUblocks[mVUstartPC / 2]->add(mVU, &mVUblock); // Add this block to block manager
 	mVUregs.needExactMatch = (mVUpBlock->pState.blockType) ? 7 : 0; // ToDo: Fix 1-Op block flag linking (MGS2:Demo/Sly Cooper)
 	mVUregs.blockType = 0;
 	mVUregs.viBackUp = 0;
@@ -988,7 +988,7 @@ perf_and_return:
 // Returns the entry point of the block (compiles it if not found)
 __fi void* mVUentryGet(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState)
 {
-    microBlock* pBlock = block->search((microRegInfo*)pState);
+    microBlock* pBlock = block->search(mVU, (microRegInfo*)pState);
     if (pBlock)
         return pBlock->x86ptrStart;
     else
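The three call sites updated above (normBranchCompile, condBranch, mVUentryGet) all follow the same lookup-or-compile shape. A condensed sketch, assuming mVUcompile(mVU, startPC, pState) is the compile fallback taken on a miss (that helper sits outside this diff):

// Sketch of the shared dispatch pattern: probe the block manager with the
// current pipeline state, reuse the emitted code on a hit, compile on a miss.
void* entryFor(microVU& mVU, microBlockManager* block, u32 startPC, uptr pState)
{
    if (microBlock* pBlock = block->search(mVU, (microRegInfo*)pState))
        return pBlock->x86ptrStart;           // hit: code was already emitted
    return mVUcompile(mVU, startPC, pState); // miss: recompile (assumed helper)
}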
@@ -31,7 +31,7 @@ static bool mvuNeedsFPCRUpdate(mV)
 // Generates the code for entering/exit recompiled blocks
 void mVUdispatcherAB(mV)
 {
-    mVU.startFunct = x86Ptr;
+    mVU.startFunct = xGetAlignedCallTarget();

     {
         xScopedStackFrame frame(false, true);
@@ -92,9 +92,6 @@ void mVUdispatcherAB(mV)

     xRET();

-    pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
-        "microVU: Dispatcher generation exceeded reserved cache area!");
-
     Perf::any.Register(mVU.startFunct, static_cast<u32>(xGetPtr() - mVU.startFunct),
         mVU.index ? "VU1StartFunc" : "VU0StartFunc");
 }
@@ -102,7 +99,7 @@ void mVUdispatcherAB(mV)
 // Generates the code for resuming/exit xgkick
 void mVUdispatcherCD(mV)
 {
-    mVU.startFunctXG = x86Ptr;
+    mVU.startFunctXG = xGetAlignedCallTarget();

     {
         xScopedStackFrame frame(false, true);
@@ -135,17 +132,13 @@ void mVUdispatcherCD(mV)

     xRET();

-    pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
-        "microVU: Dispatcher generation exceeded reserved cache area!");
-
     Perf::any.Register(mVU.startFunctXG, static_cast<u32>(xGetPtr() - mVU.startFunctXG),
         mVU.index ? "VU1StartFuncXG" : "VU0StartFuncXG");
 }

-void mvuGenerateWaitMTVU(mV)
+static void mVUGenerateWaitMTVU(mV)
 {
-    xAlignCallTarget();
-    mVU.waitMTVU = x86Ptr;
+    mVU.waitMTVU = xGetAlignedCallTarget();

     int num_xmms = 0, num_gprs = 0;

@@ -215,17 +208,13 @@ void mvuGenerateWaitMTVU(mV)

     xRET();

-    pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
-        "microVU: Dispatcher generation exceeded reserved cache area!");
-
     Perf::any.Register(mVU.waitMTVU, static_cast<u32>(xGetPtr() - mVU.waitMTVU),
         mVU.index ? "VU1WaitMTVU" : "VU0WaitMTVU");
 }

-void mvuGenerateCopyPipelineState(mV)
+static void mVUGenerateCopyPipelineState(mV)
 {
-    xAlignCallTarget();
-    mVU.copyPLState = x86Ptr;
+    mVU.copyPLState = xGetAlignedCallTarget();

     if (x86caps.hasAVX2)
     {
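Both generators above also collapse the old two-step align-then-read sequence into a single call. Inferring only from the lines it replaces, xGetAlignedCallTarget() presumably behaves like this sketch:

// Presumed equivalent of the helper adopted in the hunks above, inferred
// from the removed "xAlignCallTarget(); ptr = x86Ptr;" pairs (assumption):
u8* xGetAlignedCallTarget()
{
    xAlignCallTarget(); // pad x86Ptr up to the emitter's call-target alignment
    return x86Ptr;      // hand back the now-aligned emission pointer
}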
@@ -258,13 +247,76 @@ void mvuGenerateCopyPipelineState(mV)

     xRET();

-    pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
-        "microVU: Dispatcher generation exceeded reserved cache area!");
-
     Perf::any.Register(mVU.copyPLState, static_cast<u32>(xGetPtr() - mVU.copyPLState),
         mVU.index ? "VU1CopyPLState" : "VU0CopyPLState");
 }

+//------------------------------------------------------------------
+// Micro VU - Custom Quick Search
+//------------------------------------------------------------------
+
+// Generates a custom optimized block-search function
+// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
+static void mVUGenerateCompareState(mV)
+{
+    mVU.compareStateF = xGetAlignedCallTarget();
+
+    if (!x86caps.hasAVX2)
+    {
+        xMOVAPS  (xmm0, ptr32[arg1reg]);
+        xPCMP.EQD(xmm0, ptr32[arg2reg]);
+        xMOVAPS  (xmm1, ptr32[arg1reg + 0x10]);
+        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
+        xPAND    (xmm0, xmm1);
+
+        xMOVMSKPS(eax, xmm0);
+        xXOR     (eax, 0xf);
+        xForwardJNZ8 exitPoint;
+
+        xMOVAPS  (xmm0, ptr32[arg1reg + 0x20]);
+        xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
+        xMOVAPS  (xmm1, ptr32[arg1reg + 0x30]);
+        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
+        xPAND    (xmm0, xmm1);
+
+        xMOVAPS  (xmm1, ptr32[arg1reg + 0x40]);
+        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x40]);
+        xMOVAPS  (xmm2, ptr32[arg1reg + 0x50]);
+        xPCMP.EQD(xmm2, ptr32[arg2reg + 0x50]);
+        xPAND    (xmm1, xmm2);
+        xPAND    (xmm0, xmm1);
+
+        xMOVMSKPS(eax, xmm0);
+        xXOR(eax, 0xf);
+
+        exitPoint.SetTarget();
+    }
+    else
+    {
+        // We have to use unaligned loads here, because the blocks are only 16 byte aligned.
+        xVMOVUPS(ymm0, ptr[arg1reg]);
+        xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
+        xVPMOVMSKB(eax, ymm0);
+        xXOR(eax, 0xffffffff);
+        xForwardJNZ8 exitPoint;
+
+        xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
+        xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
+        xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
+        xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
+        xVPAND(ymm0, ymm0, ymm1);
+
+        xVPMOVMSKB(eax, ymm0);
+        xNOT(eax);
+
+        exitPoint.SetTarget();
+        xVZEROUPPER();
+    }
+
+    xRET();
+}
+
 //------------------------------------------------------------------
 // Execution Functions
 //------------------------------------------------------------------
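Functionally, both paths of mVUGenerateCompareState emit the same predicate: eax ends up zero exactly when two pipeline-state blocks match, with an early exit after the first 0x20 bytes. A plain C++ reference of that contract (the 0x60 total size is read off the 0x00..0x50 offsets in the emitted loads, not stated elsewhere in the diff):

#include <cstring>

// Reference semantics of the generated compareStateF routine above:
// returns 0 exactly when the two 0x60-byte state blocks are identical.
// The generated code returns some nonzero lane mask on mismatch; only
// the zero/nonzero distinction matters to callers.
static u32 compareStateReference(const void* a, const void* b)
{
    return std::memcmp(a, b, 0x60) == 0 ? 0 : ~0u;
}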
@@ -190,18 +190,6 @@ typedef Fntype_mVUrecInst* Fnptr_mVUrecInst;
 #define opCase3 if (opCase == 3) // I Opcodes
 #define opCase4 if (opCase == 4) // Q Opcodes

-//------------------------------------------------------------------
-// Define mVUquickSearch
-//------------------------------------------------------------------
-alignas(__pagesize) extern u8 mVUsearchXMM[__pagesize];
-typedef u32 (*mVUCall)(void*, void*);
-#define mVUquickSearch(dest, src, size) ((((mVUCall)((void*)mVUsearchXMM))(dest, src)) == 0)
-#define mVUemitSearch() \
-    { \
-        mVUcustomSearch(); \
-    }
-//------------------------------------------------------------------
-
 // Misc Macros...
 #define mVUcurProg mVU.prog.cur[0]
 #define mVUblocks mVU.prog.cur->block
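With the mVUquickSearch macro and its static code page removed above (the emitter itself goes away in the next hunk), the comparison entry point now lives per-VU at mVU.compareStateF. A hypothetical caller-side shim, reusing the two-argument calling convention that the removed mVUCall typedef documented; the name and exact call site are assumptions, not part of this diff:

// Hypothetical shim: same contract as the removed mVUquickSearch macro,
// but routed through the per-VU generated routine instead of mVUsearchXMM.
typedef u32 (*mVUCall)(void*, void*);
static bool statesMatch(microVU& mVU, void* dest, void* src)
{
    return ((mVUCall)(void*)mVU.compareStateF)(dest, src) == 0;
}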
@@ -606,73 +606,3 @@ void SSE_DIVSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, co
 {
     clampOp(xDIV.SS, false);
 }
-
-//------------------------------------------------------------------
-// Micro VU - Custom Quick Search
-//------------------------------------------------------------------
-
-alignas(__pagesize) u8 mVUsearchXMM[__pagesize];
-
-// Generates a custom optimized block-search function
-// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
-void mVUcustomSearch()
-{
-    HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ReadWrite());
-    memset(mVUsearchXMM, 0xcc, __pagesize);
-    xSetPtr(mVUsearchXMM);
-
-    if (!x86caps.hasAVX2)
-    {
-        xMOVAPS  (xmm0, ptr32[arg1reg]);
-        xPCMP.EQD(xmm0, ptr32[arg2reg]);
-        xMOVAPS  (xmm1, ptr32[arg1reg + 0x10]);
-        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x10]);
-        xPAND    (xmm0, xmm1);
-
-        xMOVMSKPS(eax, xmm0);
-        xXOR     (eax, 0xf);
-        xForwardJNZ8 exitPoint;
-
-        xMOVAPS  (xmm0, ptr32[arg1reg + 0x20]);
-        xPCMP.EQD(xmm0, ptr32[arg2reg + 0x20]);
-        xMOVAPS  (xmm1, ptr32[arg1reg + 0x30]);
-        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x30]);
-        xPAND    (xmm0, xmm1);
-
-        xMOVAPS  (xmm1, ptr32[arg1reg + 0x40]);
-        xPCMP.EQD(xmm1, ptr32[arg2reg + 0x40]);
-        xMOVAPS  (xmm2, ptr32[arg1reg + 0x50]);
-        xPCMP.EQD(xmm2, ptr32[arg2reg + 0x50]);
-        xPAND    (xmm1, xmm2);
-        xPAND    (xmm0, xmm1);
-
-        xMOVMSKPS(eax, xmm0);
-        xXOR(eax, 0xf);
-
-        exitPoint.SetTarget();
-    }
-    else
-    {
-        // We have to use unaligned loads here, because the blocks are only 16 byte aligned.
-        xVMOVUPS(ymm0, ptr[arg1reg]);
-        xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg]);
-        xVPMOVMSKB(eax, ymm0);
-        xXOR(eax, 0xffffffff);
-        xForwardJNZ8 exitPoint;
-
-        xVMOVUPS(ymm0, ptr[arg1reg + 0x20]);
-        xVMOVUPS(ymm1, ptr[arg1reg + 0x40]);
-        xVPCMP.EQD(ymm0, ymm0, ptr[arg2reg + 0x20]);
-        xVPCMP.EQD(ymm1, ymm1, ptr[arg2reg + 0x40]);
-        xVPAND(ymm0, ymm0, ymm1);
-
-        xVPMOVMSKB(eax, ymm0);
-        xNOT(eax);
-
-        exitPoint.SetTarget();
-        xVZEROUPPER();
-    }
-
-    xRET();
-    HostSys::MemProtectStatic(mVUsearchXMM, PageAccess_ExecOnly());
-}
@@ -390,8 +390,6 @@ void VifUnpackSSE_Init()
     for (int c = 0; c < 4; c++)
         nVifGen(a, b, c);

-    nVifUpkExec->ForbidModification();
-
     DevCon.WriteLn("Unpack function generation complete. Generated function statistics:");
     DevCon.Indent().WriteLn(
         "Reserved buffer : %u bytes @ 0x%016" PRIXPTR "\n"