mirror of https://github.com/PCSX2/pcsx2.git
x86/microVU: Use unrolled copy loop for updating PL state
This commit is contained in:
parent
00d768a6bf
commit
cdadad5689
|
@ -90,6 +90,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
|
|||
mVUdispatcherAB(mVU);
|
||||
mVUdispatcherCD(mVU);
|
||||
mvuGenerateWaitMTVU(mVU);
|
||||
mvuGenerateCopyPipelineState(mVU);
|
||||
mVUemitSearch();
|
||||
|
||||
mVU.regs().nextBlockCycles = 0;
|
||||
|
|
|
@ -252,6 +252,7 @@ struct microVU
|
|||
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
|
||||
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
|
||||
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
|
||||
u8* copyPLState; // Ptr to function to copy pipeline state into microVU
|
||||
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
|
||||
u32 code; // Contains the current Instruction
|
||||
u32 divFlag; // 1 instance of I/D flags
|
||||
|
|
|
@ -459,16 +459,6 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC)
|
|||
}
|
||||
}
|
||||
|
||||
// Saves Pipeline State for resuming from early exits
|
||||
__fi void mVUsavePipelineState(microVU& mVU)
|
||||
{
|
||||
u32* lpS = (u32*)&mVU.prog.lpState;
|
||||
for (size_t i = 0; i < (sizeof(microRegInfo) - 4) / 4; i++, lpS++)
|
||||
{
|
||||
xMOV(ptr32[lpS], lpS[0]);
|
||||
}
|
||||
}
|
||||
|
||||
// Test cycles to see if we need to exit-early...
|
||||
void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
|
||||
{
|
||||
|
@ -483,12 +473,29 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
|
|||
xCMP(eax, 0);
|
||||
xForwardJGE32 skip;
|
||||
|
||||
mVUsavePipelineState(mVU);
|
||||
u8* writeback = x86Ptr;
|
||||
xLoadFarAddr(rax, x86Ptr);
|
||||
xFastCall((void*)mVU.copyPLState);
|
||||
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
|
||||
|
||||
mVUendProgram(mVU, &mFC, 0);
|
||||
|
||||
{
|
||||
xAlignPtr(SSE_ALIGN_N);
|
||||
|
||||
u8* curx86Ptr = x86Ptr;
|
||||
x86SetPtr(writeback);
|
||||
xLoadFarAddr(rax, curx86Ptr);
|
||||
x86SetPtr(curx86Ptr);
|
||||
|
||||
static_assert((sizeof(microRegInfo) % 4) == 0);
|
||||
const u32* lpPtr = reinterpret_cast<const u32*>(&mVU.prog.lpState);
|
||||
const u32* lpEnd = lpPtr + (sizeof(microRegInfo) / 4);
|
||||
while (lpPtr != lpEnd)
|
||||
xWrite32(*(lpPtr++));
|
||||
}
|
||||
|
||||
skip.SetTarget();
|
||||
|
||||
xSUB(ptr32[&mVU.cycles], mVUcycles);
|
||||
|
|
|
@ -125,6 +125,7 @@ void mVUdispatcherCD(mV)
|
|||
|
||||
void mvuGenerateWaitMTVU(mV)
|
||||
{
|
||||
xAlignCallTarget();
|
||||
mVU.waitMTVU = x86Ptr;
|
||||
|
||||
int num_xmms = 0, num_gprs = 0;
|
||||
|
@ -198,6 +199,58 @@ void mvuGenerateWaitMTVU(mV)
|
|||
"microVU: Dispatcher generation exceeded reserved cache area!");
|
||||
}
|
||||
|
||||
// Generates the copyPLState stub: copies 160 bytes of pipeline state from
// the address in RAX into mVU.prog.lpState. Source reads are aligned
// (MOVAPS/VMOVAPS — caller must hand us a 16/32-byte aligned RAX), while
// destination writes are unaligned (MOVUPS/VMOVUPS) since lpState carries
// no alignment guarantee.
void mvuGenerateCopyPipelineState(mV)
{
	xAlignCallTarget();
	mVU.copyPLState = x86Ptr;

	// Destination is fixed at generation time; name it once instead of
	// repeating the cast on every store.
	u8* const dst = reinterpret_cast<u8*>(&mVU.prog.lpState);

	if (x86caps.hasAVX2)
	{
		// 5 x 32-byte YMM transfers.
		xVMOVAPS(ymm0, ptr[rax]);
		xVMOVAPS(ymm1, ptr[rax + 32u]);
		xVMOVAPS(ymm2, ptr[rax + 64u]);
		xVMOVAPS(ymm3, ptr[rax + 96u]);
		xVMOVAPS(ymm4, ptr[rax + 128u]);

		xVMOVUPS(ptr[dst], ymm0);
		xVMOVUPS(ptr[dst + 32u], ymm1);
		xVMOVUPS(ptr[dst + 64u], ymm2);
		xVMOVUPS(ptr[dst + 96u], ymm3);
		xVMOVUPS(ptr[dst + 128u], ymm4);

		// Avoid AVX->SSE transition penalties after using YMM registers.
		xVZEROUPPER();
	}
	else
	{
		// 10 x 16-byte XMM transfers.
		xMOVAPS(xmm0, ptr[rax]);
		xMOVAPS(xmm1, ptr[rax + 16u]);
		xMOVAPS(xmm2, ptr[rax + 32u]);
		xMOVAPS(xmm3, ptr[rax + 48u]);
		xMOVAPS(xmm4, ptr[rax + 64u]);
		xMOVAPS(xmm5, ptr[rax + 80u]);
		xMOVAPS(xmm6, ptr[rax + 96u]);
		xMOVAPS(xmm7, ptr[rax + 112u]);
		xMOVAPS(xmm8, ptr[rax + 128u]);
		xMOVAPS(xmm9, ptr[rax + 144u]);

		xMOVUPS(ptr[dst], xmm0);
		xMOVUPS(ptr[dst + 16u], xmm1);
		xMOVUPS(ptr[dst + 32u], xmm2);
		xMOVUPS(ptr[dst + 48u], xmm3);
		xMOVUPS(ptr[dst + 64u], xmm4);
		xMOVUPS(ptr[dst + 80u], xmm5);
		xMOVUPS(ptr[dst + 96u], xmm6);
		xMOVUPS(ptr[dst + 112u], xmm7);
		xMOVUPS(ptr[dst + 128u], xmm8);
		xMOVUPS(ptr[dst + 144u], xmm9);
	}

	xRET();

	pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
		"microVU: Dispatcher generation exceeded reserved cache area!");
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Execution Functions
|
||||
//------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue