x86/microVU: Use unrolled copy loop for updating PL state

This commit is contained in:
Stenzek 2022-12-25 22:14:19 +10:00 committed by refractionpcsx2
parent 00d768a6bf
commit cdadad5689
4 changed files with 74 additions and 12 deletions

View File

@ -90,6 +90,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);
mvuGenerateWaitMTVU(mVU);
mvuGenerateCopyPipelineState(mVU);
mVUemitSearch();
mVU.regs().nextBlockCycles = 0;

View File

@ -252,6 +252,7 @@ struct microVU
u8* startFunctXG; // Function Ptr to the recompiler dispatcher (xgkick resume)
u8* exitFunctXG; // Function Ptr to the recompiler dispatcher (xgkick exit)
u8* waitMTVU; // Ptr to function to save registers/sync VU1 thread
u8* copyPLState; // Ptr to function to copy pipeline state into microVU
u8* resumePtrXG; // Ptr to recompiled code position to resume xgkick
u32 code; // Contains the current Instruction
u32 divFlag; // 1 instance of I/D flags

View File

@ -459,16 +459,6 @@ void mVUdebugPrintBlocks(microVU& mVU, bool isEndPC)
}
}
// Saves Pipeline State for resuming from early exits.
// For every 32-bit word of mVU.prog.lpState except the final one, emits a
// store that writes the word's current (compile-time) value back to that
// same address when the generated code runs.
__fi void mVUsavePipelineState(microVU& mVU)
{
	u32* const stateBegin = (u32*)&mVU.prog.lpState;
	// The last 4 bytes of microRegInfo are deliberately left out of the copy.
	u32* const stateEnd = stateBegin + (sizeof(microRegInfo) - 4) / 4;

	for (u32* word = stateBegin; word < stateEnd; ++word)
		xMOV(ptr32[word], *word);
}
// Test cycles to see if we need to exit-early...
void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
{
@ -483,12 +473,29 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
xCMP(eax, 0);
xForwardJGE32 skip;
mVUsavePipelineState(mVU);
u8* writeback = x86Ptr;
xLoadFarAddr(rax, x86Ptr);
xFastCall((void*)mVU.copyPLState);
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
mVUendProgram(mVU, &mFC, 0);
{
xAlignPtr(SSE_ALIGN_N);
u8* curx86Ptr = x86Ptr;
x86SetPtr(writeback);
xLoadFarAddr(rax, curx86Ptr);
x86SetPtr(curx86Ptr);
static_assert((sizeof(microRegInfo) % 4) == 0);
const u32* lpPtr = reinterpret_cast<const u32*>(&mVU.prog.lpState);
const u32* lpEnd = lpPtr + (sizeof(microRegInfo) / 4);
while (lpPtr != lpEnd)
xWrite32(*(lpPtr++));
}
skip.SetTarget();
xSUB(ptr32[&mVU.cycles], mVUcycles);

View File

@ -125,6 +125,7 @@ void mVUdispatcherCD(mV)
void mvuGenerateWaitMTVU(mV)
{
xAlignCallTarget();
mVU.waitMTVU = x86Ptr;
int num_xmms = 0, num_gprs = 0;
@ -198,6 +199,58 @@ void mvuGenerateWaitMTVU(mV)
"microVU: Dispatcher generation exceeded reserved cache area!");
}
// Generates the shared routine that copies a pipeline-state snapshot into
// mVU.prog.lpState, and records its entry point in mVU.copyPLState.
//
// Contract of the generated routine:
//   - rax holds the address of the snapshot to copy from (callers set it up
//     with xLoadFarAddr(rax, ...) before calling through mVU.copyPLState).
//   - The snapshot is read with aligned moves and written to lpState with
//     unaligned moves.
//   - Clobbers ymm0-ymm4 on the AVX2 path, xmm0-xmm9 otherwise.
//
// NOTE(review): the AVX2 path uses aligned 32-byte loads from [rax]; confirm
// that the alignment applied where the snapshot is emitted (SSE_ALIGN_N) is
// at least 32 bytes.
void mvuGenerateCopyPipelineState(mV)
{
	// The copy below is fully unrolled for exactly 160 bytes (5 x 32 or
	// 10 x 16). Fail the build if the pipeline state ever changes size, rather
	// than silently copying too little or writing past the end of lpState.
	static_assert(sizeof(mVU.prog.lpState) == 160,
		"Unrolled pipeline state copy assumes a 160 byte microRegInfo");

	xAlignCallTarget();
	mVU.copyPLState = x86Ptr;

	// Destination of the copy, addressed bytewise so the offsets below are in bytes.
	u8* const lpStateDst = reinterpret_cast<u8*>(&mVU.prog.lpState);

	if (x86caps.hasAVX2)
	{
		// Load the whole snapshot first...
		xVMOVAPS(ymm0, ptr[rax]);
		xVMOVAPS(ymm1, ptr[rax + 32u]);
		xVMOVAPS(ymm2, ptr[rax + 64u]);
		xVMOVAPS(ymm3, ptr[rax + 96u]);
		xVMOVAPS(ymm4, ptr[rax + 128u]);

		// ...then store it into lpState.
		xVMOVUPS(ptr[lpStateDst], ymm0);
		xVMOVUPS(ptr[lpStateDst + 32u], ymm1);
		xVMOVUPS(ptr[lpStateDst + 64u], ymm2);
		xVMOVUPS(ptr[lpStateDst + 96u], ymm3);
		xVMOVUPS(ptr[lpStateDst + 128u], ymm4);

		// Clear the upper ymm halves before returning to the caller's code.
		xVZEROUPPER();
	}
	else
	{
		// Load the whole snapshot first...
		xMOVAPS(xmm0, ptr[rax]);
		xMOVAPS(xmm1, ptr[rax + 16u]);
		xMOVAPS(xmm2, ptr[rax + 32u]);
		xMOVAPS(xmm3, ptr[rax + 48u]);
		xMOVAPS(xmm4, ptr[rax + 64u]);
		xMOVAPS(xmm5, ptr[rax + 80u]);
		xMOVAPS(xmm6, ptr[rax + 96u]);
		xMOVAPS(xmm7, ptr[rax + 112u]);
		xMOVAPS(xmm8, ptr[rax + 128u]);
		xMOVAPS(xmm9, ptr[rax + 144u]);

		// ...then store it into lpState.
		xMOVUPS(ptr[lpStateDst], xmm0);
		xMOVUPS(ptr[lpStateDst + 16u], xmm1);
		xMOVUPS(ptr[lpStateDst + 32u], xmm2);
		xMOVUPS(ptr[lpStateDst + 48u], xmm3);
		xMOVUPS(ptr[lpStateDst + 64u], xmm4);
		xMOVUPS(ptr[lpStateDst + 80u], xmm5);
		xMOVUPS(ptr[lpStateDst + 96u], xmm6);
		xMOVUPS(ptr[lpStateDst + 112u], xmm7);
		xMOVUPS(ptr[lpStateDst + 128u], xmm8);
		xMOVUPS(ptr[lpStateDst + 144u], xmm9);
	}

	xRET();

	// The routine lives in the dispatcher cache; make sure it still fits.
	pxAssertDev(xGetPtr() < (mVU.dispCache + mVUdispCacheSize),
		"microVU: Dispatcher generation exceeded reserved cache area!");
}
//------------------------------------------------------------------
// Execution Functions
//------------------------------------------------------------------