From 4f65ef4d33e256ca94e64f8af3499dc1ea7e9461 Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Sun, 25 Dec 2022 22:14:21 +1000
Subject: [PATCH] x86/microVU: Preload registers at beginning of block

---
 pcsx2/x86/microVU_Compile.inl | 107 ++++++++++++++++++++++++++++++++++
 pcsx2/x86/microVU_IR.h        |  67 +++++++++++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl
index 3ed2b73866..17f368d213 100644
--- a/pcsx2/x86/microVU_Compile.inl
+++ b/pcsx2/x86/microVU_Compile.inl
@@ -604,6 +604,111 @@ void mVUSaveFlags(microVU& mVU, microFlagCycles& mFC, microFlagCycles& mFCBackup
 	memcpy(&mFCBackup, &mFC, sizeof(microFlagCycles));
 	mVUsetFlags(mVU, mFCBackup); // Sets Up Flag instances
 }
+
+// Looks ahead over up to endCount upcoming instructions and asks the register
+// allocator (allocReg/allocGPR, immediately followed by clearNeeded) for every
+// VF/VI register they read, and for every VF register they only partially write.
+// Preloading of a register class stops once no more than REQUIRED_FREE_XMMS /
+// REQUIRED_FREE_GPRS host registers of that class remain free; the scan itself
+// stops at an XGKICK, after a branch, or when neither class has room left.
+// iPC and mVU.code are restored before returning.
+static void mvuPreloadRegisters(microVU& mVU, u32 endCount)
+{
+	static constexpr int REQUIRED_FREE_XMMS = 3; // some space for temps
+	static constexpr int REQUIRED_FREE_GPRS = 1; // some space for temps
+	static constexpr int TRACKED_REGS = 32; // width of the u32 bitmasks below
+
+	// Bitmasks of the VF/VI registers that already have a host register.
+	u32 vfs_loaded = 0;
+	u32 vis_loaded = 0;
+
+	for (int reg = 0; reg < mVU.regAlloc->getXmmCount(); reg++)
+	{
+		// Indices outside 0..31 cannot be represented in the u32 mask (shifting by
+		// them would be undefined behaviour), so they are simply not tracked.
+		const int vf = mVU.regAlloc->getRegVF(reg);
+		if (vf >= 0 && vf < TRACKED_REGS)
+			vfs_loaded |= (1u << vf);
+	}
+
+	for (int reg = 0; reg < mVU.regAlloc->getGPRCount(); reg++)
+	{
+		const int vi = mVU.regAlloc->getRegVI(reg);
+		if (vi >= 0 && vi < TRACKED_REGS)
+			vis_loaded |= (1u << vi);
+	}
+
+	const u32 orig_pc = iPC;
+	const u32 orig_code = mVU.code;
+	int free_regs = mVU.regAlloc->getFreeXmmCount();
+	int free_gprs = mVU.regAlloc->getFreeGPRCount();
+
+	// Register 0 is never preloaded, and neither is anything already in the mask.
+	auto preloadVF = [&mVU, &vfs_loaded, &free_regs](u8 reg)
+	{
+		if (free_regs <= REQUIRED_FREE_XMMS || reg == 0 || reg >= TRACKED_REGS || (vfs_loaded & (1u << reg)) != 0)
+			return;
+
+		mVU.regAlloc->clearNeeded(mVU.regAlloc->allocReg(reg));
+		vfs_loaded |= (1u << reg);
+		free_regs--;
+	};
+
+	auto preloadVI = [&mVU, &vis_loaded, &free_gprs](u8 reg)
+	{
+		if (free_gprs <= REQUIRED_FREE_GPRS || reg == 0 || reg >= TRACKED_REGS || (vis_loaded & (1u << reg)) != 0)
+			return;
+
+		mVU.regAlloc->clearNeeded(mVU.regAlloc->allocGPR(reg));
+		vis_loaded |= (1u << reg);
+		free_gprs--;
+	};
+
+	// Keep scanning only while at least one class can still accept a preload;
+	// this uses the same strict threshold as preloadVF/preloadVI above.
+	auto canPreload = [&free_regs, &free_gprs]() {
+		return (free_regs > REQUIRED_FREE_XMMS || free_gprs > REQUIRED_FREE_GPRS);
+	};
+
+	for (u32 x = 0; x < endCount && canPreload(); x++)
+	{
+		incPC(1);
+
+		const microOp* info = &mVUinfo;
+		if (info->doXGKICK)
+			break;
+
+		for (u32 i = 0; i < 2; i++)
+		{
+			preloadVF(info->uOp.VF_read[i].reg);
+			preloadVF(info->lOp.VF_read[i].reg);
+			if (info->lOp.VI_read[i].used)
+				preloadVI(info->lOp.VI_read[i].reg);
+		}
+
+		const microVFreg& uvfr = info->uOp.VF_write;
+		if (uvfr.reg != 0 && (!uvfr.x || !uvfr.y || !uvfr.z || !uvfr.w))
+		{
+			// not writing entire vector
+			preloadVF(uvfr.reg);
+		}
+
+		const microVFreg& lvfr = info->lOp.VF_write;
+		if (lvfr.reg != 0 && (!lvfr.x || !lvfr.y || !lvfr.z || !lvfr.w))
+		{
+			// not writing entire vector
+			preloadVF(lvfr.reg);
+		}
+
+		if (info->lOp.branch)
+			break;
+	}
+
+	iPC = orig_pc;
+	mVU.code = orig_code;
+}
+
 void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
 {
 	microFlagCycles mFC;
@@ -769,6 +874,8 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
 	mVUbranch = 0;
 	u32 x = 0;
 
+	mvuPreloadRegisters(mVU, endCount);
+
 	for (; x < endCount; x++)
 	{
 #if 0
diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h
index a83ec58424..8371e10afd 100644
--- a/pcsx2/x86/microVU_IR.h
+++ b/pcsx2/x86/microVU_IR.h
@@ -433,11 +433,78 @@ public:
 	{
 		return xmmTotal + 1;
 	}
+
+	// Number of XMM slots that are not marked needed and hold no VF register (VFreg < 0).
+	int getFreeXmmCount()
+	{
+		int count = 0;
+
+		for (int i = 0; i < xmmTotal; i++)
+		{
+			if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0))
+				count++;
+		}
+
+		return count;
+	}
+
+	// True if any XMM slot currently has its VFreg equal to 'vfreg'.
+	bool hasRegVF(int vfreg)
+	{
+		for (int i = 0; i < xmmTotal; i++)
+		{
+			if (xmmMap[i].VFreg == vfreg)
+				return true;
+		}
+
+		return false;
+	}
+
+	// VFreg stored in XMM slot i, or -1 when i is outside [0, xmmTotal).
+	int getRegVF(int i)
+	{
+		return (i >= 0 && i < xmmTotal) ? xmmMap[i].VFreg : -1;
+	}
+
 	int getGPRCount()
 	{
 		return gprTotal;
 	}
 
+	// Number of GPR slots with no VI register (VIreg < 0) whose 'usable' flag is clear.
+	// NOTE(review): getFreeXmmCount() filters on !isNeeded while this filters on
+	// !usable; confirm that counting non-usable slots as free is intended.
+	int getFreeGPRCount()
+	{
+		int count = 0;
+
+		for (int i = 0; i < gprTotal; i++)
+		{
+			if (!gprMap[i].usable && (gprMap[i].VIreg < 0))
+				count++;
+		}
+
+		return count;
+	}
+
+	// True if any GPR slot currently has its VIreg equal to 'vireg'.
+	bool hasRegVI(int vireg)
+	{
+		for (int i = 0; i < gprTotal; i++)
+		{
+			if (gprMap[i].VIreg == vireg)
+				return true;
+		}
+
+		return false;
+	}
+
+	// VIreg stored in GPR slot i, or -1 when i is outside [0, gprTotal).
+	int getRegVI(int i)
+	{
+		return (i >= 0 && i < gprTotal) ? gprMap[i].VIreg : -1;
+	}
+
 	// Flushes all allocated registers (i.e. writes-back to memory all modified registers).
 	// If clearState is 0, then it keeps cached reg data valid
 	// If clearState is 1, then it invalidates all cached reg data after write-back