From 37675b6c495a747aba6ad6890447c05da8c559a8 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 27 Jul 2009 00:27:33 +0000 Subject: [PATCH] microVU: - Finished implementing regAlloc. Sadly the speed gain wasn't great (0%~2% in the games I tried). I think the speedup should be bigger with a CPU that supports SSE4.1, but I don't have one to test :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1573 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.h | 1 - pcsx2/x86/microVU_Compile.inl | 5 ++- pcsx2/x86/microVU_IR.h | 20 ++++++------ pcsx2/x86/microVU_Lower.inl | 59 ----------------------------------- pcsx2/x86/microVU_Misc.h | 3 ++ pcsx2/x86/microVU_Upper.inl | 50 +++++++++-------------------- 6 files changed, 32 insertions(+), 106 deletions(-) diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 90ab25ee21..605d8a9e59 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -136,7 +136,6 @@ struct microVU { PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ - PCSX2_ALIGNED16(u32 xmmVFb[4]); // Backup for VF regs u32 index; // VU Index (VU0 or VU1) u32 vuMemSize; // VU Main Memory Size (in bytes) diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index ed62120809..3c57ced0cd 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -259,6 +259,7 @@ microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip int fClip = (isEbit) ? 
findFlagInst(xClip, 0x7fffffff) : cI; int qInst = 0; int pInst = 0; + mVU->regAlloc->flushAll(); if (isEbit) { mVUprint("mVUcompile ebit"); @@ -370,6 +371,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { mVUsetupRange(mVU, startPC, 1); // Reset regAlloc + mVU->regAlloc->flushAll(); mVU->regAlloc->reset(); // First Pass @@ -435,7 +437,8 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); } else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } - + if (!doRegAlloc) { mVU->regAlloc->flushAll(); } + if (!mVUinfo.isBdelay) { incPC(1); } else { microBlock* bBlock = NULL; diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index fde98f75c6..73041293f8 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -181,7 +181,6 @@ private: int findFreeRegRec(int startIdx) { for (int i = startIdx; i < xmmTotal; i++) { if (!xmmReg[i].isNeeded) { - if ((i+1) >= xmmTotal) return i; int x = findFreeRegRec(i+1); if (x == -1) return i; return ((xmmReg[i].count < xmmReg[x].count) ? 
i : x); @@ -223,14 +222,17 @@ public: xmmReg[reg].xyzw = 0; xmmReg[reg].isNeeded = 0; } - void writeBackReg(int reg) { + void writeBackReg(int reg, bool invalidateRegs = 1) { if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0 if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1); else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1); - for (int i = 0; i < xmmTotal; i++) { - if (i == reg) continue; - if (xmmReg[i].reg == xmmReg[reg].reg) { - clearReg(i); // Invalidate any Cached Regs of same vf Reg + if (invalidateRegs) { + for (int i = 0; i < xmmTotal; i++) { + if ((i == reg) || xmmReg[i].isNeeded) continue; + if (xmmReg[i].reg == xmmReg[reg].reg) { + if (xmmReg[i].xyzw && xmmReg[i].xyzw < 0xf) DevCon::Error("microVU Error: writeBackReg() [%d]", params xmmReg[i].reg); + clearReg(i); // Invalidate any Cached Regs of same vf Reg + } } } if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified @@ -271,8 +273,8 @@ public: counter++; if (vfLoadReg >= 0) { // Search For Cached Regs for (int i = 0; i < xmmTotal; i++) { - if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified - || (/*!xmmReg[i].isNeeded &&*/ xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 + if ((xmmReg[i].reg == vfLoadReg) && (!xmmReg[i].xyzw // Reg Was Not Modified + || (xmmReg[i].reg && (xmmReg[i].xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 int z = i; if (vfWriteReg >= 0) { // Reg will be modified if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg @@ -287,7 +289,7 @@ public: } else { // Don't clone reg, but shuffle to adjust for SS ops if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(z); } - else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1); + if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1); else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2); else if (xyzw == 1) 
SSE2_PSHUFD_XMM_to_XMM(z, i, 3); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index e74d92f004..fb017f23e8 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -46,7 +46,6 @@ mVUop(mVU_DIV) { pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); } pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); int t1 = mVU->regAlloc->allocReg(); @@ -80,7 +79,6 @@ mVUop(mVU_DIV) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(t1); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); } } @@ -89,7 +87,6 @@ mVUop(mVU_SQRT) { pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); } pass2 { u8 *ajmp; - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags @@ -102,7 +99,6 @@ mVUop(mVU_SQRT) { if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); } } @@ -111,7 +107,6 @@ mVUop(mVU_RSQRT) { pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); } pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); int t1 = mVU->regAlloc->allocReg(); @@ -147,7 +142,6 @@ mVUop(mVU_RSQRT) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(t1); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); } } @@ -195,7 +189,6 @@ 
mVUop(mVU_EATAN) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("EATAN P"); } } @@ -203,7 +196,6 @@ mVUop(mVU_EATAN) { mVUop(mVU_EATANxy) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); int Fs = mVU->regAlloc->allocReg(); int t2 = mVU->regAlloc->allocReg(); @@ -217,7 +209,6 @@ mVUop(mVU_EATANxy) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("EATANxy P"); } } @@ -225,7 +216,6 @@ mVUop(mVU_EATANxy) { mVUop(mVU_EATANxz) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); int Fs = mVU->regAlloc->allocReg(); int t2 = mVU->regAlloc->allocReg(); @@ -239,7 +229,6 @@ mVUop(mVU_EATANxz) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("EATANxz P"); } } @@ -254,7 +243,6 @@ mVUop(mVU_EATANxz) { mVUop(mVU_EEXP) { pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); int t1 = mVU->regAlloc->allocReg(); int t2 = mVU->regAlloc->allocReg(); @@ -282,7 +270,6 @@ mVUop(mVU_EEXP) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("EEXP P"); } } @@ -306,14 +293,12 @@ microVUt(void) mVU_sumXYZ(int PQ, int Fs) { mVUop(mVU_ELENG) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 
mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(xmmPQ, Fs); SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ELENG P"); } } @@ -321,7 +306,6 @@ mVUop(mVU_ELENG) { mVUop(mVU_ERCPR) { pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); @@ -330,7 +314,6 @@ mVUop(mVU_ERCPR) { SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ERCPR P"); } } @@ -338,7 +321,6 @@ mVUop(mVU_ERCPR) { mVUop(mVU_ERLENG) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(xmmPQ, Fs); @@ -348,7 +330,6 @@ mVUop(mVU_ERLENG) { SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ERLENG P"); } } @@ -356,7 +337,6 @@ mVUop(mVU_ERLENG) { mVUop(mVU_ERSADD) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(xmmPQ, Fs); @@ -365,7 +345,6 @@ mVUop(mVU_ERSADD) { SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ERSADD P"); } } @@ -373,7 +352,6 @@ mVUop(mVU_ERSADD) { mVUop(mVU_ERSQRT) { pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); @@ -382,7 +360,6 @@ mVUop(mVU_ERSQRT) { SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ERSQRT P"); } } @@ -390,13 +367,11 @@ mVUop(mVU_ERSQRT) { mVUop(mVU_ESADD) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ESADD P"); } } @@ -411,7 +386,6 @@ mVUop(mVU_ESADD) { mVUop(mVU_ESIN) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); int t1 = mVU->regAlloc->allocReg(); int t2 = mVU->regAlloc->allocReg(); @@ -433,7 +407,6 @@ mVUop(mVU_ESIN) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ESIN P"); } } @@ -441,13 +414,11 @@ mVUop(mVU_ESIN) { mVUop(mVU_ESQRT) { pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ESQRT P"); } } @@ -455,7 +426,6 @@ mVUop(mVU_ESQRT) { mVUop(mVU_ESUM) { pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); int t1 = mVU->regAlloc->allocReg(); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -467,7 +437,6 @@ mVUop(mVU_ESUM) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 
0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ESUM P"); } } @@ -753,14 +722,12 @@ mVUop(mVU_ISUBIU) { mVUop(mVU_MFIR) { pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); mVUallocVIa(mVU, gprT1, _Is_); MOVSX32R16toR(gprT1, gprT1); SSE2_MOVD_R_to_XMM(Ft, gprT1); if (!_XYZW_SS) { mVUunpack_xyzw(Ft, Ft, 0); } mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("MFIR.%s vf%02d, vi%02d", _XYZW_String, _Ft_, _Fs_); } } @@ -768,11 +735,9 @@ mVUop(mVU_MFIR) { mVUop(mVU_MFP) { pass1 { mVUanalyzeMFP(mVU, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); getPreg(Ft); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); } } @@ -780,10 +745,8 @@ mVUop(mVU_MFP) { mVUop(mVU_MOVE) { pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); } } @@ -791,14 +754,12 @@ mVUop(mVU_MOVE) { mVUop(mVU_MR32) { pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 
3 : 0)))); else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39); mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("MR32.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); } } @@ -806,12 +767,10 @@ mVUop(mVU_MR32) { mVUop(mVU_MTIR) { pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_MOVD_XMM_to_R(gprT1, Fs); mVUallocVIb(mVU, gprT1, _It_); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("MTIR vi%02d, vf%02d%s", _Ft_, _Fs_, _Fsf_String); } } @@ -914,7 +873,6 @@ mVUop(mVU_ISWR) { mVUop(mVU_LQ) { pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 0); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); if (_Is_) { mVUallocVIa(mVU, gprT1, _Is_); @@ -924,7 +882,6 @@ mVUop(mVU_LQ) { } else mVUloadReg(Ft, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); } } @@ -932,7 +889,6 @@ mVUop(mVU_LQ) { mVUop(mVU_LQD) { pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing if (_Is_) { mVUallocVIa(mVU, gprT1, _Is_); SUB16ItoR(gprT1, 1); @@ -949,7 +905,6 @@ mVUop(mVU_LQD) { mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVU->regAlloc->clearNeeded(Ft); } - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Is_); } } @@ -957,7 +912,6 @@ mVUop(mVU_LQD) { mVUop(mVU_LQI) { pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing if (_Is_) { mVUallocVIa(mVU, (!mVUlow.noWriteVF) ? 
gprT1 : gprT2, _Is_); if (!mVUlow.noWriteVF) { @@ -975,7 +929,6 @@ mVUop(mVU_LQI) { mVUloadReg(Ft, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVU->regAlloc->clearNeeded(Ft); } - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_); } } @@ -987,7 +940,6 @@ mVUop(mVU_LQI) { mVUop(mVU_SQ) { pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 0); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); if (_It_) { mVUallocVIa(mVU, gprT1, _It_); @@ -997,7 +949,6 @@ mVUop(mVU_SQ) { } else mVUsaveReg(Fs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); } } @@ -1005,7 +956,6 @@ mVUop(mVU_SQ) { mVUop(mVU_SQD) { pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); if (_It_) { mVUallocVIa(mVU, gprT1, _It_); @@ -1016,7 +966,6 @@ mVUop(mVU_SQD) { } else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); } } @@ -1024,7 +973,6 @@ mVUop(mVU_SQD) { mVUop(mVU_SQI) { pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, 1); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); if (_It_) { mVUallocVIa(mVU, gprT1, _It_); @@ -1036,7 +984,6 @@ mVUop(mVU_SQI) { } else mVUsaveReg(Fs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_); } } @@ -1049,14 +996,12 @@ mVUop(mVU_RINIT) { pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); } pass2 { if (_Fs_ || (_Fsf_ == 3)) { - mVU->regAlloc->reset(); // Reset 
for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_MOVD_XMM_to_R(gprT1, Fs); AND32ItoR(gprT1, 0x007fffff); OR32ItoR (gprT1, 0x3f800000); MOV32RtoM(Rmem, gprT1); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } else MOV32ItoM(Rmem, 0x3f800000); } @@ -1065,12 +1010,10 @@ mVUop(mVU_RINIT) { microVUt(void) mVU_RGET_(mV, int Rreg) { if (!mVUlow.noWriteVF) { - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); SSE2_MOVD_R_to_XMM(Ft, Rreg); if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } } @@ -1109,13 +1052,11 @@ mVUop(mVU_RXOR) { pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); } pass2 { if (_Fs_ || (_Fsf_ == 3)) { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_MOVD_XMM_to_R(gprT1, Fs); AND32ItoR(gprT1, 0x7fffff); XOR32RtoM(Rmem, gprT1); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } } pass3 { mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String); } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index c5cb1e7857..04f2373f5b 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -267,6 +267,9 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); #define mVUdumpProg 0&& #endif +// Reg Alloc +#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction (Turns off regAlloc) + // Speed Hacks #define CHECK_VU_CONSTHACK 0 // Only use for GoW (will be slower on other games) #define CHECK_VU_FLAGHACK (u32)Config.Hacks.vuFlagHack // (Can cause Infinite loops, SPS, etc...) 
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index bc7e56cf86..8b8ed08c78 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -28,7 +28,7 @@ // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) { - int sReg, mReg = gprT1, xyzw = _X_Y_Z_W; + int sReg, mReg = gprT1, xyzw = _X_Y_Z_W, regT1b = 0; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); @@ -42,7 +42,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) { mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag if (sFLAG.doNonSticky) AND32ItoR(sReg, 0xfffc00ff); // Clear O,U,S,Z flags } - if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); } + if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; } //-------------------------Check for Signed flags------------------------------ @@ -75,6 +75,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) { OR32RtoR (sReg, mReg); } } + if (regT1b) mVU->regAlloc->clearNeeded(regT1); } //------------------------------------------------------------------ @@ -136,12 +137,11 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } pass2 { int Fs, Ft, ACC; - mVU->regAlloc->reset(); // Reset for Testing setupFtReg(mVU, Ft, opCase); if (isACC) { - ACC = mVU->regAlloc->allocReg((_X_Y_Z_W == 0xf) ? -1 : 32, 32, 0xf, 0); Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + ACC = mVU->regAlloc->allocReg((_X_Y_Z_W == 0xf) ? 
-1 : 32, 32, 0xf, 0); if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } else { Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); } @@ -164,11 +164,9 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co mVU->regAlloc->clearNeeded(ACC); } else mVUupdateFlags(mVU, Fs, (((opCase==2)&&(!_XYZW_SS)) ? Ft : -1)); - - //if (isACC) SSE_MOVAPS_XMM_to_XMM(xmmACC, ACC); // For Testing + mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVU_printOP(mVU, opCase, opName, isACC); } } @@ -178,11 +176,10 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op pass1 { setupPass1(mVU, opCase, 1, 0); } pass2 { int Fs, Ft, ACC; - mVU->regAlloc->reset(); // Reset for Testing setupFtReg(mVU, Ft, opCase); - ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0); Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0); if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } } @@ -209,11 +206,9 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op mVU->regAlloc->clearNeeded(tempACC); } - //SSE_MOVAPS_XMM_to_XMM(xmmACC, ACC); // For Testing mVU->regAlloc->clearNeeded(ACC); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVU_printOP(mVU, opCase, opName, 1); } } @@ -223,7 +218,6 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { int Fs, Ft, ACC; - mVU->regAlloc->reset(); // Reset for Testing setupFtReg(mVU, Ft, opCase); ACC = mVU->regAlloc->allocReg(32); @@ -243,10 +237,9 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* 
opName) { opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } } opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } } - mVU->regAlloc->clearNeeded(ACC); mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing + mVU->regAlloc->clearNeeded(ACC); } pass3 { mVU_printOP(mVU, opCase, opName, 0); } } @@ -256,11 +249,10 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { int Fs, Ft, Fd; - mVU->regAlloc->reset(); // Reset for Testing setupFtReg(mVU, Ft, opCase); - Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); - Fd = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); + Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + Fd = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } } opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } } @@ -276,7 +268,6 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) { mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVU_printOP(mVU, opCase, opName, 0); } } @@ -286,11 +277,9 @@ mVUop(mVU_ABS) { pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass2 { if (!_Ft_) return; - mVU->regAlloc->reset(); // Reset for Testing - int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); + int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("ABS"); mVUlogFtFs(); } } @@ 
-299,9 +288,8 @@ mVUop(mVU_ABS) { mVUop(mVU_OPMULA) { pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing - int Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W); int Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); + int Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W); SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ @@ -310,7 +298,6 @@ mVUop(mVU_OPMULA) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("OPMULA"); mVUlogACC(); mVUlogFt(); } } @@ -319,7 +306,6 @@ mVUop(mVU_OPMULA) { mVUop(mVU_OPMSUB) { pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); @@ -332,9 +318,7 @@ mVUop(mVU_OPMSUB) { mVU->regAlloc->clearNeeded(ACC); mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->clearNeeded(Ft); - mVU->regAlloc->flushAll(); // Flush All for Testing - + mVU->regAlloc->clearNeeded(Ft); } pass3 { mVUlog("OPMSUB"); mVUlogFd(); mVUlogFt(); } } @@ -344,8 +328,7 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) { pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass2 { if (!_Ft_) return; - mVU->regAlloc->reset(); // Reset for Testing - int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); + int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); int t1 = mVU->regAlloc->allocReg(); int t2 = mVU->regAlloc->allocReg(); @@ -363,7 +346,6 @@ void mVU_FTOIx(mP, uptr addr, const char* opName) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t2); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog(opName); mVUlogFtFs(); } } @@ -373,15 +355,13 @@ void mVU_ITOFx(mP, uptr 
addr, const char* opName) { pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); } pass2 { if (!_Ft_) return; - mVU->regAlloc->reset(); // Reset for Testing - int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, ((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); + int Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf))); SSE2_CVTDQ2PS_XMM_to_XMM(Fs, Fs); if (addr) { SSE_MULPS_M128_to_XMM(Fs, addr); } //mVUclamp2(Fs, xmmT1, 15); // Clamp (not sure if this is needed) mVU->regAlloc->clearNeeded(Fs); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog(opName); mVUlogFtFs(); } } @@ -390,7 +370,6 @@ void mVU_ITOFx(mP, uptr addr, const char* opName) { mVUop(mVU_CLIP) { pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); } pass2 { - mVU->regAlloc->reset(); // Reset for Testing int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1); int t1 = mVU->regAlloc->allocReg(); @@ -424,7 +403,6 @@ mVUop(mVU_CLIP) { mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(t1); - mVU->regAlloc->flushAll(); // Flush All for Testing } pass3 { mVUlog("CLIP"); mVUlogCLIP(); } }