From 3f2f8571dffeb538cb0e4ddf7e111ee45613017b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 5 Aug 2009 04:03:24 +0000 Subject: [PATCH] microVU: some optimizations... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1605 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.inl | 6 ------ pcsx2/x86/microVU_Compile.inl | 30 ++++++++++++++++++------------ pcsx2/x86/microVU_Flags.inl | 1 + pcsx2/x86/microVU_IR.h | 21 +++++++++++++++------ pcsx2/x86/microVU_Misc.h | 1 + pcsx2/x86/microVU_Misc.inl | 5 +++++ pcsx2/x86/microVU_Upper.inl | 8 ++++---- 7 files changed, 44 insertions(+), 28 deletions(-) diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 6f1d741b35..d794fc0082 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -117,12 +117,6 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) { // I/P/Q Reg Allocators //------------------------------------------------------------------ -microVUt(void) getIreg(mV, int reg, bool modXYZW) { - SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); - if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } -} - microVUt(void) getPreg(mV, int reg) { mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index e7c5769bb8..43266236c2 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -75,15 +75,19 @@ microVUt(void) doSwapOp(mV) { else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } } -microVUt(void) doIbit(mV) { +microVUt(void) doIbit(microVU* mVU) { if (mVUup.iBit) { incPC(-1); + u32 tempI; + mVU->regAlloc->clearRegVF(33); + if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { Console::Status("microVU%d: Clamping I Reg", params mVU->index); - int tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg - MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); + tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg } - else MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); + else tempI = curI; + + MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); incPC(1); } } @@ -174,7 +178,9 @@ microVUt(void) mVUoptimizePipeState(mV) { for (int i = 0; i < 16; i++) { optimizeReg(mVUregs.VI[i]); } - mVUregs.r = 0; + if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } } + if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } } + mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info } // Recompiles Code for Proper Flags and Q/P regs on Block Linkings @@ -204,12 +210,12 @@ microVUt(void) mVUincCycles(mV, int x) { } if (mVUregs.q) { if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } } - else { calcCycles(mVUregs.q, x); } + else { calcCycles(mVUregs.q, x); } if (!mVUregs.q) { incQ(); } } if (mVUregs.p) { calcCycles(mVUregs.p, x); - if (!mVUregs.p || (mVUregs.p && mVUregsTemp.p)) { incP(); } + if (!mVUregs.p || mVUregsTemp.p) { incP(); } } if (mVUregs.xgkick) { calcCycles(mVUregs.xgkick, x); @@ -277,7 +283,8 @@ microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip if (isEbit) { mVUprint("mVUcompile ebit"); - memset(&mVUinfo, 0, sizeof(mVUinfo)); + memset(&mVUinfo, 0, sizeof(mVUinfo)); + memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0) mVUcycles -= 100; qInst = mVU->q; @@ -430,13 +437,12 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { // Sets Up Flag instances int xStatus[4], xMac[4], xClip[4]; int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip); - mVUtestCycles(mVU); - + // Fix up vi15 const info for propagation through blocks mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0; - // Optimize the End Pipeline State for nicer Block Linking - mVUoptimizePipeState(mVU); + mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking + mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary // Second Pass iPC = mVUstartPC; diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index ca4ccb1fd3..4abc004193 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -152,6 +152,7 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { } mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS); + iPC = endPC; return cycles; } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 44a1de55a3..f8faadd5ca 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -163,9 +163,10 @@ struct microIR { void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW); void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW); void mVUloadReg(int reg, uptr offset, int xyzw); +void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs); struct microXMM { - int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC) + int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg) int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid) int count; // Count of when last used bool isNeeded; // Is needed for current instruction @@ -221,10 +222,16 @@ public: xmmReg[reg].xyzw = 0; xmmReg[reg].isNeeded = 0; } + void clearRegVF(int VFreg) { + for (int i = 0; i < xmmTotal; i++) { + if (xmmReg[i].reg == VFreg) clearReg(i); + } + } void writeBackReg(int reg, bool invalidateRegs = 1) { if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0 - if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1); - else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1); + if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg); + else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1); + else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1); if (invalidateRegs) { for (int i = 0; i < xmmTotal; i++) { if ((i == reg) || xmmReg[i].isNeeded) continue; @@ -307,13 +314,15 @@ public: if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading) if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); } - else if (vfLoadReg == 32) mVUloadReg(x, (uptr)&vuRegs->ACC.UL[0], xyzw); - else if (vfLoadReg >= 0) mVUloadReg(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw); + else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs); + else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw); + else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw); xmmReg[x].reg = vfWriteReg; xmmReg[x].xyzw = xyzw; } else { // Reg Will Not Be Modified (always load full reg for caching) - if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]); + if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs); + else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]); else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]); xmmReg[x].reg = vfLoadReg; xmmReg[x].xyzw = 0; diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 44c17a32fe..c2a2d509c6 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -84,6 +84,7 @@ declareAllVariables #define _XYZW_SS (_X+_Y+_Z+_W==1) #define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8)) #define _XYZW_PS (_X_Y_Z_W == 0xf) +#define _XYZWss(x) ((x==8) || (x==4) || (x==2) || (x==1)) #define _bc_ (mVU->code & 0x3) #define _bc_x ((mVU->code & 0x3) == 0) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 31203d624a..81982e8054 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -94,6 +94,11 @@ void mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { } } +void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs) { + SSE_MOVSS_M32_to_XMM(reg, (uptr)&vuRegs->VI[REG_I].UL); + if (!_XYZWss(xyzw)) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0); +} + // Modifies the Source Reg! void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) { /*SSE_MOVAPS_M128_to_XMM(xmmT2, offset); diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index d7c7f69acc..6e2464f308 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -120,18 +120,18 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { // Sets Up Ft Reg for Normal, BC, I, and Q Cases void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) { - opCase1 { + opCase1 { if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; } } - opCase2 { + opCase2 { tempFt = mVU->regAlloc->allocReg(_Ft_); - Ft = mVU->regAlloc->allocReg(); + Ft = mVU->regAlloc->allocReg(); mVUunpack_xyzw(Ft, tempFt, _bc_); mVU->regAlloc->clearNeeded(tempFt); tempFt = Ft; } - opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); } + opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; } opCase4 { if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; } else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }