diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index cb85aa442b..9a3e4e5ce6 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -47,7 +47,7 @@ namespace VU0micro if ((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; FreezeXMMRegs(1); - if (useMVU0) runVUrec(VU0.VI[REG_TPC].UL, 50000, 0); + if (useMVU0) runVUrec(VU0.VI[REG_TPC].UL, 0x300, 0); else SuperVUExecuteProgram(VU0.VI[REG_TPC].UL & 0xfff, 0); FreezeXMMRegs(0); } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index d7cc0d114d..85d9e4163e 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -100,6 +100,7 @@ struct microVU { PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ + PCSX2_ALIGNED16(u32 xmmVFb[4]); // Backup for VF regs u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 0143321dc1..67169db9ed 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -31,7 +31,7 @@ #define aMax(x, y) ((x > y) ? x : y) #define aMin(x, y) ((x < y) ? x : y) -// Read a VF reg by upper op +// Read a VF reg #define analyzeReg1(xReg, vfRead) { \ if (xReg) { \ if (_X) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \ @@ -41,20 +41,6 @@ } \ } -// Read a VF reg by lower op -#define analyzeReg1b(xReg, vfRead) { \ - if (xReg) { \ - analyzeReg1(xReg, vfRead); \ - if (mVUregsTemp.VFreg[0] == xReg) { \ - if ((mVUregsTemp.VF[0].x && _X) \ - || (mVUregsTemp.VF[0].y && _Y) \ - || (mVUregsTemp.VF[0].z && _Z) \ - || (mVUregsTemp.VF[0].w && _W)) \ - { mVUinfo.swapOps = 1; } \ - } \ - } \ -} - // Write to a VF reg #define analyzeReg2(xReg, vfWrite, isLowOp) { \ if (xReg) { \ @@ -92,13 +78,6 @@ case 2: mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; break; \ case 3: mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; break; \ } \ - if (mVUregsTemp.VFreg[0] == xReg) { \ - if ((mVUregsTemp.VF[0].x && (fxf == 0)) \ - || (mVUregsTemp.VF[0].y && (fxf == 1)) \ - || (mVUregsTemp.VF[0].z && (fxf == 2)) \ - || (mVUregsTemp.VF[0].w && (fxf == 3))) \ - { mVUinfo.swapOps = 1; } \ - } \ } \ } @@ -109,13 +88,6 @@ if (_Y) { mVUstall = aMax(mVUstall, aReg(xReg).z); vfRead.reg = xReg; vfRead.z = 1; } \ if (_Z) { mVUstall = aMax(mVUstall, aReg(xReg).w); vfRead.reg = xReg; vfRead.w = 1; } \ if (_W) { mVUstall = aMax(mVUstall, aReg(xReg).x); vfRead.reg = xReg; vfRead.x = 1; } \ - if (mVUregsTemp.VFreg[0] == xReg) { \ - if ((mVUregsTemp.VF[0].y && _X) \ - || (mVUregsTemp.VF[0].z && _Y) \ - || (mVUregsTemp.VF[0].w && _Z) \ - || (mVUregsTemp.VF[0].x && _W)) \ - { mVUinfo.swapOps = 1; } \ - } \ } \ } @@ -234,7 +206,7 @@ microVUt(void) mVUanalyzeEFU1(mV, int Fs, int Fsf, u8 xCycles) { microVUt(void) mVUanalyzeEFU2(mV, int Fs, u8 xCycles) { mVUprint("microVU: EFU Opcode"); - analyzeReg1b(Fs, mVUlow.VF_read[0]); + analyzeReg1(Fs, mVUlow.VF_read[0]); analyzePreg(xCycles); } @@ -253,8 +225,8 @@ microVUt(void) mVUanalyzeMFP(mV, int Ft) { microVUt(void) mVUanalyzeMOVE(mV, int Fs, int Ft) { if (!Ft || (Ft == Fs)) { mVUlow.isNOP = 1; } - analyzeReg1b(Fs, mVUlow.VF_read[0]); - analyzeReg2 (Ft, mVUlow.VF_write, 1); + analyzeReg1(Fs, mVUlow.VF_read[0]); + analyzeReg2(Ft, mVUlow.VF_write, 1); } //------------------------------------------------------------------ @@ -273,7 +245,7 @@ microVUt(void) mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) { //------------------------------------------------------------------ microVUt(void) mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { - analyzeReg1b (Fs, mVUlow.VF_read[0]); + analyzeReg1 (Fs, mVUlow.VF_read[0]); analyzeVIreg1(It, mVUlow.VI_read[0]); if (writeIt) { analyzeVIreg2(It, mVUlow.VI_write, 1); } } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 0eb5a4be05..592b76774a 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -49,6 +49,30 @@ } \ } +#define doBackupVF1() { \ + if (mVUinfo.backupVF && !mVUlow.noWriteVF) { \ + DevCon::Status("microVU%d: Backing Up VF Reg [%04x]", params getIndex, xPC); \ + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VF[mVUlow.VF_write.reg].UL[0]); \ + SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmVFb, xmmT1); \ + } \ +} + +#define doBackupVF2() { \ + if (mVUinfo.backupVF && !mVUlow.noWriteVF) { \ + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->xmmVFb); \ + SSE_MOVAPS_M128_to_XMM(xmmT2, (uptr)&mVU->regs->VF[mVUlow.VF_write.reg].UL[0]); \ + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VF[mVUlow.VF_write.reg].UL[0], xmmT1); \ + SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmVFb, xmmT2); \ + } \ +} + +#define doBackupVF3() { \ + if (mVUinfo.backupVF && !mVUlow.noWriteVF) { \ + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->xmmVFb); \ + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->VF[mVUlow.VF_write.reg].UL[0], xmmT1); \ + } \ +} + #define startLoop() { \ mVUdebug1(); \ memset(&mVUinfo, 0, sizeof(mVUinfo)); \ @@ -61,6 +85,7 @@ #define incQ() { mVU->q = (mVU->q+1) & 1; } #define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } +#define doSwapOp() { doBackupVF1(); mVUopL(mVU, 1); doBackupVF2(); incPC(1); doUpperOp(); doBackupVF3(); } #define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } //------------------------------------------------------------------ @@ -134,12 +159,34 @@ microVUt(void) mVUincCycles(mV, int x) { calcCycles(mVUregs.r, x); } +#define cmpVFregs(VFreg1, VFreg2, xVar) { \ + if (VFreg1.reg == VFreg2.reg) { \ + if ((VFreg1.x && VFreg2.x) \ + || (VFreg1.y && VFreg2.y) \ + || (VFreg1.z && VFreg2.z) \ + || (VFreg1.w && VFreg2.w)) \ + { xVar = 1; } \ + } \ +} + microVUt(void) mVUsetCycles(mV) { incCycles(mVUstall); - if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + // If upper Op && lower Op write to same VF reg: + if ((mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) && mVUregsTemp.VFreg[0]) { if (mVUregsTemp.r || mVUregsTemp.VI) mVUlow.noWriteVF = 1; else mVUlow.isNOP = 1; // If lower Op doesn't modify anything else, then make it a NOP } + // If lower op reads a VF reg that upper Op writes to: + if ((mVUlow.VF_read[0].reg || mVUlow.VF_read[1].reg) && mVUup.VF_write.reg) { + cmpVFregs(mVUup.VF_write, mVUlow.VF_read[0], mVUinfo.swapOps); + cmpVFregs(mVUup.VF_write, mVUlow.VF_read[1], mVUinfo.swapOps); + } + // If above case is true, and upper op reads a VF reg that lower Op Writes to: + if (mVUinfo.swapOps && ((mVUup.VF_read[0].reg || mVUup.VF_read[1].reg) && mVUlow.VF_write.reg)) { + cmpVFregs(mVUlow.VF_write, mVUup.VF_read[0], mVUinfo.backupVF); + cmpVFregs(mVUlow.VF_write, mVUup.VF_read[1], mVUinfo.backupVF); + } + tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].x, mVUregsTemp.VF[0].x); tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].y, mVUregsTemp.VF[0].y); tCycles(mVUregs.VF[mVUregsTemp.VFreg[0]].z, mVUregsTemp.VF[0].z); @@ -254,7 +301,6 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (curI & _Ebit_) { branch = 1; mVUup.eBit = 1; } if (curI & _DTbit_) { branch = 4; } if (curI & _Mbit_) { mVUup.mBit = 1; } - if (curI & _Ibit_) { mVUlow.isNOP = 1; mVUup.iBit = 1; } else { incPC(-1); mVUopL(mVU, 0); incPC(1); } mVUsetCycles(mVU); @@ -284,7 +330,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (mVUup.mBit) { OR32ItoM((uptr)&mVU->regs->flags, VUFLAG_MFLAGSET); } if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(); } else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); } - else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } + else { doSwapOp(); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } if (!mVUinfo.isBdelay) { incPC(1); } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 94b110779d..c3b6f979fb 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -115,6 +115,7 @@ struct microOp { bool isEOB; // Cur Instruction is last instruction in block (End of Block) bool isBdelay; // Cur Instruction in Branch Delay slot bool swapOps; // Run Lower Instruction before Upper Instruction + bool backupVF; // Backup mVUlow.VF_write.reg, and restore it before the Upper Instruction is called bool doXGKICK; // Do XGKICK transfer on this instruction bool doDivFlag; // Transfer Div flag to Status Flag on this instruction int readQ; // Q instance for reading