diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 7fd6efc281..7631cc9347 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -98,9 +98,10 @@ struct microProgManager { #define mVUcacheSize (0x2000000 / ((vuIndex) ? 1 : 4)) struct microVU { - PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) - PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) - PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ + PCSX2_ALIGNED16(u32 statusFlag[4]); // 4 instances of status flag (used in execution) + PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) + PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) + PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index eb0c5deda0..a04bc48008 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -634,28 +634,13 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) { // Flag Allocators //------------------------------------------------------------------ -#define getFlagReg(regX, fInst) { \ - switch (fInst) { \ - case 0: regX = gprF0; break; \ - case 1: regX = gprF1; break; \ - case 2: regX = gprF2; break; \ - case 3: regX = gprF3; break; \ - default: \ - Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \ - regX = gprF0; \ - break; \ - } \ +microVUt(void) mVUallocSFLAGa(mV, int reg, int fInstance) { + MOVZX32M16toR(reg, (uptr)&mVU->statusFlag[fInstance]); } -microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { - getFlagReg(fInstance, fInstance); - MOVZX32R16toR(reg, fInstance); -} - -microVUt(void) mVUallocSFLAGb(int reg, int fInstance) { - getFlagReg(fInstance, fInstance); +microVUt(void) mVUallocSFLAGb(mV, int reg, int fInstance) { //AND32ItoR(reg, 0xffff); - MOV32RtoR(fInstance, reg); + 
MOV32RtoM((uptr)&mVU->statusFlag[fInstance], reg); } microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 74c1cbcfc9..8af22c363f 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -62,7 +62,8 @@ } microVUt(void) mVUanalyzeFMAC1(mV, int Fd, int Fs, int Ft) { - sFLAG.doFlag = 1; + mVUup.doFlags = 1; + sFLAG.doSticky = 1; analyzeReg1(Fs); analyzeReg1(Ft); analyzeReg2(Fd, 0); @@ -91,7 +92,8 @@ microVUt(void) mVUanalyzeFMAC2(mV, int Fs, int Ft) { } microVUt(void) mVUanalyzeFMAC3(mV, int Fd, int Fs, int Ft) { - sFLAG.doFlag = 1; + mVUup.doFlags = 1; + sFLAG.doSticky = 1; analyzeReg1(Fs); analyzeReg3(Ft); analyzeReg2(Fd, 0); @@ -269,6 +271,15 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) { // Sflag - Status Flag Opcodes //------------------------------------------------------------------ +#define setFlagInst(xDoFlag) { \ + int curPC = iPC; \ + for (int i = mVUcount, j = 0; i > 0; i--, j++) { \ + incPC2(-2); \ + if (mVUup.doFlags) { xDoFlag = 1; if (j >= 3) { break; } } \ + } \ + iPC = curPC; \ +} + microVUt(void) mVUanalyzeSflag(mV, int It) { if (!It) { mVUlow.isNOP = 1; } else { @@ -279,17 +290,15 @@ microVUt(void) mVUanalyzeSflag(mV, int It) { // Note: useSflag is used for status flag optimizations when a FSSET instruction is called. // Do to stalls, it can only be set one instruction prior to the status flag read instruction // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. 
+ setFlagInst(sFLAG.doFlag); } analyzeVIreg3(It, 1); } microVUt(void) mVUanalyzeFSSET(mV) { - mVUlow.isFSSET = 1; - // mVUinfo &= ~_doStatus; - // Note: I'm not entirely sure if the non-sticky flags - // should be taken from the current upper instruction - // or if they should be taken from the previous instruction - // Uncomment the above line if the latter-case is true + mVUinfo.swapOps = 1; + mVUlow.isFSSET = 1; + sFLAG.doSticky = 0; } //------------------------------------------------------------------ @@ -301,12 +310,7 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) { else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo.swapOps = 1; if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << (/*mVUcount +*/ 4); } - int curPC = iPC; - for (int i = mVUcount, j = 0; i > 0; i--, j++) { - incPC2(-2); - if (sFLAG.doFlag) { mFLAG.doFlag = 1; if (j >= 3) { break; } } - } - iPC = curPC; + setFlagInst(mFLAG.doFlag); } analyzeVIreg1(Is); analyzeVIreg3(It, 1); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 689d8b82a4..f4674fc61c 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -59,7 +59,7 @@ #define tCycles(dest, src) { dest = aMax(dest, src); } #define incP() { mVU->p = (mVU->p+1) & 1; } #define incQ() { mVU->q = (mVU->q+1) & 1; } -#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } +#define doUpperOp() { mVUdivSet(mVU); mVUopU(mVU, 1); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } #define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } @@ -169,8 +169,7 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in // Save Flag Instances if (!mVUflagHack) { - getFlagReg(fStatus, fStatus); - MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprST); } mVUallocMFLAGa(mVU, 
gprT1, fMac); mVUallocCFLAGa(mVU, gprT2, fClip); @@ -200,7 +199,6 @@ microVUt(void) mVUtestCycles(mV) { MOV32ItoR(gprT2, xPC); if (!isVU1) CALLFunc((uptr)mVUwarning0); else CALLFunc((uptr)mVUwarning1); - MOV32ItoR(gprR, Roffset); // Restore gprR mVUendProgram(mVU, 0, 0, sI, 0, cI); x86SetJ8(jmp8); } @@ -322,7 +320,7 @@ microVUf(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUbackupRegs(mVU); MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) + MOV32ItoR(gprT3, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) if (!isVU1) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) else CALLFunc((uptr)mVUcompileVU1); @@ -375,12 +373,12 @@ eBitTemination: memset(&mVUinfo, 0, sizeof(mVUinfo)); incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0) mVUcycles -= 100; - if (mVUinfo.doDivFlag) { + /*if (mVUinfo.doDivFlag) { int flagReg; getFlagReg(flagReg, lStatus); AND32ItoR (flagReg, 0x0fcf); OR32MtoR (flagReg, (uptr)&mVU->divFlag); - } + }*/ if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } // Do E-bit end stuff here diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 0854b0bad3..8c5bb00c6b 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -41,11 +41,11 @@ microVUt(void) mVUdispatcherA(mV) { // Load Regs MOV32ItoR(gprR, Roffset); // Load VI Reg Offset - MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL); - AND32ItoR(gprF0, 0xffff); - MOV32RtoR(gprF1, gprF0); - MOV32RtoR(gprF2, gprF0); - MOV32RtoR(gprF3, gprF0); + MOV32MtoR(gprST, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL); + /*AND32ItoR(gprST, 0xffff); + MOV32RtoR(gprF1, gprST); + MOV32RtoR(gprF2, gprST); + MOV32RtoR(gprF3, gprST);*/ SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL); SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0); diff --git 
a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index fc63c3db9d..70f50f05fa 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -20,12 +20,9 @@ // Sets FDIV Flags at the proper time microVUt(void) mVUdivSet(mV) { - int flagReg1, flagReg2; if (mVUinfo.doDivFlag) { - getFlagReg(flagReg1, sFLAG.write); - if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } - AND32ItoR(flagReg1, 0x0fcf); - OR32MtoR (flagReg1, (uptr)&mVU->divFlag); + AND32ItoR(gprST, 0xfffcffff); // Clear D/I bits + OR32MtoR (gprST, (uptr)&mVU->divFlag); // Set DS/IS/D/I bits } } @@ -34,18 +31,19 @@ microVUt(void) mVUstatusFlagOp(mV) { int curPC = iPC; int i = mVUcount; bool runLoop = 1; - if (sFLAG.doFlag) { mVUlow.useSflag = 1; } + if (mVUup.doFlags) { mVUlow.useSflag = 1; } else { for (; i > 0; i--) { incPC2(-2); if (mVUlow.useSflag) { runLoop = 0; break; } - if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; } + if (mVUup.doFlags) { mVUlow.useSflag = 1; break; } } } if (runLoop) { for (; i > 0; i--) { incPC2(-2); if (mVUlow.useSflag) break; + sFLAG.doSticky = 0; sFLAG.doFlag = 0; } } @@ -79,7 +77,11 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { // Ensure last ~4+ instructions update mac flags (if next block's first 4 instructions will read them) for (int i = mVUcount; i > 0; i--, aCount++) { - if (sFLAG.doFlag) { if (__Mac) { mFLAG.doFlag = 1; } if (aCount >= 4) { break; } } + if (mVUup.doFlags) { + if (__Status) { sFLAG.doFlag = 1; } + if (__Mac) { mFLAG.doFlag = 1; } + if (aCount >= 4) { break; } + } incPC2(-2); } @@ -142,13 +144,13 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) { incPC2(2); } - mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS); + mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) /*| ((__Status) ? 0 : xS)*/; return cycles; } -#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? 
gprF1 : gprF0))) -#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0]) -#define shuffleClip ((bClip[3]<<6)|(bClip[2]<<4)|(bClip[1]<<2)|bClip[0]) +#define shuffleStatus ((bStatus[3]<<6)|(bStatus[2]<<4)|(bStatus[1]<<2)|bStatus[0]) +#define shuffleMac ((bMac [3]<<6)|(bMac [2]<<4)|(bMac [1]<<2)|bMac [0]) +#define shuffleClip ((bClip [3]<<6)|(bClip [2]<<4)|(bClip [1]<<2)|bClip [0]) // Recompiles Code for Proper Flags on Block Linkings microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles) { @@ -156,14 +158,9 @@ microVUt(void) mVUsetupFlags(mV, int* xStatus, int* xMac, int* xClip, int cycles if (__Status && !mVUflagHack) { int bStatus[4]; sortFlag(xStatus, bStatus, cycles); - MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); - MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); - MOV32RtoR(gprR, getFlagReg1(bStatus[2])); - MOV32RtoR(gprF3, getFlagReg1(bStatus[3])); - MOV32RtoR(gprF0, gprT1); - MOV32RtoR(gprF1, gprT2); - MOV32RtoR(gprF2, gprR); - MOV32ItoR(gprR, Roffset); // Restore gprR + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->statusFlag); + SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleStatus); + SSE_MOVAPS_XMM_to_M128((uptr)mVU->statusFlag, xmmT1); } if (__Mac) { diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index a42a683a25..2ccd33b907 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -68,6 +68,7 @@ struct microBlock { struct microUpperOp { bool eBit; // Has E-bit set bool iBit; // Has I-bit set + bool doFlags; // This instruction updates Status/Mac Flags }; struct microLowerOp { @@ -83,7 +84,8 @@ struct microLowerOp { }; struct microFlagInst { - bool doFlag; // Update Flag on this Instruction + bool doSticky; // Update Sticky Flags (Status Flag Only) + bool doFlag; // Update Flag on this Instruction (For Status Flag, this means non-sticky bits) u8 write; // Points to the instance that should be written to (s-stage write) u8 lastWrite; // Points to the instance that was last written to (most up-to-date 
flag) u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index c7a4b7e0ca..1bfd48e6e0 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -37,7 +37,7 @@ SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ TEST32ItoR(gprTemp, 1); /* Check sign bit */ \ aJump = JZ8(0); /* Skip if positive */ \ - MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \ + MOV32ItoM((uptr)&mVU->divFlag, 0x410000); /* Set Invalid Flags */ \ SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \ x86SetJ8(aJump); \ } @@ -54,10 +54,10 @@ mVUop(mVU_DIV) { testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero ajmp = JZ8(0); - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0) + MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0) bjmp = JMP8(0); x86SetJ8(ajmp); - MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0) + MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide (only when not 0/0) x86SetJ8(bjmp); SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt); @@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) { testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero bjmp = JZ8(0); // Skip if none are - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0) + MOV32ItoM((uptr)&mVU->divFlag, 0x410000); // Set invalid flag (0/0) cjmp = JMP8(0); x86SetJ8(bjmp); - MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0) + MOV32ItoM((uptr)&mVU->divFlag, 0x820000); // Zero divide flag (only when not 0/0) x86SetJ8(cjmp); SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit); @@ -522,7 +522,7 @@ mVUop(mVU_FMOR) { mVUop(mVU_FSAND) { pass1 { mVUanalyzeSflag(mVU, _It_); } pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); + mVUallocSFLAGa(mVU, gprT1, sFLAG.read); AND16ItoR(gprT1, _Imm12_); mVUallocVIb(mVU, gprT1, _It_); } @@ -533,7 +533,7 @@ mVUop(mVU_FSAND) { mVUop(mVU_FSEQ) { pass1 { mVUanalyzeSflag(mVU, _It_); } 
pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); + mVUallocSFLAGa(mVU, gprT1, sFLAG.read); XOR16ItoR(gprT1, _Imm12_); SUB16ItoR(gprT1, 1); SHR16ItoR(gprT1, 15); @@ -546,7 +546,7 @@ mVUop(mVU_FSEQ) { mVUop(mVU_FSOR) { pass1 { mVUanalyzeSflag(mVU, _It_); } pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); + mVUallocSFLAGa(mVU, gprT1, sFLAG.read); OR16ItoR(gprT1, _Imm12_); mVUallocVIb(mVU, gprT1, _It_); } @@ -557,11 +557,13 @@ mVUop(mVU_FSOR) { mVUop(mVU_FSSET) { pass1 { mVUanalyzeFSSET(mVU); } pass2 { - int flagReg1, flagReg2; - getFlagReg(flagReg1, sFLAG.write); - if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction - AND32ItoR(flagReg1, 0x03f); - OR32ItoR (flagReg1, (_Imm12_ & 0xfc0)); + int mask = 0; // Sticky bits to set in gprST; OR-ed into below, so it must start at zero + if (_Imm12_ & 0x800) mask |= 0x800000; // DS + if (_Imm12_ & 0x400) mask |= 0x400000; // IS + if (_Imm12_ & 0x080) mask |= 0x0000f0; // SS + if (_Imm12_ & 0x040) mask |= 0x00000f; // ZS + AND32ItoR(gprST, 0x30000); // Keep only the non-sticky D/I bits + if (mask) OR32ItoR(gprST, mask); } pass3 { mVUlog("FSSET $%x", _Imm12_); } pass4 { mVUsFlagHack = 0; } @@ -966,23 +968,22 @@ mVUop(mVU_RNEXT) { pass1 { mVUanalyzeR2(mVU, _Ft_, 0); } pass2 { // algorithm from www.project-fao.org - MOV32MtoR(gprR, Rmem); - MOV32RtoR(gprT1, gprR); + MOV32MtoR(gprT3, Rmem); + MOV32RtoR(gprT1, gprT3); SHR32ItoR(gprT1, 4); AND32ItoR(gprT1, 1); - MOV32RtoR(gprT2, gprR); + MOV32RtoR(gprT2, gprT3); SHR32ItoR(gprT2, 22); AND32ItoR(gprT2, 1); - SHL32ItoR(gprR, 1); + SHL32ItoR(gprT3, 1); XOR32RtoR(gprT1, gprT2); - XOR32RtoR(gprR, gprT1); - AND32ItoR(gprR, 0x007fffff); - OR32ItoR (gprR, 0x3f800000); - MOV32RtoM(Rmem, gprR); - mVU_RGET_(mVU, gprR); - MOV32ItoR(gprR, Roffset); // Restore gprR + XOR32RtoR(gprT3, gprT1); + AND32ItoR(gprT3, 0x007fffff); + OR32ItoR (gprT3, 0x3f800000); + MOV32RtoM(Rmem, gprT3); + mVU_RGET_(mVU, gprT3); } pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); } } diff --git a/pcsx2/x86/microVU_Misc.h 
b/pcsx2/x86/microVU_Misc.h index d36cfc7a8e..8d9ad3a747 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -131,12 +131,22 @@ declareAllVariables #define gprT1 0 // Temp Reg #define gprT2 1 // Temp Reg -#define gprR 2 // VI Reg Offset -#define gprF0 3 // Status Flag 0 +#define gprT3 2 // Temp Reg #define gprESP 4 // Don't use? -#define gprF1 5 // Status Flag 1 -#define gprF2 6 // Status Flag 2 -#define gprF3 7 // Status Flag 3 +#define gprT4 5 // Temp? +#define gprT5 6 // Temp? +#define gprR 7 // VI Reg Offset +#define gprST 3 // Status Sticky Flag +// gprST's Info is Stored as follows: +//----------------------------------------------------------------------------- +//|23 22 21 20||19 18 17 16||15 14 13 12||11 10 09 08||07 06 05 04||03 02 01 00| +//|DS|IS| || | D| I|| OS || US || SS || ZS | +//----------------------------------------------------------------------------- +// Storing Flags this way eliminates Jumps when updating sticky flags. +// +// When a Status Flag will be read, gprST is attached with +// the current status flag result in mVUupdateFlags. 
And the complete +// Status flag instance is stored in memory (mVU->statusFlag[instance]) // Function Params #define mP microVU* mVU, int recPass @@ -172,15 +182,13 @@ declareAllVariables #define mVUregsTemp mVUallocInfo.regsTemp #define iPC mVUallocInfo.curPC #define mVUsFlagHack mVUallocInfo.sFlagHack - -#define mVUinfo mVUallocInfo.info[iPC / 2] -#define mVUstall mVUinfo.stall -#define mVUup mVUinfo.uOp -#define mVUlow mVUinfo.lOp -#define sFLAG mVUinfo.sFlag -#define mFLAG mVUinfo.mFlag -#define cFLAG mVUinfo.cFlag - +#define mVUinfo mVUallocInfo.info[iPC/2] // IR info for current 64bit instruction +#define mVUstall mVUinfo.stall // Stall info for current instruction +#define mVUup mVUinfo.uOp // Upper Instruction Info +#define mVUlow mVUinfo.lOp // Lower Instruction Info +#define sFLAG mVUinfo.sFlag // Status Flag info for cur instruction +#define mFLAG mVUinfo.mFlag // Mac Flag info for cur instruction +#define cFLAG mVUinfo.cFlag // Clip Flag info for cur instruction #define mVUstartPC mVUallocInfo.startPC #define mVUflagInfo mVUregs.needExactMatch #define mVUflagHack (mVUcurProg.sFlagHack) @@ -268,6 +276,5 @@ declareAllVariables MOV32ItoR(gprT2, xPC); \ if (isEndPC) { CALLFunc((uptr)mVUprintPC2); } \ else { CALLFunc((uptr)mVUprintPC1); } \ - MOV32ItoR(gprR, Roffset); \ } \ } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index f6d2c58b00..f7b0c1f3bc 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -292,7 +292,6 @@ microVUt(void) mVUrestoreRegs(mV) { SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]); SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); - MOV32ItoR(gprR, Roffset); // Restore gprR } // Reads entire microProgram and finds out if Status Flag is Used diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index c2bb91e294..578df21d49 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -28,21 +28,16 @@ // 
Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) { - int sReg, mReg = gprT1; + int sReg = gprT3, mReg = gprT1; static u8 *pjmp, *pjmp2; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); - if (mVUsFlagHack) { sFLAG.doFlag = 0; } - if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } - if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; } + if (mVUsFlagHack) { sFLAG.doSticky = 0; sFLAG.doFlag = 0; } + if (!mVUup.doFlags || (!sFLAG.doSticky && !sFLAG.doFlag && !mFLAG.doFlag)) { return; } + if (!mFLAG.doFlag || (_XYZW_SS && modXYZW)) { regT1 = reg; } else { SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); } // Flip wzyx to xyzw - if (sFLAG.doFlag) { - getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance - mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag - AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags - } - + if (sFLAG.doFlag) { XOR32RtoR(sReg, sReg); } //-------------------------Check for Signed flags------------------------------ // The following code makes sure the Signed Bit isn't set with Negative Zero @@ -55,24 +50,41 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are - if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW); - if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags - if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking + if (mFLAG.doFlag || sFLAG.doSticky) SHL32ItoR(mReg, 4 + ADD_XYZW); + if (sFLAG.doFlag) OR32ItoR (sReg, 0x82); // SS, S flags + if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking if (sFLAG.doFlag) 
x86SetJ8(pjmp); //-------------------------Check for Zero flags------------------------------ AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are - if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); } - if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags + if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); } + if (sFLAG.doSticky && !mFLAG.doFlag) { OR32RtoR(mReg, gprT2); } + if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags if (sFLAG.doFlag) x86SetJ8(pjmp); //-------------------------Write back flags------------------------------ if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here - - if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag + + if (sFLAG.doSticky) OR32RtoR(gprST, mReg); // Set Sticky Register (gprST) + if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag + if (sFLAG.doFlag) { // Attach Sticky Register With sReg + TEST32ItoR(gprST, 0x0f); + pjmp = JZ8(0); // Set Z bit? + OR32ItoR(sReg, 0x40); + x86SetJ8(pjmp); + TEST32ItoR(gprST, 0xf0); + pjmp = JZ8(0); // Set S bit? 
+ OR32ItoR(sReg, 0x80); + x86SetJ8(pjmp); + MOV32RtoR(mReg, gprST); // Backup gprST + AND32ItoR(mReg, 0xc30000); // Get D/I Bits + SHR32ItoR(mReg, 12); // Shift D/I Bits to proper position + OR32RtoR (sReg, mReg); // Set D/I Bits + mVUallocSFLAGb(mVU, sReg, sFLAG.write); // Set Status Flag + } } //------------------------------------------------------------------ @@ -438,9 +450,9 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool } // FMAC27~29 - MAX/MINI FMAC Opcodes -#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { sFLAG.doFlag = 0; } } -#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } } -#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } } +#define mVU_FMAC27(operation, OPname) { mVU_FMAC1 (operation, OPname); pass1 { mVUup.doFlags = 0; } } +#define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { mVUup.doFlags = 0; } } +#define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { mVUup.doFlags = 0; } } //------------------------------------------------------------------ // Micro VU Micromode Upper instructions