diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index cfd8ffac72..d14ba7a570 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -647,17 +647,44 @@ microVUt(void) mVUallocFMAC26b(mV, int& ACCw, int& ACCr) { } \ } +#define setBitSFLAG(bitTest, bitSet) { \ + TEST32ItoR(regT, bitTest); \ + pjmp = JZ8(0); \ + OR32ItoR(reg, bitSet); \ + x86SetJ8(pjmp); \ +} + +#define setBitFSEQ(bitX) { \ + TEST32ItoR(gprT1, bitX); \ + pjmp = JZ8(0); \ + OR32ItoR(gprT1, bitX); \ + x86SetJ8(pjmp); \ +} + microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { getFlagReg(fInstance, fInstance); - MOVZX32R16toR(reg, fInstance); + MOV32RtoR(reg, fInstance); } microVUt(void) mVUallocSFLAGb(int reg, int fInstance) { getFlagReg(fInstance, fInstance); - //AND32ItoR(reg, 0xffff); MOV32RtoR(fInstance, reg); } +// Normalize Status Flag +microVUt(void) mVUallocSFLAGc(int reg, int regT, int fInstance) { + u8 *pjmp; + XOR32RtoR(reg, reg); + mVUallocSFLAGa(regT, fInstance); + setBitSFLAG(0x000f, 0x0001); // Z Bit + setBitSFLAG(0x00f0, 0x0002); // S Bit + setBitSFLAG(0x0f00, 0x0040); // ZS Bit + setBitSFLAG(0xf000, 0x0080); // SS Bit + AND32ItoR(regT, 0xffff0000); // DS/DI/OS/US/D/I/O/U Bits + SHR32ItoR(regT, 14); + OR32RtoR(reg, regT); +} + microVUt(void) mVUallocMFLAGa(mV, int reg, int fInstance) { MOVZX32M16toR(reg, (uptr)&mVU->macFlag[fInstance]); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 689d8b82a4..5dd94ca18f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -169,8 +169,8 @@ microVUt(void) mVUendProgram(mV, int qInst, int pInst, int fStatus, int fMac, in // Save Flag Instances if (!mVUflagHack) { - getFlagReg(fStatus, fStatus); - MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, fStatus); + mVUallocSFLAGc(gprT1, gprT2, fStatus); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); } mVUallocMFLAGa(mVU, gprT1, fMac); mVUallocCFLAGa(mVU, gprT2, fClip); @@ -196,13 +196,13 @@ microVUt(void) mVUtestCycles(mV) { iPC = mVUstartPC; mVUdebugNOW(0); SUB32ItoM((uptr)&mVU->cycles, mVUcycles); - u8* jmp8 = JG8(0); + u32* jmp32 = JG32(0); MOV32ItoR(gprT2, xPC); if (!isVU1) CALLFunc((uptr)mVUwarning0); else CALLFunc((uptr)mVUwarning1); MOV32ItoR(gprR, Roffset); // Restore gprR mVUendProgram(mVU, 0, 0, sI, 0, cI); - x86SetJ8(jmp8); + x86SetJ32(jmp32); } //------------------------------------------------------------------ @@ -378,7 +378,7 @@ eBitTemination: if (mVUinfo.doDivFlag) { int flagReg; getFlagReg(flagReg, lStatus); - AND32ItoR (flagReg, 0x0fcf); + AND32ItoR (flagReg, 0xfff3ffff); OR32MtoR (flagReg, (uptr)&mVU->divFlag); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 0854b0bad3..e3fe31d202 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -42,10 +42,24 @@ microVUt(void) mVUdispatcherA(mV) { // Load Regs MOV32ItoR(gprR, Roffset); // Load VI Reg Offset MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL); - AND32ItoR(gprF0, 0xffff); + MOV32RtoR(gprF1, gprF0); + SHL32ItoR(gprF1, 3); + AND32ItoR(gprF1, 0x218); + MOV32RtoR(gprF2, gprF0); + SHL32ItoR(gprF2, 5); + AND32ItoR(gprF2, 0x1000); + OR32RtoR (gprF1, gprF2); + MOV32RtoR(gprF3, gprF0); + SHL32ItoR(gprF3, 14); + AND32ItoR(gprF3, 0x3cf0000); + OR32RtoR (gprF1, gprF3); + + MOV32RtoR(gprF0, gprF1); + MOV32RtoR(gprF2, gprF1); + MOV32RtoR(gprF3, gprF1); SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL); SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, 0); diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index fc63c3db9d..7ed2e26245 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -24,7 +24,7 @@ microVUt(void) mVUdivSet(mV) { if (mVUinfo.doDivFlag) { getFlagReg(flagReg1, sFLAG.write); if (!sFLAG.doFlag) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } - AND32ItoR(flagReg1, 0x0fcf); + AND32ItoR(flagReg1, 0xfff3ffff); OR32MtoR (flagReg1, (uptr)&mVU->divFlag); } } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index f7410f8d96..cef71c3874 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -37,7 +37,7 @@ SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ TEST32ItoR(gprTemp, 1); /* Check sign bit */ \ aJump = JZ8(0); /* Skip if positive */ \ - MOV32ItoM((uptr)&mVU->divFlag, 0x410); /* Set Invalid Flags */ \ + MOV32ItoM((uptr)&mVU->divFlag, divI); /* Set Invalid Flags */ \ SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \ x86SetJ8(aJump); \ } @@ -54,10 +54,10 @@ mVUop(mVU_DIV) { testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero ajmp = JZ8(0); - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0) + MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0) bjmp = JMP8(0); x86SetJ8(ajmp); - MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0) + MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0) x86SetJ8(bjmp); SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt); @@ -112,10 +112,10 @@ mVUop(mVU_RSQRT) { testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero bjmp = JZ8(0); // Skip if none are - MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0) + MOV32ItoM((uptr)&mVU->divFlag, divI); // Set invalid flag (0/0) cjmp = JMP8(0); x86SetJ8(bjmp); - MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0) + MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide flag (only when not 0/0) x86SetJ8(cjmp); SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit); @@ -522,7 +522,7 @@ mVUop(mVU_FMOR) { mVUop(mVU_FSAND) { pass1 { mVUanalyzeSflag(mVU, _It_); } pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); + mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); AND16ItoR(gprT1, _Imm12_); mVUallocVIb(mVU, gprT1, _It_); } @@ -530,23 +530,10 @@ mVUop(mVU_FSAND) { pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; } } -mVUop(mVU_FSEQ) { - pass1 { mVUanalyzeSflag(mVU, _It_); } - pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); - XOR16ItoR(gprT1, _Imm12_); - SUB16ItoR(gprT1, 1); - SHR16ItoR(gprT1, 15); - mVUallocVIb(mVU, gprT1, _It_); - } - pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); } - pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; } -} - mVUop(mVU_FSOR) { pass1 { mVUanalyzeSflag(mVU, _It_); } pass2 { - mVUallocSFLAGa(gprT1, sFLAG.read); + mVUallocSFLAGc(gprT1, gprT2, sFLAG.read); OR16ItoR(gprT1, _Imm12_); mVUallocVIb(mVU, gprT1, _It_); } @@ -554,14 +541,54 @@ mVUop(mVU_FSOR) { pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; } } +mVUop(mVU_FSEQ) { + pass1 { mVUanalyzeSflag(mVU, _It_); } + pass2 { + u8 *pjmp; + int imm = 0; + if (_Imm12_ & 0x0001) imm |= 0x000000f; // Z + if (_Imm12_ & 0x0002) imm |= 0x00000f0; // S + if (_Imm12_ & 0x0004) imm |= 0x0010000; // U + if (_Imm12_ & 0x0008) imm |= 0x0020000; // O + if (_Imm12_ & 0x0010) imm |= 0x0040000; // I + if (_Imm12_ & 0x0020) imm |= 0x0080000; // D + if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS + if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS + if (_Imm12_ & 0x0100) imm |= 0x0400000; // US + if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS + if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS + if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS + + mVUallocSFLAGa(gprT1, sFLAG.read); + setBitFSEQ(0x000f); // Z bit + setBitFSEQ(0x00f0); // S bit + setBitFSEQ(0x0f00); // ZS bit + setBitFSEQ(0xf000); // SS bit + XOR32ItoR(gprT1, imm); + SUB32ItoR(gprT1, 1); + SHR32ItoR(gprT1, 31); + mVUallocVIb(mVU, gprT1, _It_); + } + pass3 { mVUlog("FSEQ vi%02d, $%x", _Ft_, _Imm12_); } + pass4 { mVUflagInfo |= 0xf << (/*mVUcount +*/ 0); mVUsFlagHack = 0; } +} + mVUop(mVU_FSSET) { pass1 { mVUanalyzeFSSET(mVU); } pass2 { - int flagReg1, flagReg2; - getFlagReg(flagReg1, sFLAG.write); - if (!(sFLAG.doFlag||mVUinfo.doDivFlag)) { getFlagReg(flagReg2, sFLAG.lastWrite); MOV32RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction - AND32ItoR(flagReg1, 0x03f); - OR32ItoR (flagReg1, (_Imm12_ & 0xfc0)); + int sReg, imm = 0; + if (_Imm12_ & 0x0040) imm |= 0x0000f00; // ZS + if (_Imm12_ & 0x0080) imm |= 0x000f000; // SS + if (_Imm12_ & 0x0100) imm |= 0x0400000; // US + if (_Imm12_ & 0x0200) imm |= 0x0800000; // OS + if (_Imm12_ & 0x0400) imm |= 0x1000000; // IS + if (_Imm12_ & 0x0800) imm |= 0x2000000; // DS + getFlagReg(sReg, sFLAG.write); + if (!(sFLAG.doFlag || mVUinfo.doDivFlag)) { + mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag + } + AND32ItoR(sReg, 0xf00ff); // Keep Non-Sticky Bits + if (imm) OR32ItoR(sReg, imm); } pass3 { mVUlog("FSSET $%x", _Imm12_); } pass4 { mVUsFlagHack = 0; } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index d36cfc7a8e..48d2ebcd59 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -105,6 +105,9 @@ declareAllVariables #define _Tbit_ (1<<27) #define _MDTbit_ 0 //( _Mbit_ | _Dbit_ | _Tbit_ ) // ToDo: Implement this stuff... +#define divI 0x1040000 +#define divD 0x2080000 + #define isVU1 (mVU == µVU1) #define getIndex (isVU1 ? 1 : 0) #define getVUmem(x) (((isVU1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index f6d2c58b00..138683c9e9 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -300,6 +300,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) { if (CHECK_VU_FLAGHACK1) { int bFlagInfo = mVUflagInfo; int bCode = mVU->code; + int bFlagHack = mVUsFlagHack; mVUsFlagHack = 1; for (u32 i = 0; i < mVU->progSize; i+=2) { mVU->code = mVU->prog.prog[progIndex].data[i]; @@ -308,6 +309,7 @@ microVUt(void) mVUcheckSflag(mV, int progIndex) { mVUflagInfo = bFlagInfo; mVU->code = bCode; mVU->prog.prog[progIndex].sFlagHack = mVUsFlagHack; + mVUsFlagHack = bFlagHack; } } diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index c2bb91e294..db0e92a51e 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -29,7 +29,6 @@ // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool modXYZW) { int sReg, mReg = gprT1; - static u8 *pjmp, *pjmp2; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); @@ -40,7 +39,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool if (sFLAG.doFlag) { getFlagReg(sReg, sFLAG.write); // Set sReg to valid GPR by Cur Flag Instance mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag - AND32ItoR(sReg, 0xff0); // Keep Sticky and D/I flags + AND32ItoR(sReg, 0xffffff00); // Keep Sticky and D/I flags } //-------------------------Check for Signed flags------------------------------ @@ -54,25 +53,22 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation - if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are - if (mFLAG.doFlag) SHL32ItoR(mReg, 4 + ADD_XYZW); - if (sFLAG.doFlag) OR32ItoR(sReg, 0x82); // SS, S flags - if (sFLAG.doFlag && _XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking - if (sFLAG.doFlag) x86SetJ8(pjmp); + SHL32ItoR(mReg, 4 + ADD_XYZW); //-------------------------Check for Zero flags------------------------------ AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation - if (sFLAG.doFlag) pjmp = JZ8(0); // Skip if none are - if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); } - if (sFLAG.doFlag) { OR32ItoR(sReg, 0x41); } // ZS, Z flags - if (sFLAG.doFlag) x86SetJ8(pjmp); + if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); } + OR32RtoR(mReg, gprT2); //-------------------------Write back flags------------------------------ - if (sFLAG.doFlag && _XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here - if (mFLAG.doFlag) mVUallocMFLAGb(mVU, mReg, mFLAG.write); // Set Mac Flag + if (sFLAG.doFlag) { + OR32RtoR (sReg, mReg); + SHL32ItoR(mReg, 8); + OR32RtoR (sReg, mReg); + } } //------------------------------------------------------------------