diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h
index 9d717f2e47..3bd90ca4eb 100644
--- a/pcsx2/x86/microVU_Alloc.h
+++ b/pcsx2/x86/microVU_Alloc.h
@@ -42,8 +42,8 @@ struct microAllocInfo {
 	// bit 2 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
 	// bit 3 = Used with bit 4 to make a 2-bit key for ACC read instance
 	// bit 4 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
-	// bit 5 = Read Q1/P1 or backup?
-	// bit 6 = Write to Q2/P2?
+	// bit 5 = Write to Q1 or Q2?
+	// bit 6 = Read Q1 or Q2?
 	// bit 7 = Write VI(Fd) Result to backup memory?
 	// bit 8 = Update Mac Flags?
 	// bit 9 = Update Status Flags?
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index 4cbb1dda67..241eb6ed3f 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -658,6 +658,192 @@ microVUt(void) mVUallocFMAC19b(int& Fd) {
 	mVUallocFMAC9b(Fd);
 }
 
+//------------------------------------------------------------------
+// FMAC20 - MADDA FMAC Opcode (I Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC20a(int& ACCw, int&ACCr, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	getACC(ACCw);
+	Fs = (_X_Y_Z_W == 15) ? ACCw : xmmFs; // full xyzw mask: the result is computed directly in the ACC write reg
+	Ft = xmmFt;
+	ACCr = xmmACC0 + readACC; // ACC instance this op reads (2-bit readACC key)
+	getIreg(Ft);
+	if (_XYZW_SS && _X) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero4(Fs); }
+		else { getReg4(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC20b(int& ACCw, int& Fs) {
+	mVUallocFMAC14b(ACCw, Fs);
+}
+
+//------------------------------------------------------------------
+// FMAC21 - MSUBA FMAC Opcode (I Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC21a(int& ACCw, int&ACCr, int& Fs, int& Ft) {
+	mVUallocFMAC20a(ACCw, ACCr, Fs, Ft);
+	SSE_MOVAPS_XMM_to_XMM(xmmT1, ACCr); // mVU_FMAC21 uses ACCr as the destination, so hand it a copy in xmmT1
+	ACCr = xmmT1;
+}
+
+microVUt(void) mVUallocFMAC21b(int& ACCw, int& ACCr) {
+	mVUallocFMAC15b(ACCw, ACCr);
+}
+
+//------------------------------------------------------------------
+// FMAC22 - Normal FMAC Opcodes (Q Reg)
+//------------------------------------------------------------------
+
+// getQreg - Unpacks the Q instance selected by readQ (alloc-info bit 6,
+// "Read Q1 or Q2?") from xmmPQ into 'reg' via mVUunpack_xyzw. FMAC22-27 only
+// read Q as their Ft operand, so the read key (not writeQ) is the selector.
+#define getQreg(reg) { \
+	mVUunpack_xyzw(reg, xmmPQ, readQ); \
+	/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \
+}
+
+microVUt(void) mVUallocFMAC22a(int& Fd, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	Fs = xmmFs;
+	Ft = xmmFt;
+	Fd = xmmFs; // the result is produced in the same register as Fs
+	getQreg(Ft);
+	if (_XYZW_SS) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC22b(int& Fd) {
+	mVUallocFMAC1b(Fd);
+}
+
+//------------------------------------------------------------------
+// FMAC23 - FMAC Opcodes Storing Result to ACC (Q Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC23a(int& ACC, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	Fs = xmmFs;
+	Ft = xmmFt;
+	getACC(ACC);
+	getQreg(Ft);
+	if (_XYZW_SS && _X) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero4(Fs); }
+		else { getReg4(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC23b(int& ACC, int& Fs) {
+	mVUallocFMAC4b(ACC, Fs);
+}
+
+
+//------------------------------------------------------------------
+// FMAC24 - MADD FMAC Opcode Storing Result to Fd (Q Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC24a(int& Fd, int&ACC, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	Fs = xmmFs;
+	Ft = xmmFt;
+	Fd = xmmFs; // the result is produced in the same register as Fs
+	ACC = xmmACC0 + readACC; // ACC instance this op reads (2-bit readACC key)
+	getQreg(Ft);
+	if (_XYZW_SS && _X) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero4(Fs); }
+		else { getReg4(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC24b(int& Fd) {
+	mVUallocFMAC8b(Fd);
+}
+
+//------------------------------------------------------------------
+// FMAC25 - MSUB FMAC Opcode Storing Result to Fd (Q Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC25a(int& Fd, int&ACC, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	Fs = xmmFs;
+	Ft = xmmFt;
+	Fd = xmmT1; // Fd and ACC share xmmT1: mVU_FMAC25 subtracts into ACC and that is the result
+	ACC = xmmT1;
+	SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); // copy of the ACC read instance, so the source reg is not the destination
+	getQreg(Ft);
+	if (_XYZW_SS && _X) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero4(Fs); }
+		else { getReg4(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC25b(int& Fd) {
+	mVUallocFMAC9b(Fd);
+}
+
+//------------------------------------------------------------------
+// FMAC26 - MADDA FMAC Opcode (Q Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC26a(int& ACCw, int&ACCr, int& Fs, int& Ft) {
+	microVU* mVU = mVUx;
+	getACC(ACCw);
+	Fs = (_X_Y_Z_W == 15) ? ACCw : xmmFs; // full xyzw mask: the result is computed directly in the ACC write reg
+	Ft = xmmFt;
+	ACCr = xmmACC0 + readACC; // ACC instance this op reads (2-bit readACC key)
+	getQreg(Ft);
+	if (_XYZW_SS && _X) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+	}
+	else {
+		if (!_Fs_) { getZero4(Fs); }
+		else { getReg4(Fs, _Fs_); }
+	}
+}
+
+microVUt(void) mVUallocFMAC26b(int& ACCw, int& Fs) {
+	mVUallocFMAC14b(ACCw, Fs);
+}
+
+//------------------------------------------------------------------
+// FMAC27 - MSUBA FMAC Opcode (Q Reg)
+//------------------------------------------------------------------
+
+microVUt(void) mVUallocFMAC27a(int& ACCw, int&ACCr, int& Fs, int& Ft) {
+	mVUallocFMAC26a(ACCw, ACCr, Fs, Ft);
+	SSE_MOVAPS_XMM_to_XMM(xmmT1, ACCr); // mVU_FMAC27 uses ACCr as the destination, so hand it a copy in xmmT1
+	ACCr = xmmT1;
+}
+
+microVUt(void) mVUallocFMAC27b(int& ACCw, int& ACCr) {
+	mVUallocFMAC15b(ACCw, ACCr);
+}
+
 //------------------------------------------------------------------
 // Flag Allocators
 //------------------------------------------------------------------
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index 2fe67e06a2..86287c9f4c 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -101,6 +101,8 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
 #define writeACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1)
 #define prevACC (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) - 1) & 0x3)
 #define readACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<3)) >> 3)
+#define writeQ ((mVUallocInfo.info[mVUallocInfo.curPC] & (1<<5)) >> 5) // alloc-info bit 5: Q instance to write (0 or 1)
+#define readQ ((mVUallocInfo.info[mVUallocInfo.curPC] & (1<<6)) >> 6) // alloc-info bit 6: Q instance to read (0 or 1)
 //#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7))
 #define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8))
 #define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8))
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index 207c78536c..3b1fe47866 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -79,8 +79,8 @@ microVUx(void) mVUloadReg(int reg, u32 offset, int xyzw) {
 		case 4: SSE_MOVSS_M32_to_XMM(reg, offset+4); break; // Y
 		case 2: SSE_MOVSS_M32_to_XMM(reg, offset+8); break; // Z
 		case 1: SSE_MOVSS_M32_to_XMM(reg, offset+12); break; // W
-		case 3: SSE_MOVHPS_M64_to_XMM(reg, offset+8); break; // ZW (not sure if this is faster than default)
-		case 12: SSE_MOVLPS_M64_to_XMM(reg, offset); break; // XY (not sure if this is faster than default)
+		//case 3: SSE_MOVHPS_M64_to_XMM(reg, offset+8); break; // ZW (not sure if this is faster than default)
+		//case 12: SSE_MOVLPS_M64_to_XMM(reg, offset); break; // XY (not sure if this is faster than default)
 		default: SSE_MOVAPS_M128_to_XMM(reg, offset); break;
 	}
 }
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl
index 9240108da5..9b5e3a5090 100644
--- a/pcsx2/x86/microVU_Upper.inl
+++ b/pcsx2/x86/microVU_Upper.inl
@@ -389,6 +389,154 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
 	} \
 }
 
+#define mVU_FMAC20(operation) { /* MADDA with the I reg: Fs = (Fs * Ft) op ACCr */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int ACCw, ACCr, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC20a(ACCw, ACCr, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(Fs, ACCr); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(Fs, ACCr); \
+		} \
+		mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC20b(ACCw, Fs); \
+	} \
+}
+
+#define mVU_FMAC21(operation) { /* MSUBA with the I reg: ACCr (a temp copy) = ACCr op (Fs * Ft) */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int ACCw, ACCr, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC21a(ACCw, ACCr, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(ACCr, Fs); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \
+		} \
+		mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC21b(ACCw, ACCr); \
+	} \
+}
+
+#define mVU_FMAC22(operation) { /* Fs op Q, result written back through mVUallocFMAC22b */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int Fd, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC22a(Fd, Fs, Ft); \
+		if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
+		else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
+		mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \
+		mVUallocFMAC22b(Fd); \
+	} \
+}
+
+#define mVU_FMAC23(operation) { /* Fs op Q, result written to ACC through mVUallocFMAC23b */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int ACC, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC23a(ACC, Fs, Ft); \
+		if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
+		else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
+		mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC23b(ACC, Fs); \
+	} \
+}
+
+#define mVU_FMAC24(operation) { /* MADD with Q: Fs = (Fs * Ft) op ACC */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int Fd, ACC, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC24a(Fd, ACC, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(Fs, ACC); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \
+		} \
+		mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC24b(Fd); \
+	} \
+}
+
+#define mVU_FMAC25(operation) { /* MSUB with Q: ACC (a temp copy, same reg as Fd) = ACC op (Fs * Ft) */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int Fd, ACC, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC25a(Fd, ACC, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(ACC, Fs); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \
+		} \
+		mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC25b(Fd); \
+	} \
+}
+
+#define mVU_FMAC26(operation) { /* MADDA with Q: Fs = (Fs * Ft) op ACCr */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int ACCw, ACCr, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC26a(ACCw, ACCr, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(Fs, ACCr); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(Fs, ACCr); \
+		} \
+		mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC26b(ACCw, Fs); \
+	} \
+}
+
+#define mVU_FMAC27(operation) { /* MSUBA with Q: ACCr (a temp copy) = ACCr op (Fs * Ft) */ \
+	microVU* mVU = mVUx; \
+	if (recPass == 0) {} \
+	else { \
+		int ACCw, ACCr, Fs, Ft; \
+		if (isNOP) return; \
+		mVUallocFMAC27a(ACCw, ACCr, Fs, Ft); \
+		if (_XYZW_SS && _X) { \
+			SSE_MULSS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##SS_XMM_to_XMM(ACCr, Fs); \
+		} \
+		else { \
+			SSE_MULPS_XMM_to_XMM(Fs, Ft); \
+			SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \
+		} \
+		mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \
+		mVUallocFMAC27b(ACCw, ACCr); \
+	} \
+}
+
 //------------------------------------------------------------------
 // Micro VU Micromode Upper instructions
 //------------------------------------------------------------------
@@ -406,70 +554,70 @@ microVUf(void) mVU_ABS() {
 }
 microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); }
 microVUf(void) mVU_ADDi() { mVU_FMAC6(ADD); }
-microVUf(void) mVU_ADDq(){}
+microVUf(void) mVU_ADDq() { mVU_FMAC22(ADD); }
 microVUf(void) mVU_ADDx() { mVU_FMAC3(ADD); }
 microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); }
 microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); }
 microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); }
 microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); }
 microVUf(void) mVU_ADDAi() { mVU_FMAC7(ADD); }
-microVUf(void) mVU_ADDAq(){}
+microVUf(void) mVU_ADDAq() { mVU_FMAC23(ADD); }
 microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); }
 microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); }
 microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); }
 microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); }
 microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); }
 microVUf(void) mVU_SUBi() { mVU_FMAC6(SUB); }
-microVUf(void) mVU_SUBq(){}
+microVUf(void) mVU_SUBq() { mVU_FMAC22(SUB); }
 microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); }
 microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); }
 microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); }
 microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); }
 microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); }
 microVUf(void) mVU_SUBAi() { mVU_FMAC7(SUB); }
-microVUf(void) mVU_SUBAq(){}
+microVUf(void) mVU_SUBAq() { mVU_FMAC23(SUB); }
 microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); }
 microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); }
 microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); }
 microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); }
 microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); }
 microVUf(void) mVU_MULi() { mVU_FMAC6(MUL); }
-microVUf(void) mVU_MULq(){}
+microVUf(void) mVU_MULq() { mVU_FMAC22(MUL); }
 microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); }
 microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); }
 microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); }
 microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); }
 microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); }
 microVUf(void) mVU_MULAi() { mVU_FMAC7(MUL); }
-microVUf(void) mVU_MULAq(){}
+microVUf(void) mVU_MULAq() { mVU_FMAC23(MUL); }
 microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); }
 microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); }
 microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); }
 microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); }
 microVUf(void) mVU_MADD() { mVU_FMAC8(ADD); }
 microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD); }
-microVUf(void) mVU_MADDq(){}
+microVUf(void) mVU_MADDq() { mVU_FMAC24(ADD); }
 microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD); }
 microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD); }
 microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD); }
 microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD); }
 microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD); }
-microVUf(void) mVU_MADDAi(){}
-microVUf(void) mVU_MADDAq(){}
+microVUf(void) mVU_MADDAi() { mVU_FMAC20(ADD); }
+microVUf(void) mVU_MADDAq() { mVU_FMAC26(ADD); }
 microVUf(void) mVU_MADDAx() { mVU_FMAC16(ADD); }
 microVUf(void) mVU_MADDAy() { mVU_FMAC16(ADD); }
 microVUf(void) mVU_MADDAz() { mVU_FMAC16(ADD); }
 microVUf(void) mVU_MADDAw() { mVU_FMAC16(ADD); }
 microVUf(void) mVU_MSUB() { mVU_FMAC9(SUB); }
 microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB); }
-microVUf(void) mVU_MSUBq(){}
+microVUf(void) mVU_MSUBq() { mVU_FMAC25(SUB); }
 microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB); }
 microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB); }
 microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB); }
 microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB); }
 microVUf(void) mVU_MSUBA() { mVU_FMAC14(SUB); }
-microVUf(void) mVU_MSUBAi(){}
-microVUf(void) mVU_MSUBAq(){}
+microVUf(void) mVU_MSUBAi() { mVU_FMAC21(SUB); }
+microVUf(void) mVU_MSUBAq() { mVU_FMAC27(SUB); }
 microVUf(void) mVU_MSUBAx() { mVU_FMAC17(SUB); }
 microVUf(void) mVU_MSUBAy() { mVU_FMAC17(SUB); }
 microVUf(void) mVU_MSUBAz() { mVU_FMAC17(SUB); }
@@ -488,7 +636,11 @@ microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); }
 microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); }
 microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL); }
 microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB); }
-microVUf(void) mVU_NOP(){}
+microVUf(void) mVU_NOP() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {} // nothing is emitted for NOP in either pass
+}
 microVUq(void) mVU_FTOIx(uptr addr) {
 	microVU* mVU = mVUx;
 	if (recPass == 0) {}