diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 46ea6933b9..4cbb1dda67 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -27,8 +27,9 @@ // Micro VU - recPass 1 Functions //------------------------------------------------------------------ -#define makeFdFd (makeFd == 0) -#define makeFdFs (makeFd == 1) +//------------------------------------------------------------------ +// FMAC1 - Normal FMAC Opcodes +//------------------------------------------------------------------ #define getReg(reg, _reg_) { \ mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); \ @@ -45,10 +46,6 @@ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } -//------------------------------------------------------------------ -// FMAC1 - Normal FMAC Opcodes -//------------------------------------------------------------------ - microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; @@ -171,12 +168,12 @@ microVUt(void) mVUallocFMAC3b(int& Fd) { //------------------------------------------------------------------ #define getReg4(reg, _reg_) { \ - mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (_XYZW_SS) ? 15 : _X_Y_Z_W); \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _xyzw_ACC); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, _xyzw_ACC); \ } #define getZero4(reg) { \ - if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); } \ + if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], _xyzw_ACC); } \ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } @@ -214,7 +211,7 @@ microVUt(void) mVUallocFMAC4a(int& ACC, int& Fs, int& Ft) { microVUt(void) mVUallocFMAC4b(int& ACC, int& Fs) { microVU* mVU = mVUx; - if (CHECK_VU_OVERFLOW) mVUclamp1(Fs, xmmT1, (_XYZW_SS && !_X) ? 
15 : _X_Y_Z_W); + if (CHECK_VU_OVERFLOW) mVUclamp1(Fs, xmmT1, _xyzw_ACC); mVUmergeRegs(ACC, Fs, _X_Y_Z_W); } @@ -308,10 +305,314 @@ microVUt(void) mVUallocFMAC7b(int& ACC, int& Fs) { } //------------------------------------------------------------------ -// FMAC17 - OPMULA FMAC Opcode +// FMAC8 - MADD FMAC Opcode Storing Result to Fd //------------------------------------------------------------------ -microVUt(void) mVUallocFMAC17a(int& ACC, int& Fs, int& Ft) { +microVUt(void) mVUallocFMAC8a(int& Fd, int&ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmFs; + ACC = xmmACC0 + readACC; + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZeroSS(Ft); } + else { getReg(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } + } + } +} + +microVUt(void) mVUallocFMAC8b(int& Fd) { + microVU* mVU = mVUx; + if (!_Fd_) return; + if (CHECK_VU_OVERFLOW) mVUclamp1(Fd, xmmT1, _xyzw_ACC); + mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC9 - MSUB FMAC Opcode Storing Result to Fd +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC9a(int& Fd, int&ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmT1; + ACC = xmmT1; + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZeroSS(Ft); } + else { getReg(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } + } + } +} 
+ +microVUt(void) mVUallocFMAC9b(int& Fd) { + microVU* mVU = mVUx; + if (!_Fd_) return; + if (CHECK_VU_OVERFLOW) mVUclamp1(Fd, xmmFt, _xyzw_ACC); + mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC10 - MADD FMAC BC(xyzw) Opcode Storing Result to Fd +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC10a(int& Fd, int& ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmFs; + ACC = xmmACC0 + readACC; + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if ( (_Ft_ == _Fs_) && _bc_x) { + Ft = Fs; + } + else { + if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (!_Ft_) { getZero3(Ft); } + else { getReg3(Ft, _Ft_); } + } +} + +microVUt(void) mVUallocFMAC10b(int& Fd) { + mVUallocFMAC8b(Fd); +} + +//------------------------------------------------------------------ +// FMAC11 - MSUB FMAC BC(xyzw) Opcode Storing Result to Fd +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmT1; + ACC = xmmT1; + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if ( (_Ft_ == _Fs_) && _bc_x) { + Ft = Fs; + } + else { + if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (!_Ft_) { getZero3(Ft); } + else { getReg3(Ft, _Ft_); } + } +} + +microVUt(void) mVUallocFMAC11b(int& Fd) { + mVUallocFMAC9b(Fd); +} + +//------------------------------------------------------------------ +// FMAC12 - MADD FMAC Opcode Storing Result to Fd (I Reg) 
+//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC12a(int& Fd, int&ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmFs; + ACC = xmmACC0 + readACC; + getIreg(Ft); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + } +} + +microVUt(void) mVUallocFMAC12b(int& Fd) { + mVUallocFMAC8b(Fd); +} + +//------------------------------------------------------------------ +// FMAC13 - MSUB FMAC Opcode Storing Result to Fd (I Reg) +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC13a(int& Fd, int&ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmT1; + ACC = xmmT1; + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); + getIreg(Ft); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + } +} + +microVUt(void) mVUallocFMAC13b(int& Fd) { + mVUallocFMAC9b(Fd); +} + +//------------------------------------------------------------------ +// FMAC14 - MADDA FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC14a(int& ACCw, int&ACCr, int& Fs, int& Ft) { + microVU* mVU = mVUx; + getACC(ACCw); + Fs = (_X_Y_Z_W == 15) ? 
ACCw : xmmFs; + Ft = xmmFt; + ACCr = xmmACC0 + readACC; + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZeroSS(Ft); } + else { getReg(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } + } + } +} + +microVUt(void) mVUallocFMAC14b(int& ACCw, int& Fs) { + microVU* mVU = mVUx; + if (CHECK_VU_OVERFLOW) mVUclamp1(Fs, xmmT1, _xyzw_ACC); + mVUmergeRegs(ACCw, Fs, _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC15 - MSUBA FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC15a(int& ACCw, int&ACCr, int& Fs, int& Ft) { + mVUallocFMAC14a(ACCw, ACCr, Fs, Ft); + SSE_MOVAPS_XMM_to_XMM(xmmT1, ACCr); + ACCr = xmmT1; +} + +microVUt(void) mVUallocFMAC15b(int& ACCw, int& ACCr) { + microVU* mVU = mVUx; + if (CHECK_VU_OVERFLOW) mVUclamp1(ACCr, xmmFt, _xyzw_ACC); + mVUmergeRegs(ACCw, ACCr, _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC16 - MADDA BC(xyzw) FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC16a(int& ACCw, int&ACCr, int& Fs, int& Ft) { + microVU* mVU = mVUx; + getACC(ACCw); + Fs = (_X_Y_Z_W == 15) ? 
ACCw : xmmFs; + Ft = xmmFt; + ACCr = xmmACC0 + readACC; + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if ( (_Ft_ == _Fs_) && _bc_x) { + Ft = Fs; + } + else { + if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (!_Ft_) { getZero3(Ft); } + else { getReg3(Ft, _Ft_); } + } +} + +microVUt(void) mVUallocFMAC16b(int& ACCw, int& Fs) { + mVUallocFMAC14b(ACCw, Fs); +} + +//------------------------------------------------------------------ +// FMAC17 - MSUBA BC(xyzw) FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC17a(int& ACCw, int&ACCr, int& Fs, int& Ft) { + mVUallocFMAC16a(ACCw, ACCr, Fs, Ft); + SSE_MOVAPS_XMM_to_XMM(xmmT1, ACCr); + ACCr = xmmT1; +} + +microVUt(void) mVUallocFMAC17b(int& ACCw, int& ACCr) { + mVUallocFMAC15b(ACCw, ACCr); +} + +//------------------------------------------------------------------ +// FMAC18 - OPMULA FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC18a(int& ACC, int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; Ft = xmmFt; @@ -323,14 +624,38 @@ microVUt(void) mVUallocFMAC17a(int& ACC, int& Fs, int& Ft) { if (!_Ft_) { getZero4(Ft); } else { getReg4(Ft, _Ft_); } - SSE_SHUFPS_XMM_to_XMM( Fs, Fs, 0xC9 ); // WXZY - SSE_SHUFPS_XMM_to_XMM( Ft, Ft, 0xD2 ); // WYXZ + SSE_SHUFPS_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY + SSE_SHUFPS_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ } -microVUt(void) mVUallocFMAC17b(int& ACC, int& Fs) { +microVUt(void) mVUallocFMAC18b(int& ACC, int& Fs) { + mVUallocFMAC4b(ACC, Fs); +} + +//------------------------------------------------------------------ +// FMAC19 - OPMSUB FMAC Opcode +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC19a(int& Fd, int&ACC, int& Fs, int& Ft) { microVU* mVU = mVUx; - if (CHECK_VU_OVERFLOW) 
mVUclamp1(Fs, xmmT1, _X_Y_Z_W); - mVUmergeRegs(ACC, Fs, _X_Y_Z_W); + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmT1; + ACC = xmmT1; + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC0 + readACC); + + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } + + SSE_SHUFPS_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY + SSE_SHUFPS_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ +} + +microVUt(void) mVUallocFMAC19b(int& Fd) { + mVUallocFMAC9b(Fd); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 949f345648..2fe67e06a2 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -47,6 +47,7 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define _XYZW_SS (_X+_Y+_Z+_W==1) #define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF )) +#define _xyzw_ACC ((_XYZW_SS && !_X) ? 15 : _X_Y_Z_W) #define _bc_ (mVU->code & 0x03) #define _bc_x ((mVU->code & 0x03) == 0) @@ -99,8 +100,7 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) #define writeACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) #define prevACC (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) - 1) & 0x3) -//#define setACCreg ((mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) >> 1) -//#define setACCmem (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) +#define readACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<3)) >> 3) //#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) #define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) #define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8)) @@ -109,7 +109,7 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define fsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12) #define fpmInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) >> 10) - 1) & 0x3) #define fpsInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & 
(3<<12)) >> 12) - 1) & 0x3) -//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) -//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) +//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13)) +//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14)) #include "microVU_Misc.inl" diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 98b626b02c..9240108da5 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -22,9 +22,10 @@ // mVUupdateFlags() - Updates status/mac flags //------------------------------------------------------------------ -#define AND_XYZW (_XYZW_SS ? (1) : (doMac ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) +#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (doMac ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W]))) -microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { +// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations +microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modXYZW) { microVU* mVU = mVUx; int sReg, mReg = gprT1; static u8 *pjmp, *pjmp2; @@ -86,7 +87,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ mVUallocFMAC1b(Fd); \ } \ } @@ -100,7 +101,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC3a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ mVUallocFMAC3b(Fd); \ } \ } @@ -114,7 +115,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC4a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ 
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ mVUallocFMAC4b(ACC, Fs); \ } \ } @@ -128,7 +129,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC5a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ mVUallocFMAC5b(ACC, Fs); \ } \ } @@ -142,7 +143,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC6a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ mVUallocFMAC6b(Fd); \ } \ } @@ -156,21 +157,235 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { mVUallocFMAC7a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ mVUallocFMAC7b(ACC, Fs); \ } \ } +#define mVU_FMAC8(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC8a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(Fs, ACC); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ + } \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC8b(Fd); \ + } \ +} + +#define mVU_FMAC9(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC9a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(ACC, Fs); \ + } 
\ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ + } \ + mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC9b(Fd); \ + } \ +} + +#define mVU_FMAC10(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC10a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(Fs, ACC); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ + } \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC10b(Fd); \ + } \ +} + +#define mVU_FMAC11(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC11a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(ACC, Fs); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ + } \ + mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC11b(Fd); \ + } \ +} + +#define mVU_FMAC12(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC12a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(Fs, ACC); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ + } \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC12b(Fd); \ + } \ +} + +#define mVU_FMAC13(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC13a(Fd, ACC, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(ACC, Fs); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ + } \ + 
mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC13b(Fd); \ + } \ +} + +#define mVU_FMAC14(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACCw, ACCr, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC14a(ACCw, ACCr, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(Fs, ACCr); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(Fs, ACCr); \ + } \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC14b(ACCw, Fs); \ + } \ +} + +#define mVU_FMAC15(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACCw, ACCr, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC15a(ACCw, ACCr, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(ACCr, Fs); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ + } \ + mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC15b(ACCw, ACCr); \ + } \ +} + +#define mVU_FMAC16(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACCw, ACCr, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC16a(ACCw, ACCr, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(Fs, ACCr); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(Fs, ACCr); \ + } \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC16b(ACCw, Fs); \ + } \ +} + #define mVU_FMAC17(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACCw, ACCr, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC17a(ACCw, ACCr, Fs, Ft); \ + if (_XYZW_SS && _X) { \ + SSE_MULSS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##SS_XMM_to_XMM(ACCr, Fs); \ + } \ + else { \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ + } \ + mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + 
mVUallocFMAC17b(ACCw, ACCr); \ + } \ +} + +#define mVU_FMAC18(operation) { \ microVU* mVU = mVUx; \ if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ if (isNOP) return; \ - mVUallocFMAC7a(ACC, Fs, Ft); \ + mVUallocFMAC18a(ACC, Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ - mVUallocFMAC7b(ACC, Fs); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC18b(ACC, Fs); \ + } \ +} + +#define mVU_FMAC19(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC19a(Fd, ACC, Fs, Ft); \ + SSE_MULPS_XMM_to_XMM(Fs, Ft); \ + SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ + mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUallocFMAC19b(Fd); \ } \ } @@ -231,34 +446,34 @@ microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); } microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); } microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); } microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); } -microVUf(void) mVU_MADD(){} -microVUf(void) mVU_MADDi(){} +microVUf(void) mVU_MADD() { mVU_FMAC8(ADD); } +microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD); } microVUf(void) mVU_MADDq(){} -microVUf(void) mVU_MADDx(){} -microVUf(void) mVU_MADDy(){} -microVUf(void) mVU_MADDz(){} -microVUf(void) mVU_MADDw(){} -microVUf(void) mVU_MADDA(){} +microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD); } +microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD); } +microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD); } +microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD); } +microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD); } microVUf(void) mVU_MADDAi(){} microVUf(void) mVU_MADDAq(){} -microVUf(void) mVU_MADDAx(){} -microVUf(void) mVU_MADDAy(){} -microVUf(void) mVU_MADDAz(){} -microVUf(void) mVU_MADDAw(){} -microVUf(void) mVU_MSUB(){} -microVUf(void) mVU_MSUBi(){} +microVUf(void) mVU_MADDAx() { mVU_FMAC16(ADD); } +microVUf(void) mVU_MADDAy() { mVU_FMAC16(ADD); } +microVUf(void) mVU_MADDAz() { mVU_FMAC16(ADD); } +microVUf(void) mVU_MADDAw() { 
mVU_FMAC16(ADD); } +microVUf(void) mVU_MSUB() { mVU_FMAC9(SUB); } +microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB); } microVUf(void) mVU_MSUBq(){} -microVUf(void) mVU_MSUBx(){} -microVUf(void) mVU_MSUBy(){} -microVUf(void) mVU_MSUBz(){} -microVUf(void) mVU_MSUBw(){} -microVUf(void) mVU_MSUBA(){} +microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB); } +microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB); } +microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB); } +microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB); } +microVUf(void) mVU_MSUBA() { mVU_FMAC15(SUB); } /* FMAC15 emits ACCr = ACCr - Fs*Ft; FMAC14 (the MADDA path) would emit Fs*Ft - ACCr */ microVUf(void) mVU_MSUBAi(){} microVUf(void) mVU_MSUBAq(){} -microVUf(void) mVU_MSUBAx(){} -microVUf(void) mVU_MSUBAy(){} -microVUf(void) mVU_MSUBAz(){} -microVUf(void) mVU_MSUBAw(){} +microVUf(void) mVU_MSUBAx() { mVU_FMAC17(SUB); } +microVUf(void) mVU_MSUBAy() { mVU_FMAC17(SUB); } +microVUf(void) mVU_MSUBAz() { mVU_FMAC17(SUB); } +microVUf(void) mVU_MSUBAw() { mVU_FMAC17(SUB); } microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); } microVUf(void) mVU_MAXi() { mVU_FMAC6(MAX); } microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); } @@ -271,8 +486,8 @@ microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); } microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); } microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); } microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); } -microVUf(void) mVU_OPMULA() { mVU_FMAC17(MUL); } -microVUf(void) mVU_OPMSUB(){} +microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL); } +microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB); } microVUf(void) mVU_NOP(){} microVUq(void) mVU_FTOIx(uptr addr) { microVU* mVU = mVUx;