From 93b85af339a5a400dc6132bdd31a2e882668ba8b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 11 Mar 2009 23:49:39 +0000 Subject: [PATCH] Optimized several code paths and implemented all EFU opcodes.
Note: most of the microVU EFU opcodes are implemented with completely different algorithms than zerorec's. This might prove to be more accurate, but I mainly did it to avoid using x87 FPU instructions (since I'm using MMX regs for storage).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@749 96395faa-99c1-11dd-bbfe-3dabce05a288
--- pcsx2/x86/microVU.cpp | 11 ++ pcsx2/x86/microVU_Alloc.inl | 14 ++- pcsx2/x86/microVU_Lower.inl | 205 ++++++++++++++++++++++++++++++++---- pcsx2/x86/microVU_Misc.h | 15 +++ pcsx2/x86/microVU_Upper.inl | 29 ----- 5 files changed, 218 insertions(+), 56 deletions(-)
diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index eaeeadd90e..3171b7e2a4 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp
@@ -43,6 +43,17 @@ PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4}; PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652}; PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7}; PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb}; +//PCSX2_ALIGNED16(const u32 mVU_S1[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000}; +PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4}; +PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e}; +PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f}; +PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14}; +PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8}; +PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4}; +PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff}; +PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553}; +PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510}; +PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac}; PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0}; PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0}; PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index b2ff61451d..32b53fbb97 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl
@@ -748,14 +748,14 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) { microVUt(void) mVUallocVIa(int GPRreg, int _reg_) { microVU* mVU = mVUx; if (_reg_ == 0) { XOR32RtoR(GPRreg, GPRreg); } - else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmxVI1 + (_reg_ - 1)); } + else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmVI(_reg_)); } else { MOVZX32M16toR(GPRreg, (uptr)&mVU->regs->VI[_reg_].UL); } } microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { microVU* mVU = mVUx; if (_reg_ == 0) { return; } - else if (_reg_ < 9) { MOVD32RtoMMX(mmxVI1 + (_reg_ - 1), GPRreg); } + else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); } else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); } }
@@ -773,8 +773,14 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { //------------------------------------------------------------------
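A minimal sketch of the VI storage split that the mVUallocVIa/mVUallocVIb change above relies on, assuming the mmVI()/isMMX() mapping added in microVU_Misc.h (readVI, mmxCache, and vuRegFile are illustrative stand-ins, not emitter code from this patch):

#include <cstdint>
#include <cstdio>

static uint32_t mmxCache[8];   // stands in for mm0..mm7, which cache VI1..VI8
static uint16_t vuRegFile[16]; // stands in for mVU->regs->VI[]

// Mirrors mVUallocVIa's three-way dispatch:
static uint32_t readVI(int reg) {
    if (reg == 0) return 0;                 // VI0 is hardwired to zero
    if (reg <  9) return mmxCache[reg - 1]; // mmVI(reg): cached copy in an MMX reg
    return vuRegFile[reg];                  // zero-extended 16-bit load from memory
}

int main() {
    mmxCache[3]   = 42; // pretend VI4 was written earlier via mVUallocVIb
    vuRegFile[12] = 7;
    printf("VI0=%u VI4=%u VI12=%u\n", readVI(0), readVI(4), readVI(12));
    return 0;
}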
#define getReg5(reg, _reg_, _fxf_) { \ - mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _fxf_))); \ + if (!_reg_) { \ + if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \ + else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \ + } \ + else { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _fxf_))); \ + } \ } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 5ff2a4b495..46a41e6d89 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -167,7 +167,6 @@ microVUf(void) mVU_EATAN() { getReg5(xmmFs, _Fs_, _Fsf_); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance - // ToDo: Can Be Optimized Further? (takes approximately (~125 cycles + mem access time) on a c2d) SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); @@ -180,8 +179,8 @@ microVUf(void) mVU_EATANxy() { microVU* mVU = mVUx; if (recPass == 0) {} else { - getReg5(xmmFs, _Fs_, 1); - getReg5(xmmFt, _Fs_, 0); + getReg6(xmmFt, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -196,8 +195,8 @@ microVUf(void) mVU_EATANxz() { microVU* mVU = mVUx; if (recPass == 0) {} else { - getReg5(xmmFs, _Fs_, 2); - getReg5(xmmFt, _Fs_, 0); + getReg6(xmmFt, _Fs_); + SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -208,15 +207,162 @@ microVUf(void) mVU_EATANxz() { mVU_EATAN_(); } } -microVUf(void) mVU_EEXP() {} -microVUf(void) mVU_ELENG() {} +#define eexpHelper(addr) { \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ + SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1); \ + SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \ + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \ +} +microVUf(void) mVU_EEXP() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1); + SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); + + SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFt); + SSE_MULSS_M32_to_XMM(xmmFt, (uptr)mVU_E2); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); + + eexpHelper(mVU_E3); + eexpHelper(mVU_E4); + eexpHelper(mVU_E5); + + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); + SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_E6); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip back + } +} +microVUt(void) mVU_sumXYZ() { + // regd.x = x ^ 2 + y ^ 2 + z ^ 2 + if( cpucaps.hasStreamingSIMD4Extensions ) { + SSE4_DPPS_XMM_to_XMM(xmmFs, xmmFs, 0x71); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + } + else { + SSE_MULPS_XMM_to_XMM(xmmFs, xmmFs); // wzyx ^ 2 + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xe1); // wzyx -> wzxy + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xD2); // wzxy -> wxyz + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + z ^ 2 + } +} +microVUf(void) mVU_ELENG() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} microVUf(void) mVU_ERCPR() {} -microVUf(void) mVU_ERLENG() {} -microVUf(void) mVU_ERSADD() {} -microVUf(void) mVU_ERSQRT() {} -microVUf(void) mVU_ESADD() {} -microVUf(void) mVU_ESIN() {} -microVUf(void) mVU_ESQRT() {} +microVUf(void) mVU_ERLENG() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERSADD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + //SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad? + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ERSQRT() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ESADD() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg6(xmmFs, _Fs_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + mVU_sumXYZ(); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +#define esinHelper(addr) { \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \ + SSE_MOVSS_XMM_to_XMM(xmmFs, xmmT1); \ + SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \ + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \ +} +microVUf(void) mVU_ESIN() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + //SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant? 
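+ // The code below accumulates a sine polynomial in xmmPQ using the
+ // Taylor-series coefficients mVU_S2..mVU_S5 (~ -1/3!, 1/5!, -1/7!, 1/9!)
+ // defined in microVU.cpp: xmmFt holds the power-step factor, xmmT1 the
+ // running power of x, and each esinHelper() adds one coeff*power term.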
+ SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs); + SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs); + SSE_MULSS_M32_to_XMM(xmmFs, (uptr)mVU_S2); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); + + esinHelper(mVU_S3); + esinHelper(mVU_S4); + + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); + SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5); + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} +microVUf(void) mVU_ESQRT() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back + } +} microVUf(void) mVU_ESUM() { microVU* mVU = mVUx; if (recPass == 0) {} @@ -315,8 +461,11 @@ microVUf(void) mVU_IADD() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - ADD16RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + ADD16RtoR(gprT1, gprT2); + } + else ADD16RtoR(gprT1, gprT1); mVUallocVIb(gprT1, _Fd_); } } @@ -343,8 +492,10 @@ microVUf(void) mVU_IAND() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - AND32RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + AND32RtoR(gprT1, gprT2); + } mVUallocVIb(gprT1, _Fd_); } } @@ -353,8 +504,10 @@ microVUf(void) mVU_IOR() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - OR32RtoR(gprT1, gprT2); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT2, _Ft_); + OR32RtoR(gprT1, gprT2); + } mVUallocVIb(gprT1, _Fd_); } } @@ -362,10 +515,16 @@ microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; if (recPass == 0) {} else { - mVUallocVIa(gprT1, _Fs_); - mVUallocVIa(gprT2, _Ft_); - SUB16RtoR(gprT1, gprT2); - mVUallocVIb(gprT1, _Fd_); + if (_Ft_ != _Fs_) { + mVUallocVIa(gprT1, _Fs_); + mVUallocVIa(gprT2, _Ft_); + SUB16RtoR(gprT1, gprT2); + } + else if (!isMMX(_Fd_)) { + XOR32RtoR(gprT1, gprT1); + mVUallocVIb(gprT1, _Fd_); + } + else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); } } } microVUf(void) mVU_ISUBIU() { diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index b30db341e9..e269ce39f5 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -21,6 +21,7 @@ //------------------------------------------------------------------ // Global Variables //------------------------------------------------------------------ + PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]); @@ -40,6 +41,17 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); +//PCSX2_ALIGNED16_EXTERN(const u32 mVU_S1[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]); //------------------------------------------------------------------ // Helper Macros @@ -129,4 +141,7 @@ 
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); //#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13)) //#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14)) +#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <= 8) +#define mmVI(_VIreg_) (_VIreg_ - 1) + #include "microVU_Misc.inl"
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index f0336aa081..651ef92048 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl
@@ -84,7 +84,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -98,7 +97,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC3a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -112,7 +110,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC4a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -126,7 +123,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC5a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -140,7 +136,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC6a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -154,7 +149,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC7a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -168,7 +162,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC8a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -188,7 +181,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC9a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -208,7 +200,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC10a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -228,7 +219,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC11a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -248,7 +238,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int
Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC12a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -268,7 +257,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC13a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -288,7 +276,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC14a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -308,7 +295,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC15a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -328,7 +314,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC16a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -348,7 +333,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC17a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -368,7 +352,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC18a(ACC, Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ @@ -381,7 +364,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC19a(Fd, ACC, Fs, Ft); \ SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ @@ -395,7 +377,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC20a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -415,7 +396,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC21a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -435,7 +415,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC22a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -449,7 +428,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC23a(ACC, Fs, Ft); \ if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ @@ -463,7 +441,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC24a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ 
-483,7 +460,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int Fd, ACC, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC25a(Fd, ACC, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -503,7 +479,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC26a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -523,7 +498,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX if (recPass == 0) {} \ else { \ int ACCw, ACCr, Fs, Ft; \ - if (isNOP) return; \ mVUallocFMAC27a(ACCw, ACCr, Fs, Ft); \ if (_XYZW_SS && _X) { \ SSE_MULSS_XMM_to_XMM(Fs, Ft); \ @@ -547,7 +521,6 @@ microVUf(void) mVU_ABS() { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); mVUallocFMAC1b(Ft); @@ -647,7 +620,6 @@ microVUq(void) mVU_FTOIx(uptr addr) { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() @@ -673,7 +645,6 @@ microVUq(void) mVU_ITOFx(uptr addr) { if (recPass == 0) {} else { int Fs, Ft; - if (isNOP) return; mVUallocFMAC2a(Fs, Ft); SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);
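For reference, the mVU_EEXP sequence above evaluates P = 1 / (1 + E1*x + E2*x^2 + E3*x^3 + E4*x^4 + E5*x^5 + E6*x^6)^4; the bracketed polynomial approximates e^(x/4), so P approximates e^(-x). A minimal scalar sketch of that math (plain C++; it ignores the recompiler's clamping and xmmPQ plumbing, and the decoded constants are rounded):

#include <cstdio>

// Approximate float values of the mVU_E1..mVU_E6 bit patterns:
static const float E1 = 0.24999857f; // 0x3e7fffa8
static const float E2 = 0.03125755f; // 0x3d0007f4
static const float E3 = 0.00259110f; // 0x3b29d3ff
static const float E4 = 0.00017155f; // 0x3933e553
static const float E5 = 0.00000543f; // 0x36b63510
static const float E6 = 0.00000069f; // 0x353961ac

static float vuEEXP(float x) { // approximates e^-x for x >= 0
    float x2 = x * x;
    float p  = 1.0f + E1 * x + E2 * x2 + E3 * x2 * x   // p ~= e^(x/4)
             + E4 * x2 * x2 + E5 * x2 * x2 * x + E6 * x2 * x2 * x2;
    p *= p;          // p^2 ~= e^(x/2)
    p *= p;          // p^4 ~= e^x
    return 1.0f / p; // ~= e^-x
}

int main() {
    printf("%f\n", vuEEXP(1.0f)); // prints ~0.3679, vs. e^-1 = 0.367879
    return 0;
}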