optimized some stuff and implemented all EFU opcodes.

note: most of the microVU EFU opcodes are implemented with completely different algorithms than zerorec's. this might prove to be more accurate, but i mainly did it to avoid using x87 FPU instructions (since i'm using MMX regs for storage).
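(background on that last point: the MMX registers physically alias the x87 FPU register stack, so mixing x87 transcendental instructions with VI values cached in MMX regs would mean EMMS round-trips and clobbered state; evaluating short SSE polynomial approximations instead sidesteps the problem entirely.)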


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@749 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-03-11 23:49:39 +00:00
parent 5d93b6bbe9
commit 93b85af339
5 changed files with 218 additions and 56 deletions

View File

@ -43,6 +43,17 @@ PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4};
PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
//PCSX2_ALIGNED16(const u32 mVU_S1[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
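For reference, decoding these bit patterns as IEEE-754 singles (values approximate, hand-decoded; the groupings are inferred from how the opcodes below use them):

// mVU_Pi4           = 0x3f490fdb ~= 0.7853982  (pi/4, used by the EATAN family)
// mVU_T1..mVU_T8    -> arctangent polynomial coefficients (e.g. mVU_T7 ~= 0.0218612, mVU_T8 ~= -0.0040541)
// mVU_S2..mVU_S5    ~= -0.1666667, 0.0083330, -0.0001981, 0.0000026  (near -1/3!, 1/5!, -1/7!, 1/9!; sine series)
// mVU_E1..mVU_E6    ~= 0.2499989, 0.0312576, 0.0025914, 0.0001716, 0.0000054, 0.0000007
//                      (coefficients of p(x) ~= e^(x/4); EEXP computes 1/p(x)^4 ~= e^(-x))
// mVU_FTOI_4/12/15  = 2^4, 2^12, 2^15  (fixed-point scale factors for the FTOI variants)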

View File

@ -748,14 +748,14 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
microVUt(void) mVUallocVIa(int GPRreg, int _reg_) {
microVU* mVU = mVUx;
if (_reg_ == 0) { XOR32RtoR(GPRreg, GPRreg); }
else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmxVI1 + (_reg_ - 1)); }
else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmVI(_reg_)); }
else { MOVZX32M16toR(GPRreg, (uptr)&mVU->regs->VI[_reg_].UL); }
}
microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
microVU* mVU = mVUx;
if (_reg_ == 0) { return; }
else if (_reg_ < 9) { MOVD32RtoMMX(mmxVI1 + (_reg_ - 1), GPRreg); }
else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); }
else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); }
}
@ -773,8 +773,14 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
//------------------------------------------------------------------
#define getReg5(reg, _reg_, _fxf_) { \
mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (1 << (3 - _fxf_))); \
if (!_reg_) { \
if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \
else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \
} \
else { \
mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (1 << (3 - _fxf_))); \
} \
}
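The register-0 special case exists because VF00 is hardwired to (0, 0, 0, 1) on the VU: reading x, y, or z of it always yields 0.0 (hence the cheap XORPS), and only the w field (_fxf_ == 3) needs a real load of the stored 1.0. A scalar sketch of the assumed semantics, with VF[] as a hypothetical stand-in for mVU->regs->VF:

// illustration only: fetch one field of VF[reg], honoring hardwired VF00 = (0,0,0,1)
struct VECTOR { float F[4]; };
extern VECTOR VF[32]; // hypothetical stand-in for mVU->regs->VF
float readVF(int reg, int fxf) { // fxf: 0=x, 1=y, 2=z, 3=w
	if (reg == 0) return (fxf < 3) ? 0.0f : 1.0f;
	return VF[reg].F[fxf];
}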
#endif //PCSX2_MICROVU

View File

@ -167,7 +167,6 @@ microVUf(void) mVU_EATAN() {
getReg5(xmmFs, _Fs_, _Fsf_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
// ToDo: Can Be Optimized Further? (takes approximately (~125 cycles + mem access time) on a c2d)
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
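The setup above leaves xmmFs = x - 1 and the P slot of xmmPQ = x + 1; presumably mVU_EATAN_ then divides to get t = (x - 1)/(x + 1) and evaluates the mVU_T* polynomial on t, using the identity atan(x) = pi/4 + atan((x - 1)/(x + 1)), which is what the mVU_Pi4 constant is for.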
@ -180,8 +179,8 @@ microVUf(void) mVU_EATANxy() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, 1);
getReg5(xmmFt, _Fs_, 0);
getReg6(xmmFt, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
@ -196,8 +195,8 @@ microVUf(void) mVU_EATANxz() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, 2);
getReg5(xmmFt, _Fs_, 0);
getReg6(xmmFt, _Fs_);
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
@ -208,15 +207,162 @@ microVUf(void) mVU_EATANxz() {
mVU_EATAN_<vuIndex>();
}
}
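In both variants above, getReg6 loads the full VF[_Fs_] vector and PSHUFD (imm 0x01 for y, 0x02 for z) moves the wanted component into the low lane; the shared mVU_EATAN_ tail then appears to compute P = atan(y/x) (resp. atan(z/x)) through the same ratio identity, atan(y/x) = pi/4 + atan((y - x)/(y + x)).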
microVUf(void) mVU_EEXP() {}
microVUf(void) mVU_ELENG() {}
#define eexpHelper(addr) { \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1); \
SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \
}
microVUf(void) mVU_EEXP() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, _Fsf_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1);
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
SSE_MULSS_XMM_to_XMM(xmmFt, xmmFs);
SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFt);
SSE_MULSS_M32_to_XMM(xmmFt, (uptr)mVU_E2);
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt);
eexpHelper(mVU_E3);
eexpHelper(mVU_E4);
eexpHelper(mVU_E5);
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs);
SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_E6);
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
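A rough scalar equivalent of the sequence above (illustration only, written in Horner form; E1..E6 decoded from the tables, approximate):

// p(x) = 1 + E1*x + ... + E6*x^6 approximates e^(x/4); squaring twice gives
// p(x)^4 ~= e^x, and the reciprocal yields e^(-x), which is what EEXP puts in P.
static float eexp_approx(float x) {
	const float E1 = 0.2499989f, E2 = 0.0312576f, E3 = 0.0025914f,
	            E4 = 0.0001716f, E5 = 0.0000054f, E6 = 0.0000007f;
	float p = 1.0f + x*(E1 + x*(E2 + x*(E3 + x*(E4 + x*(E5 + x*E6)))));
	p *= p;          // p^2
	p *= p;          // p^4 ~= e^x
	return 1.0f / p; // ~= e^(-x)
}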
microVUt(void) mVU_sumXYZ() {
// xmmPQ.x = x ^ 2 + y ^ 2 + z ^ 2
if( cpucaps.hasStreamingSIMD4Extensions ) {
SSE4_DPPS_XMM_to_XMM(xmmFs, xmmFs, 0x71);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
}
else {
SSE_MULPS_XMM_to_XMM(xmmFs, xmmFs); // wzyx ^ 2
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xe1); // wzyx -> wzxy
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2
SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xD2); // wzxy -> wxyz
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + z ^ 2
}
}
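On SSE4 hardware this collapses to a single DPPS: in the 0x71 immediate, the high nibble (0111) selects x, y, z as multiply inputs and the low nibble (0001) writes the summed result to the low lane only. Both paths compute the same thing; a scalar sketch:

// illustration: dot of the xyz part with itself, result in the low lane
static float sumXYZ(const float v[4]) {
	return v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
}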
microVUf(void) mVU_ELENG() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg6(xmmFs, _Fs_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ<vuIndex>();
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERCPR() {}
microVUf(void) mVU_ERLENG() {}
microVUf(void) mVU_ERSADD() {}
microVUf(void) mVU_ERSQRT() {}
microVUf(void) mVU_ESADD() {}
microVUf(void) mVU_ESIN() {}
microVUf(void) mVU_ESQRT() {}
microVUf(void) mVU_ERLENG() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg6(xmmFs, _Fs_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ<vuIndex>();
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERSADD() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg6(xmmFs, _Fs_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ<vuIndex>();
//SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad?
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERSQRT() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, _Fsf_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ESADD() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg6(xmmFs, _Fs_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ<vuIndex>();
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
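For reference, the values these routines leave in the P register (x, y, z taken from VF[_Fs_]):

ELENG:  P = sqrt(x^2 + y^2 + z^2)
ERLENG: P = 1 / sqrt(x^2 + y^2 + z^2)
ERSADD: P = 1 / (x^2 + y^2 + z^2)
ERSQRT: P = 1 / sqrt(VF[_Fs_][_Fsf_])
ESADD:  P = x^2 + y^2 + z^2

(The commented-out RCPSS in ERSADD would be cheaper, but RCPSS is only accurate to about 12 bits, hence the exact 1.0/x divide.)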
#define esinHelper(addr) { \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \
SSE_MOVSS_XMM_to_XMM(xmmFs, xmmT1); \
SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \
}
microVUf(void) mVU_ESIN() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, _Fsf_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
//SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant?
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs); // Ft = x
SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt); // Fs = x^2
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs); // Ft = x^2 (step factor between the odd powers)
SSE_MULSS_XMM_to_XMM(xmmFs, xmmPQ); // Fs = x^3 (x is still in the low lane of xmmPQ)
SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs); // T1 = x^3 (so esinHelper yields x^5, x^7, then x^9)
SSE_MULSS_M32_to_XMM(xmmFs, (uptr)mVU_S2);
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);
esinHelper(mVU_S3);
esinHelper(mVU_S4);
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt);
SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5);
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
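A scalar equivalent of the sequence above, for reference (illustration only; S2..S5 decoded from the tables, approximate):

// sin(x) ~= x + S2*x^3 + S3*x^5 + S4*x^7 + S5*x^9 (odd-power sine polynomial)
static float esin_approx(float x) {
	const float S2 = -0.1666666f, S3 = 0.0083330f,
	            S4 = -0.0001981f, S5 = 0.0000026f;
	float x2 = x * x;  // step factor between the odd powers
	float x3 = x * x2;
	float x5 = x3 * x2;
	float x7 = x5 * x2;
	float x9 = x7 * x2;
	return x + S2*x3 + S3*x5 + S4*x7 + S5*x9;
}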
microVUf(void) mVU_ESQRT() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
getReg5(xmmFs, _Fs_, _Fsf_);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ESUM() {
microVU* mVU = mVUx;
if (recPass == 0) {}
@ -315,8 +461,11 @@ microVUf(void) mVU_IADD() {
if (recPass == 0) {}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
ADD16RtoR(gprT1, gprT2);
if (_Ft_ != _Fs_) {
mVUallocVIa<vuIndex>(gprT2, _Ft_);
ADD16RtoR(gprT1, gprT2);
}
else ADD16RtoR(gprT1, gprT1);
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
@ -343,8 +492,10 @@ microVUf(void) mVU_IAND() {
if (recPass == 0) {}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
AND32RtoR(gprT1, gprT2);
if (_Ft_ != _Fs_) {
mVUallocVIa<vuIndex>(gprT2, _Ft_);
AND32RtoR(gprT1, gprT2);
}
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
@ -353,8 +504,10 @@ microVUf(void) mVU_IOR() {
if (recPass == 0) {}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
OR32RtoR(gprT1, gprT2);
if (_Ft_ != _Fs_) {
mVUallocVIa<vuIndex>(gprT2, _Ft_);
OR32RtoR(gprT1, gprT2);
}
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
@ -362,10 +515,16 @@ microVUf(void) mVU_ISUB() {
microVU* mVU = mVUx;
if (recPass == 0) {}
else {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
SUB16RtoR(gprT1, gprT2);
mVUallocVIb<vuIndex>(gprT1, _Fd_);
if (_Ft_ != _Fs_) {
mVUallocVIa<vuIndex>(gprT1, _Fs_);
mVUallocVIa<vuIndex>(gprT2, _Ft_);
SUB16RtoR(gprT1, gprT2);
mVUallocVIb<vuIndex>(gprT1, _Fd_); // store the result (this path must write _Fd_ too)
}
else if (!isMMX(_Fd_)) {
XOR32RtoR(gprT1, gprT1);
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); }
}
}
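The _Ft_ == _Fs_ special cases above save the second mVUallocVIa (one fewer MOVD/MOVZX per op): x & x and x | x are just x, so IAND/IOR pass the loaded value straight through; IADD doubles it in place; and ISUB reduces to zeroing the destination, done with PXOR when _Fd_ lives in an MMX register.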
microVUf(void) mVU_ISUBIU() {

View File

@ -21,6 +21,7 @@
//------------------------------------------------------------------
// Global Variables
//------------------------------------------------------------------
PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
@ -40,6 +41,17 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
//PCSX2_ALIGNED16_EXTERN(const u32 mVU_S1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]);
//------------------------------------------------------------------
// Helper Macros
@ -129,4 +141,7 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13))
//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14))
#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <= 8) // VI1-VI8 live in mm0-mm7, matching the "_reg_ < 9" tests in mVUallocVI
#define mmVI(_VIreg_) (_VIreg_ - 1)
#include "microVU_Misc.inl"

View File

@ -84,7 +84,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -98,7 +97,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -112,7 +110,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC4a<vuIndex>(ACC, Fs, Ft); \
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -126,7 +123,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC5a<vuIndex>(ACC, Fs, Ft); \
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -140,7 +136,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC6a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -154,7 +149,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC7a<vuIndex>(ACC, Fs, Ft); \
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -168,7 +162,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC8a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -188,7 +181,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC9a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -208,7 +200,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC10a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -228,7 +219,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC11a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -248,7 +238,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC12a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -268,7 +257,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC13a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -288,7 +276,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC14a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -308,7 +295,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC15a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -328,7 +314,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC16a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -348,7 +333,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC17a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -368,7 +352,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC18a<vuIndex>(ACC, Fs, Ft); \
SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
mVUupdateFlags<vuIndex>(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
@ -381,7 +364,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC19a<vuIndex>(Fd, ACC, Fs, Ft); \
SSE_MULPS_XMM_to_XMM(Fs, Ft); \
SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \
@ -395,7 +377,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC20a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -415,7 +396,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC21a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -435,7 +415,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC22a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -449,7 +428,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC23a<vuIndex>(ACC, Fs, Ft); \
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@ -463,7 +441,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC24a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -483,7 +460,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int Fd, ACC, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC25a<vuIndex>(Fd, ACC, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -503,7 +479,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC26a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -523,7 +498,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
if (recPass == 0) {} \
else { \
int ACCw, ACCr, Fs, Ft; \
if (isNOP) return; \
mVUallocFMAC27a<vuIndex>(ACCw, ACCr, Fs, Ft); \
if (_XYZW_SS && _X) { \
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@ -547,7 +521,6 @@ microVUf(void) mVU_ABS() {
if (recPass == 0) {}
else {
int Fs, Ft;
if (isNOP) return;
mVUallocFMAC2a<vuIndex>(Fs, Ft);
SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
mVUallocFMAC1b<vuIndex>(Ft);
@ -647,7 +620,6 @@ microVUq(void) mVU_FTOIx(uptr addr) {
if (recPass == 0) {}
else {
int Fs, Ft;
if (isNOP) return;
mVUallocFMAC2a<vuIndex>(Fs, Ft);
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
@ -673,7 +645,6 @@ microVUq(void) mVU_ITOFx(uptr addr) {
if (recPass == 0) {}
else {
int Fs, Ft;
if (isNOP) return;
mVUallocFMAC2a<vuIndex>(Fs, Ft);
SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);