mirror of https://github.com/PCSX2/pcsx2.git
Optimized some stuff and implemented all EFU opcodes.
Note: most of the microVU EFU opcodes are implemented with completely different algorithms than the ones the zerorecs use. This might prove to be more accurate, but I mainly did it to avoid using x87 FPU instructions (since I'm using MMX regs for storage). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@749 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5d93b6bbe9
commit
93b85af339
|
@ -43,6 +43,17 @@ PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xb
|
||||||
PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
|
PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
|
||||||
PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
|
PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
|
||||||
PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
|
PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
|
||||||
|
//PCSX2_ALIGNED16(const u32 mVU_S1[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
|
||||||
|
PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
|
||||||
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
|
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
|
||||||
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
|
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
|
||||||
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
|
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
|
||||||
|
|
|
@ -748,14 +748,14 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
|
||||||
// Emits x86 code that reads VI register `_reg_` into the GPR `GPRreg`.
//   _reg_ == 0 : the GPR is cleared with XOR.
//   _reg_ <  9 : the value is moved out of MMX register mmVI(_reg_).
//   otherwise  : 16-bit zero-extending load from mVU->regs->VI[_reg_].
microVUt(void) mVUallocVIa(int GPRreg, int _reg_) {
	microVU* mVU = mVUx;
	if (_reg_ == 0) {
		XOR32RtoR(GPRreg, GPRreg);
		return;
	}
	if (_reg_ < 9) {
		MOVD32MMXtoR(GPRreg, mmVI(_reg_));
		return;
	}
	MOVZX32M16toR(GPRreg, (uptr)&mVU->regs->VI[_reg_].UL);
}
|
||||||
|
|
||||||
// Emits x86 code that writes the GPR `GPRreg` back to VI register `_reg_`.
//   _reg_ == 0 : nothing is emitted (the write is dropped).
//   _reg_ <  9 : the value is moved into MMX register mmVI(_reg_).
//   otherwise  : 16-bit store to mVU->regs->VI[_reg_].
microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
	microVU* mVU = mVUx;
	if (_reg_ == 0) {
		return;
	}
	if (_reg_ < 9) {
		MOVD32RtoMMX(mmVI(_reg_), GPRreg);
	}
	else {
		MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg);
	}
}
|
||||||
|
|
||||||
|
@ -773,8 +773,14 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// getReg5(reg, _reg_, _fxf_)
// Emits a load of one field of VF[_reg_] into the xmm register `reg`.
// `_fxf_` selects the field (0..3); the field mask handed to mVUloadReg /
// mVUclamp2 is (1 << (3 - _fxf_)), i.e. 8, 4, 2, 1 for fields 0, 1, 2, 3.
//   - _reg_ == 0, field < 3 : `reg` is simply zeroed with XORPS.
//   - _reg_ == 0, field 3   : the field is loaded from memory with mask 1,
//     the same mask the general path computes for field 3. (This path used
//     to pass 3, which is not a single-field mask under that encoding.
//     NOTE(review): confirm against mVUloadReg's mask handling.)
//   - otherwise             : the field is loaded and, when
//     CHECK_VU_EXTRA_OVERFLOW is set, clamped via mVUclamp2 using xmmT1.
// Expects `mVU` and `vuIndex` to be in scope at the expansion site.
#define getReg5(reg, _reg_, _fxf_) {															\
	if (!(_reg_)) {																				\
		if ((_fxf_) < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); }									\
		else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 1); }				\
	}																							\
	else {																						\
		mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - (_fxf_))));		\
		if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (1 << (3 - (_fxf_))));		\
	}																							\
}
|
||||||
|
|
||||||
#endif //PCSX2_MICROVU
|
#endif //PCSX2_MICROVU
|
||||||
|
|
|
@ -167,7 +167,6 @@ microVUf(void) mVU_EATAN() {
|
||||||
getReg5(xmmFs, _Fs_, _Fsf_);
|
getReg5(xmmFs, _Fs_, _Fsf_);
|
||||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
|
|
||||||
// ToDo: Can Be Optimized Further? (takes approximately (~125 cycles + mem access time) on a c2d)
|
|
||||||
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
||||||
SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
|
SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
|
||||||
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
|
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
|
||||||
|
@ -180,8 +179,8 @@ microVUf(void) mVU_EATANxy() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
getReg5(xmmFs, _Fs_, 1);
|
getReg6(xmmFt, _Fs_);
|
||||||
getReg5(xmmFt, _Fs_, 0);
|
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01);
|
||||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
||||||
|
@ -196,8 +195,8 @@ microVUf(void) mVU_EATANxz() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
getReg5(xmmFs, _Fs_, 2);
|
getReg6(xmmFt, _Fs_);
|
||||||
getReg5(xmmFt, _Fs_, 0);
|
SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02);
|
||||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
||||||
|
@ -208,15 +207,162 @@ microVUf(void) mVU_EATANxz() {
|
||||||
mVU_EATAN_<vuIndex>();
|
mVU_EATAN_<vuIndex>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// One polynomial step for mVU_EEXP: multiplies the running power in xmmT1 by
// xmmFs, scales a copy of it by the coefficient stored at `addr`, and adds
// the product to the accumulator in xmmPQ. Clobbers xmmFt.
#define eexpHelper(addr) {						\
	SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs);			\
	SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1);			\
	SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr);	\
	SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt);			\
}
// EEXP: emits code computing, for the scalar x selected by _Fs_/_Fsf_,
//   P = 1 / (1 + E1*x + E2*x^2 + E3*x^3 + E4*x^4 + E5*x^5 + E6*x^6)^4
// into the low lane of the selected P instance of xmmPQ.
microVUf(void) mVU_EEXP() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg5(xmmFs, _Fs_, _Fsf_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);

		// acc = 1 + E1*x
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
		SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1);
		SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);

		// acc += E2*x^2, keeping x^2 in xmmT1 as the running power
		SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
		SSE_MULSS_XMM_to_XMM(xmmFt, xmmFs);
		SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFt);
		SSE_MULSS_M32_to_XMM(xmmFt, (uptr)mVU_E2);
		SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt);

		eexpHelper(mVU_E3);	// acc += E3*x^3
		eexpHelper(mVU_E4);	// acc += E4*x^4
		eexpHelper(mVU_E5);	// acc += E5*x^5

		// acc += E6*x^6 (last term, so xmmT1 itself can be scaled)
		SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs);
		SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_E6);
		SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);

		// acc = acc^4 (squared twice)
		SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);
		SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);

		// P = 1 / acc
		SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one);
		SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ);
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1);

		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
|
// Emits code that leaves x^2 + y^2 + z^2 of the vector in xmmFs in the low
// lane of xmmPQ. xmmFs is clobbered on both paths.
microVUt(void) mVU_sumXYZ() {
	if (!cpucaps.hasStreamingSIMD4Extensions) {
		// Fallback without DPPS: square every lane, then rotate y and z into lane 0
		SSE_MULPS_XMM_to_XMM(xmmFs, xmmFs);			// every lane squared
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);			// PQ.x  = x^2
		SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xe1);	// swap x and y lanes
		SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);			// PQ.x += y^2
		SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xD2);	// bring z into the low lane
		SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);			// PQ.x += z^2
	}
	else {
		// Dot product of xmmFs with itself over x/y/z, result written to the low lane
		SSE4_DPPS_XMM_to_XMM(xmmFs, xmmFs, 0x71);
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
	}
}
|
||||||
|
// ELENG: emits P = sqrt(x^2 + y^2 + z^2) for the operand fetched by
// getReg6(xmmFs, _Fs_).
microVUf(void) mVU_ELENG() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg6(xmmFs, _Fs_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		mVU_sumXYZ<vuIndex>();					// PQ.x = x^2 + y^2 + z^2
		SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);	// PQ.x = sqrt(PQ.x)
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
// NOTE(review): ERCPR is still an empty stub here - no code is emitted for it,
// unlike the other EFU handlers in this file. Presumably still to be implemented; confirm.
microVUf(void) mVU_ERCPR() {}
|
microVUf(void) mVU_ERCPR() {}
|
||||||
// ERLENG: emits P = 1 / sqrt(x^2 + y^2 + z^2) for the operand fetched by
// getReg6(xmmFs, _Fs_).
microVUf(void) mVU_ERLENG() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg6(xmmFs, _Fs_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		mVU_sumXYZ<vuIndex>();						// PQ.x = x^2 + y^2 + z^2
		SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);		// PQ.x = sqrt(PQ.x)
		// Reciprocal through a full-precision divide: Fs = 1 / PQ.x, then copy back
		SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
		SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
|
// ERSADD: emits P = 1 / (x^2 + y^2 + z^2) for the operand fetched by
// getReg6(xmmFs, _Fs_).
microVUf(void) mVU_ERSADD() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg6(xmmFs, _Fs_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		mVU_sumXYZ<vuIndex>();						// PQ.x = x^2 + y^2 + z^2
		// A divide is used instead of RCPSS, whose lower precision was a concern
		SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
		SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);			// Fs.x = 1 / PQ.x
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
|
// ERSQRT: emits P = 1 / sqrt(x) for the scalar x selected by _Fs_/_Fsf_.
microVUf(void) mVU_ERSQRT() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg5(xmmFs, _Fs_, _Fsf_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);		// PQ.x = sqrt(x)
		// Reciprocal through a divide: Fs = 1 / PQ.x, then copy back
		SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
		SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
|
// ESADD: emits P = x^2 + y^2 + z^2 for the operand fetched by
// getReg6(xmmFs, _Fs_).
microVUf(void) mVU_ESADD() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg6(xmmFs, _Fs_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		mVU_sumXYZ<vuIndex>();	// PQ.x = x^2 + y^2 + z^2
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
|
#define esinHelper(addr) { \
|
||||||
|
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \
|
||||||
|
SSE_MOVSS_XMM_to_XMM(xmmFs, xmmT1); \
|
||||||
|
SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \
|
||||||
|
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \
|
||||||
|
}
|
||||||
|
microVUf(void) mVU_ESIN() {
|
||||||
|
microVU* mVU = mVUx;
|
||||||
|
if (recPass == 0) {}
|
||||||
|
else {
|
||||||
|
getReg5(xmmFs, _Fs_, _Fsf_);
|
||||||
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
|
SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
|
||||||
|
//SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant?
|
||||||
|
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
|
||||||
|
SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt);
|
||||||
|
SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs);
|
||||||
|
SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt);
|
||||||
|
SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
|
||||||
|
SSE_MULSS_M32_to_XMM(xmmFs, (uptr)mVU_S2);
|
||||||
|
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);
|
||||||
|
|
||||||
|
esinHelper(mVU_S3);
|
||||||
|
esinHelper(mVU_S4);
|
||||||
|
|
||||||
|
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt);
|
||||||
|
SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5);
|
||||||
|
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
|
||||||
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ESQRT: emits P = sqrt(x) for the scalar x selected by _Fs_/_Fsf_.
microVUf(void) mVU_ESQRT() {
	microVU* mVU = mVUx;
	if (recPass != 0) {
		getReg5(xmmFs, _Fs_, _Fsf_);
		// Lane swap (0x27: lanes 0<->3, 0xC6: lanes 0<->2) brings the target P into lane 0
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
		SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);	// PQ.x = sqrt(x)
		// The same swap applied again restores the original lane order
		SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
	}
}
|
||||||
microVUf(void) mVU_ESUM() {
|
microVUf(void) mVU_ESUM() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
|
@ -315,8 +461,11 @@ microVUf(void) mVU_IADD() {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
if (_Ft_ != _Fs_) {
|
||||||
ADD16RtoR(gprT1, gprT2);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
|
ADD16RtoR(gprT1, gprT2);
|
||||||
|
}
|
||||||
|
else ADD16RtoR(gprT1, gprT1);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -343,8 +492,10 @@ microVUf(void) mVU_IAND() {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
if (_Ft_ != _Fs_) {
|
||||||
AND32RtoR(gprT1, gprT2);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
|
AND32RtoR(gprT1, gprT2);
|
||||||
|
}
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -353,8 +504,10 @@ microVUf(void) mVU_IOR() {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
if (_Ft_ != _Fs_) {
|
||||||
OR32RtoR(gprT1, gprT2);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
|
OR32RtoR(gprT1, gprT2);
|
||||||
|
}
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -362,10 +515,16 @@ microVUf(void) mVU_ISUB() {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
if (_Ft_ != _Fs_) {
|
||||||
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
mVUallocVIa<vuIndex>(gprT1, _Fs_);
|
||||||
SUB16RtoR(gprT1, gprT2);
|
mVUallocVIa<vuIndex>(gprT2, _Ft_);
|
||||||
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
SUB16RtoR(gprT1, gprT2);
|
||||||
|
}
|
||||||
|
else if (!isMMX(_Fd_)) {
|
||||||
|
XOR32RtoR(gprT1, gprT1);
|
||||||
|
mVUallocVIb<vuIndex>(gprT1, _Fd_);
|
||||||
|
}
|
||||||
|
else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
microVUf(void) mVU_ISUBIU() {
|
microVUf(void) mVU_ISUBIU() {
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Global Variables
|
// Global Variables
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
|
||||||
|
@ -40,6 +41,17 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]);
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
|
||||||
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
|
||||||
|
//PCSX2_ALIGNED16_EXTERN(const u32 mVU_S1[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]);
|
||||||
|
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]);
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Helper Macros
|
// Helper Macros
|
||||||
|
@ -129,4 +141,7 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
|
||||||
//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13))
|
//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13))
|
||||||
//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14))
|
//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14))
|
||||||
|
|
||||||
|
// True when VI register index _VIreg_ is one that mVUallocVIa/mVUallocVIb keep
// in an MMX register. Those allocators take the MMX path for 0 < _reg_ < 9,
// so the valid range is 1..8; index 9 used to be accepted here and would
// have mapped to MMX register number 8 via mmVI().
#define isMMX(_VIreg_)	((_VIreg_) >= 1 && (_VIreg_) <= 8)
// Maps an MMX-backed VI register index (1..8) to its MMX register number (0..7).
#define mmVI(_VIreg_)	((_VIreg_) - 1)
|
||||||
|
|
||||||
#include "microVU_Misc.inl"
|
#include "microVU_Misc.inl"
|
||||||
|
|
|
@ -84,7 +84,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, Fs, Ft; \
|
int Fd, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \
|
mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \
|
||||||
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -98,7 +97,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, Fs, Ft; \
|
int Fd, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \
|
mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \
|
||||||
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -112,7 +110,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACC, Fs, Ft; \
|
int ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC4a<vuIndex>(ACC, Fs, Ft); \
|
mVUallocFMAC4a<vuIndex>(ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -126,7 +123,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACC, Fs, Ft; \
|
int ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC5a<vuIndex>(ACC, Fs, Ft); \
|
mVUallocFMAC5a<vuIndex>(ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -140,7 +136,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, Fs, Ft; \
|
int Fd, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC6a<vuIndex>(Fd, Fs, Ft); \
|
mVUallocFMAC6a<vuIndex>(Fd, Fs, Ft); \
|
||||||
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -154,7 +149,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACC, Fs, Ft; \
|
int ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC7a<vuIndex>(ACC, Fs, Ft); \
|
mVUallocFMAC7a<vuIndex>(ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -168,7 +162,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC8a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC8a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -188,7 +181,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC9a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC9a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -208,7 +200,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC10a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC10a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -228,7 +219,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC11a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC11a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -248,7 +238,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC12a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC12a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -268,7 +257,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC13a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC13a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -288,7 +276,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC14a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC14a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -308,7 +295,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC15a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC15a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -328,7 +314,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC16a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC16a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -348,7 +333,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC17a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC17a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -368,7 +352,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACC, Fs, Ft; \
|
int ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC18a<vuIndex>(ACC, Fs, Ft); \
|
mVUallocFMAC18a<vuIndex>(ACC, Fs, Ft); \
|
||||||
SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
mVUupdateFlags<vuIndex>(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
|
mVUupdateFlags<vuIndex>(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
|
||||||
|
@ -381,7 +364,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC19a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC19a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
SSE_MULPS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULPS_XMM_to_XMM(Fs, Ft); \
|
||||||
SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \
|
SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \
|
||||||
|
@ -395,7 +377,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC20a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC20a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -415,7 +396,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC21a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC21a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -435,7 +415,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, Fs, Ft; \
|
int Fd, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC22a<vuIndex>(Fd, Fs, Ft); \
|
mVUallocFMAC22a<vuIndex>(Fd, Fs, Ft); \
|
||||||
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -449,7 +428,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACC, Fs, Ft; \
|
int ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC23a<vuIndex>(ACC, Fs, Ft); \
|
mVUallocFMAC23a<vuIndex>(ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
|
||||||
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -463,7 +441,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC24a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC24a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -483,7 +460,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int Fd, ACC, Fs, Ft; \
|
int Fd, ACC, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC25a<vuIndex>(Fd, ACC, Fs, Ft); \
|
mVUallocFMAC25a<vuIndex>(Fd, ACC, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -503,7 +479,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC26a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC26a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -523,7 +498,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
|
||||||
if (recPass == 0) {} \
|
if (recPass == 0) {} \
|
||||||
else { \
|
else { \
|
||||||
int ACCw, ACCr, Fs, Ft; \
|
int ACCw, ACCr, Fs, Ft; \
|
||||||
if (isNOP) return; \
|
|
||||||
mVUallocFMAC27a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
mVUallocFMAC27a<vuIndex>(ACCw, ACCr, Fs, Ft); \
|
||||||
if (_XYZW_SS && _X) { \
|
if (_XYZW_SS && _X) { \
|
||||||
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
SSE_MULSS_XMM_to_XMM(Fs, Ft); \
|
||||||
|
@ -547,7 +521,6 @@ microVUf(void) mVU_ABS() {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
int Fs, Ft;
|
int Fs, Ft;
|
||||||
if (isNOP) return;
|
|
||||||
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
||||||
SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
|
SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
|
||||||
mVUallocFMAC1b<vuIndex>(Ft);
|
mVUallocFMAC1b<vuIndex>(Ft);
|
||||||
|
@ -647,7 +620,6 @@ microVUq(void) mVU_FTOIx(uptr addr) {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
int Fs, Ft;
|
int Fs, Ft;
|
||||||
if (isNOP) return;
|
|
||||||
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
||||||
|
|
||||||
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
|
||||||
|
@ -673,7 +645,6 @@ microVUq(void) mVU_ITOFx(uptr addr) {
|
||||||
if (recPass == 0) {}
|
if (recPass == 0) {}
|
||||||
else {
|
else {
|
||||||
int Fs, Ft;
|
int Fs, Ft;
|
||||||
if (isNOP) return;
|
|
||||||
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
mVUallocFMAC2a<vuIndex>(Fs, Ft);
|
||||||
|
|
||||||
SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);
|
SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);
|
||||||
|
|
Loading…
Reference in New Issue