mirror of https://github.com/PCSX2/pcsx2.git
Optimized some stuff and implemented all EFU opcodes.
Note: most of the microVU EFU opcodes are implemented with completely different algorithms than the zerorec versions. This might prove to be more accurate, but I mainly did it to avoid using x87 FPU instructions (since I'm using MMX registers for storage). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@749 96395faa-99c1-11dd-bbfe-3dabce05a288
commit 93b85af339
parent 5d93b6bbe9
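The EFU implementations in this commit avoid x87 by evaluating fixed polynomial approximations with scalar SSE ops. As a scalar sketch of what the new mVU_EEXP sequence computes (illustrative only, not the emitted code; the coefficient values are my decoding of the mVU_E1..mVU_E6 tables added below, and the helper name is hypothetical):

```c
// Scalar sketch of the EEXP approximation (illustrative, not the emitted code).
// p(x) = 1 + E1*x + E2*x^2 + ... + E6*x^6, result = 1 / p(x)^4,
// which approximates e^(-x) without touching the x87 FPU.
static float vu_eexp(float x) // hypothetical helper name
{
    const float E1 = 0.2499987f,  E2 = 0.0312576f, E3 = 0.0025914f;
    const float E4 = 0.00017156f, E5 = 5.4302e-6f, E6 = 6.9060e-7f;
    float p = 1.0f + x*(E1 + x*(E2 + x*(E3 + x*(E4 + x*(E5 + x*E6)))));
    p *= p;          // p(x)^2
    p *= p;          // p(x)^4
    return 1.0f / p;
}
```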
@@ -43,6 +43,17 @@ PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xb
PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652};
PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7};
PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb};
//PCSX2_ALIGNED16(const u32 mVU_S1[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000};
PCSX2_ALIGNED16(const u32 mVU_S2[4]) = {0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4, 0xbe2aaaa4};
PCSX2_ALIGNED16(const u32 mVU_S3[4]) = {0x3c08873e, 0x3c08873e, 0x3c08873e, 0x3c08873e};
PCSX2_ALIGNED16(const u32 mVU_S4[4]) = {0xb94fb21f, 0xb94fb21f, 0xb94fb21f, 0xb94fb21f};
PCSX2_ALIGNED16(const u32 mVU_S5[4]) = {0x362e9c14, 0x362e9c14, 0x362e9c14, 0x362e9c14};
PCSX2_ALIGNED16(const u32 mVU_E1[4]) = {0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8, 0x3e7fffa8};
PCSX2_ALIGNED16(const u32 mVU_E2[4]) = {0x3d0007f4, 0x3d0007f4, 0x3d0007f4, 0x3d0007f4};
PCSX2_ALIGNED16(const u32 mVU_E3[4]) = {0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff, 0x3b29d3ff};
PCSX2_ALIGNED16(const u32 mVU_E4[4]) = {0x3933e553, 0x3933e553, 0x3933e553, 0x3933e553};
PCSX2_ALIGNED16(const u32 mVU_E5[4]) = {0x36b63510, 0x36b63510, 0x36b63510, 0x36b63510};
PCSX2_ALIGNED16(const u32 mVU_E6[4]) = {0x353961ac, 0x353961ac, 0x353961ac, 0x353961ac};
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
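Decoded, these hex patterns are ordinary IEEE-754 singles broadcast across four lanes: mVU_Pi4 is pi/4, mVU_S2..mVU_S5 are approximately the odd Taylor coefficients of sine (-1/3!, 1/5!, -1/7!, 1/9!), mVU_E1..mVU_E6 feed the EEXP polynomial, and the mVU_FTOI_* floats are the 2^4, 2^12, and 2^15 fixed-point scale factors. A quick sanity check in plain C (my decoding, worth verifying):

```c
#include <stdio.h>
#include <string.h>

// Reinterpret an IEEE-754 bit pattern as a float (helper written for this note).
static float f32(unsigned int u) { float f; memcpy(&f, &u, sizeof(f)); return f; }

int main(void) {
    printf("Pi4 = %.7f\n", f32(0x3f490fdbu)); // ~0.7853982  (pi/4)
    printf("S2  = %.7f\n", f32(0xbe2aaaa4u)); // ~-0.1666666 (~ -1/3!)
    printf("S3  = %.7f\n", f32(0x3c08873eu)); // ~0.0083333  (~ 1/5!)
    printf("S4  = %.7g\n", f32(0xb94fb21fu)); // ~-0.0001981 (~ -1/7!)
    printf("S5  = %.7g\n", f32(0x362e9c14u)); // ~2.602e-06  (~ 1/9!)
    printf("E1  = %.7f\n", f32(0x3e7fffa8u)); // ~0.2499987
    return 0;
}
```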
@@ -748,14 +748,14 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
microVUt(void) mVUallocVIa(int GPRreg, int _reg_) {
    microVU* mVU = mVUx;
    if (_reg_ == 0) { XOR32RtoR(GPRreg, GPRreg); }
    else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmxVI1 + (_reg_ - 1)); }
    else if (_reg_ < 9) { MOVD32MMXtoR(GPRreg, mmVI(_reg_)); }
    else { MOVZX32M16toR(GPRreg, (uptr)&mVU->regs->VI[_reg_].UL); }
}

microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
    microVU* mVU = mVUx;
    if (_reg_ == 0) { return; }
    else if (_reg_ < 9) { MOVD32RtoMMX(mmxVI1 + (_reg_ - 1), GPRreg); }
    else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); }
    else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); }
}
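These two helpers encode the storage convention the rest of the commit leans on: VI0 is hardwired to zero, VI1-VI8 are cached in MMX registers, and the remaining VI registers fall back to the VU register file in memory. A sketch of the dispatch (an assumption drawn from mVUallocVIa/b above and the isMMX/mmVI macros added in microVU_Misc.h, not the emitter code itself):

```cpp
// Sketch of the VI storage convention (assumed from mVUallocVIa/b).
enum ViLocation { VI_ZERO, VI_MMX, VI_MEMORY };

static ViLocation viLocation(int reg) {
    if (reg == 0) return VI_ZERO;   // reads become xor gpr,gpr; writes are dropped
    if (reg < 9)  return VI_MMX;    // movd to/from mm(reg - 1), per mmVI()
    return VI_MEMORY;               // movzx/mov against &mVU->regs->VI[reg].UL
}
```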
@@ -773,8 +773,14 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
//------------------------------------------------------------------

#define getReg5(reg, _reg_, _fxf_) { \
    mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
    if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (1 << (3 - _fxf_))); \
    if (!_reg_) { \
        if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \
        else { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \
    } \
    else { \
        mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
        if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (1 << (3 - _fxf_))); \
    } \
}
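The `(1 << (3 - _fxf_))` expression turns a broadcast field index into the recompiler's xyzw write mask (x = 8, y = 4, z = 2, w = 1). The new `!_reg_` branch special-cases VF00, which on the VU reads as the constant (0, 0, 0, 1): the x/y/z fields can simply be zeroed with XORPS, while the w field still has to come from the register file. A small sketch of the mask math (illustrative):

```cpp
// Field index -> xyzw bitmask as used by getReg5 (illustrative).
// _fxf_: 0 = x, 1 = y, 2 = z, 3 = w; mask bits: x = 8, y = 4, z = 2, w = 1.
static int fieldToXyzwMask(int fxf) {
    return 1 << (3 - fxf); // 0 -> 8 (x), 1 -> 4 (y), 2 -> 2 (z), 3 -> 1 (w)
}
```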
#endif //PCSX2_MICROVU
@@ -167,7 +167,6 @@ microVUf(void) mVU_EATAN() {
        getReg5(xmmFs, _Fs_, _Fsf_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance

        // ToDo: can this be optimized further? (takes ~125 cycles + mem access time on a C2D)
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
        SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);
@@ -180,8 +179,8 @@ microVUf(void) mVU_EATANxy() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, 1);
        getReg5(xmmFt, _Fs_, 0);
        getReg6(xmmFt, _Fs_);
        SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance

        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
@@ -196,8 +195,8 @@ microVUf(void) mVU_EATANxz() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, 2);
        getReg5(xmmFt, _Fs_, 0);
        getReg6(xmmFt, _Fs_);
        SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance

        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
@@ -208,15 +207,162 @@ microVUf(void) mVU_EATANxz() {
        mVU_EATAN_<vuIndex>();
    }
}
microVUf(void) mVU_EEXP() {}
microVUf(void) mVU_ELENG() {}
#define eexpHelper(addr) { \
    SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
    SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1); \
    SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \
    SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \
}
microVUf(void) mVU_EEXP() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, _Fsf_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_E1);
        SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one);

        SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
        SSE_MULSS_XMM_to_XMM(xmmFt, xmmFs);
        SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFt);
        SSE_MULSS_M32_to_XMM(xmmFt, (uptr)mVU_E2);
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt);

        eexpHelper(mVU_E3);
        eexpHelper(mVU_E4);
        eexpHelper(mVU_E5);

        SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs);
        SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_E6);
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
        SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);
        SSE_MULSS_XMM_to_XMM(xmmPQ, xmmPQ);
        SSE_MOVSS_M32_to_XMM(xmmT1, (uptr)mVU_one);
        SSE_DIVSS_XMM_to_XMM(xmmT1, xmmPQ);
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
microVUt(void) mVU_sumXYZ() {
    // regd.x = x ^ 2 + y ^ 2 + z ^ 2
    if (cpucaps.hasStreamingSIMD4Extensions) {
        SSE4_DPPS_XMM_to_XMM(xmmFs, xmmFs, 0x71);
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
    }
    else {
        SSE_MULPS_XMM_to_XMM(xmmFs, xmmFs); // wzyx ^ 2
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xe1); // wzyx -> wzxy
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2
        SSE_SHUFPS_XMM_to_XMM(xmmFs, xmmFs, 0xD2); // wzxy -> wxyz
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + z ^ 2
    }
}
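mVU_sumXYZ is the shared kernel behind ELENG/ERLENG/ERSADD/ESADD: it leaves x² + y² + z² in the low lane. On SSE4.1 hardware one DPPS does the whole reduction; the 0x71 immediate selects the x/y/z lanes for the multiply (high nibble 0111) and writes the sum to lane 0 only (low nibble 0001). The fallback squares every lane and folds them down with two shuffle+add steps. An intrinsics sketch of the SSE4 path (illustrative, not the emitter calls used here):

```c
#include <smmintrin.h> // SSE4.1

// Returns x*x + y*y + z*z in the low lane (sketch of mVU_sumXYZ's SSE4 path).
static __m128 sum_xyz(__m128 v) {
    return _mm_dp_ps(v, v, 0x71); // 0x71: multiply lanes x,y,z; store sum in lane 0
}
```

ELENG then takes SQRTSS of that lane, ERLENG divides 1.0 by the square root, ERSADD divides 1.0 by the raw sum, and ESADD writes the sum as-is.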
microVUf(void) mVU_ELENG() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg6(xmmFs, _Fs_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        mVU_sumXYZ<vuIndex>();
        SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
microVUf(void) mVU_ERCPR() {}
microVUf(void) mVU_ERLENG() {}
microVUf(void) mVU_ERSADD() {}
microVUf(void) mVU_ERSQRT() {}
microVUf(void) mVU_ESADD() {}
microVUf(void) mVU_ESIN() {}
microVUf(void) mVU_ESQRT() {}
microVUf(void) mVU_ERLENG() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg6(xmmFs, _Fs_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        mVU_sumXYZ<vuIndex>();
        SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ);
        SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
        SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
microVUf(void) mVU_ERSADD() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg6(xmmFs, _Fs_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        mVU_sumXYZ<vuIndex>();
        //SSE_RCPSS_XMM_to_XMM(xmmPQ, xmmPQ); // Lower Precision is bad?
        SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
        SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
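The commented-out RCPSS line flags a real trade-off: RCPSS is fast but only accurate to roughly 12 mantissa bits, which is presumably why the code pays for a full-precision DIVSS instead. If the precision question in that comment is ever revisited, the conventional middle ground is one Newton-Raphson step on the estimate; a hedged sketch of that alternative (not what this commit emits):

```c
#include <xmmintrin.h> // SSE1

// One Newton-Raphson refinement of the ~12-bit RCPSS estimate:
// r' = r * (2 - d*r), giving near-full single precision at lower
// latency than DIVSS. Sketch only; not what the recompiler emits.
static __m128 refined_recip_ss(__m128 d) {
    __m128 r   = _mm_rcp_ss(d);
    __m128 two = _mm_set_ss(2.0f);
    return _mm_mul_ss(r, _mm_sub_ss(two, _mm_mul_ss(d, r)));
}
```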
microVUf(void) mVU_ERSQRT() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, _Fsf_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_MOVSS_M32_to_XMM(xmmFs, (uptr)mVU_one);
        SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ);
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
microVUf(void) mVU_ESADD() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg6(xmmFs, _Fs_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        mVU_sumXYZ<vuIndex>();
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
#define esinHelper(addr) { \
    SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \
    SSE_MOVSS_XMM_to_XMM(xmmFs, xmmT1); \
    SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \
    SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \
}
microVUf(void) mVU_ESIN() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, _Fsf_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
        //SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); // Multiplying by 1 is redundant?
        SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
        SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt);
        SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs);
        SSE_MULSS_XMM_to_XMM(xmmFs, xmmFt);
        SSE_MOVSS_XMM_to_XMM(xmmFt, xmmFs);
        SSE_MULSS_M32_to_XMM(xmmFs, (uptr)mVU_S2);
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs);

        esinHelper(mVU_S3);
        esinHelper(mVU_S4);

        SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt);
        SSE_MULSS_M32_to_XMM(xmmT1, (uptr)mVU_S5);
        SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
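The S-constants decode to approximately the odd Taylor coefficients of sine with the signs baked in, so the target approximation appears to be sin(x) ≈ x − x³/3! + x⁵/5! − x⁷/7! + x⁹/9!. A scalar sketch of that polynomial (illustrative; the emitted sequence above builds the odd powers incrementally in xmmT1/xmmFt, and the helper name is hypothetical):

```c
// Scalar sketch of the sine polynomial the mVU_S2..S5 tables encode
// (coefficients carry their own signs). Illustrative, not the emitted code.
static float vu_esin(float x) // hypothetical helper name
{
    const float S2 = -0.1666667f;  // ~ -1/3!
    const float S3 =  0.0083333f;  // ~  1/5!
    const float S4 = -0.0001981f;  // ~ -1/7!
    const float S5 =  2.602e-6f;   // ~  1/9!
    float x2 = x * x;
    return x + x*x2*(S2 + x2*(S3 + x2*(S4 + x2*S5)));
}
```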
microVUf(void) mVU_ESQRT() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        getReg5(xmmFs, _Fs_, _Fsf_);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
        SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs);
        SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
    }
}
microVUf(void) mVU_ESUM() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
@@ -315,8 +461,11 @@ microVUf(void) mVU_IADD() {
    if (recPass == 0) {}
    else {
        mVUallocVIa<vuIndex>(gprT1, _Fs_);
        mVUallocVIa<vuIndex>(gprT2, _Ft_);
        ADD16RtoR(gprT1, gprT2);
        if (_Ft_ != _Fs_) {
            mVUallocVIa<vuIndex>(gprT2, _Ft_);
            ADD16RtoR(gprT1, gprT2);
        }
        else ADD16RtoR(gprT1, gprT1);
        mVUallocVIb<vuIndex>(gprT1, _Fd_);
    }
}
@@ -343,8 +492,10 @@ microVUf(void) mVU_IAND() {
    if (recPass == 0) {}
    else {
        mVUallocVIa<vuIndex>(gprT1, _Fs_);
        mVUallocVIa<vuIndex>(gprT2, _Ft_);
        AND32RtoR(gprT1, gprT2);
        if (_Ft_ != _Fs_) {
            mVUallocVIa<vuIndex>(gprT2, _Ft_);
            AND32RtoR(gprT1, gprT2);
        }
        mVUallocVIb<vuIndex>(gprT1, _Fd_);
    }
}
@@ -353,8 +504,10 @@ microVUf(void) mVU_IOR() {
    if (recPass == 0) {}
    else {
        mVUallocVIa<vuIndex>(gprT1, _Fs_);
        mVUallocVIa<vuIndex>(gprT2, _Ft_);
        OR32RtoR(gprT1, gprT2);
        if (_Ft_ != _Fs_) {
            mVUallocVIa<vuIndex>(gprT2, _Ft_);
            OR32RtoR(gprT1, gprT2);
        }
        mVUallocVIb<vuIndex>(gprT1, _Fd_);
    }
}
@@ -362,10 +515,16 @@ microVUf(void) mVU_ISUB() {
    microVU* mVU = mVUx;
    if (recPass == 0) {}
    else {
        mVUallocVIa<vuIndex>(gprT1, _Fs_);
        mVUallocVIa<vuIndex>(gprT2, _Ft_);
        SUB16RtoR(gprT1, gprT2);
        mVUallocVIb<vuIndex>(gprT1, _Fd_);
        if (_Ft_ != _Fs_) {
            mVUallocVIa<vuIndex>(gprT1, _Fs_);
            mVUallocVIa<vuIndex>(gprT2, _Ft_);
            SUB16RtoR(gprT1, gprT2);
        }
        else if (!isMMX(_Fd_)) {
            XOR32RtoR(gprT1, gprT1);
            mVUallocVIb<vuIndex>(gprT1, _Fd_);
        }
        else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); }
    }
}
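The new `_Ft_ != _Fs_` guards in IADD/IAND/IOR/ISUB handle the aliased-operand case without loading the same VI register twice: x + x is a doubling, x & x and x | x leave the value unchanged, and x − x is zero, so ISUB can clear the destination directly (XOR32 for a GPR, PXOR when the destination VI lives in MMX). A scalar sketch of the shortcuts:

```c
// Scalar sketch of the aliased-operand fast paths (illustrative).
static int vi_iadd(int fs, int ft) { return (ft == fs) ? fs + fs : fs + ft; } // ADD t1,t1
static int vi_iand(int fs, int ft) { return (ft == fs) ? fs      : fs & ft; } // x & x == x
static int vi_isub(int fs, int ft) { return (ft == fs) ? 0       : fs - ft; } // zero via XOR/PXOR
```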
microVUf(void) mVU_ISUBIU() {
@@ -21,6 +21,7 @@
//------------------------------------------------------------------
// Global Variables
//------------------------------------------------------------------

PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]);
@@ -40,6 +41,17 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
//PCSX2_ALIGNED16_EXTERN(const u32 mVU_S1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_S5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E1[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E2[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E3[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E4[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E5[4]);
PCSX2_ALIGNED16_EXTERN(const u32 mVU_E6[4]);

//------------------------------------------------------------------
// Helper Macros
@@ -129,4 +141,7 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]);
//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<13))
//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<14))

#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <= 9)
#define mmVI(_VIreg_) (_VIreg_ - 1)

#include "microVU_Misc.inl"
@@ -84,7 +84,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \
        if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
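Every FMAC macro variant below repeats this selection: if the write mask names exactly one field (and, for the forms that require it, that field is x), the recompiler emits the scalar `SS` form of the SSE op instead of the packed `PS` form. A restatement of the predicate, treating `_XYZW_SS` and `_X` as the mask tests they appear to be:

```cpp
// Illustrative restatement of the SS/PS choice in the FMAC macros.
// xyzw is the 4-bit write mask (x = 8, y = 4, z = 2, w = 1).
static bool useScalarOp(int xyzw) {
    bool singleField = (xyzw == 8) || (xyzw == 4) || (xyzw == 2) || (xyzw == 1); // _XYZW_SS
    bool writesX     = (xyzw & 8) != 0;                                          // _X
    return singleField && writesX; // e.g. MULSS instead of MULPS
}
```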
@@ -98,7 +97,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \
        if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -112,7 +110,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC4a<vuIndex>(ACC, Fs, Ft); \
        if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -126,7 +123,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC5a<vuIndex>(ACC, Fs, Ft); \
        if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -140,7 +136,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC6a<vuIndex>(Fd, Fs, Ft); \
        if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -154,7 +149,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC7a<vuIndex>(ACC, Fs, Ft); \
        if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -168,7 +162,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC8a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -188,7 +181,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC9a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -208,7 +200,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC10a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -228,7 +219,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC11a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -248,7 +238,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC12a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -268,7 +257,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC13a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -288,7 +276,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC14a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -308,7 +295,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC15a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -328,7 +314,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC16a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -348,7 +333,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC17a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -368,7 +352,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC18a<vuIndex>(ACC, Fs, Ft); \
        SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
        mVUupdateFlags<vuIndex>(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \
@@ -381,7 +364,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC19a<vuIndex>(Fd, ACC, Fs, Ft); \
        SSE_MULPS_XMM_to_XMM(Fs, Ft); \
        SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \
@@ -395,7 +377,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC20a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -415,7 +396,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC21a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -435,7 +415,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC22a<vuIndex>(Fd, Fs, Ft); \
        if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -449,7 +428,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC23a<vuIndex>(ACC, Fs, Ft); \
        if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
        else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
@@ -463,7 +441,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC24a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -483,7 +460,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int Fd, ACC, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC25a<vuIndex>(Fd, ACC, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -503,7 +479,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC26a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -523,7 +498,6 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
    if (recPass == 0) {} \
    else { \
        int ACCw, ACCr, Fs, Ft; \
        if (isNOP) return; \
        mVUallocFMAC27a<vuIndex>(ACCw, ACCr, Fs, Ft); \
        if (_XYZW_SS && _X) { \
            SSE_MULSS_XMM_to_XMM(Fs, Ft); \
@@ -547,7 +521,6 @@ microVUf(void) mVU_ABS() {
    if (recPass == 0) {}
    else {
        int Fs, Ft;
        if (isNOP) return;
        mVUallocFMAC2a<vuIndex>(Fs, Ft);
        SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip);
        mVUallocFMAC1b<vuIndex>(Ft);
@@ -647,7 +620,6 @@ microVUq(void) mVU_FTOIx(uptr addr) {
    if (recPass == 0) {}
    else {
        int Fs, Ft;
        if (isNOP) return;
        mVUallocFMAC2a<vuIndex>(Fs, Ft);

        // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
@@ -673,7 +645,6 @@ microVUq(void) mVU_ITOFx(uptr addr) {
    if (recPass == 0) {}
    else {
        int Fs, Ft;
        if (isNOP) return;
        mVUallocFMAC2a<vuIndex>(Fs, Ft);

        SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs);
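FTOIx and ITOFx are the VU's fixed-point conversions: CVTDQ2PS turns the raw integer lanes into floats, and the mVU_FTOI_* tables above (16, 4096, 32768) are the 2^4, 2^12, and 2^15 scale factors for the 4-, 12-, and 15-fractional-bit formats. A scalar sketch of the round trip (my reading of how the scale constants are applied; real FTOI also needs the saturation handling the recVUMI_FTOI_Saturate() comment points to):

```c
// Scalar sketch of the VU fixed-point conversions (illustrative).
// n = number of fractional bits (4, 12, or 15).
static float vu_itof(int v, int n)   { return (float)v / (float)(1 << n); }
static int   vu_ftoi(float f, int n) { return (int)(f * (float)(1 << n)); } // + saturation
```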