diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 55dbc6278e..eaeeadd90e 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -33,6 +33,16 @@ PCSX2_ALIGNED16(const u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; +PCSX2_ALIGNED16(const u32 mVU_one[4]) = {0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000}; +PCSX2_ALIGNED16(const u32 mVU_T1[4]) = {0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5, 0x3f7ffff5}; +PCSX2_ALIGNED16(const u32 mVU_T2[4]) = {0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c, 0xbeaaa61c}; +PCSX2_ALIGNED16(const u32 mVU_T3[4]) = {0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6, 0x3e4c40a6}; +PCSX2_ALIGNED16(const u32 mVU_T4[4]) = {0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63, 0xbe0e6c63}; +PCSX2_ALIGNED16(const u32 mVU_T5[4]) = {0x3dc577df, 0x3dc577df, 0x3dc577df, 0x3dc577df}; +PCSX2_ALIGNED16(const u32 mVU_T6[4]) = {0xbd6501c4, 0xbd6501c4, 0xbd6501c4, 0xbd6501c4}; +PCSX2_ALIGNED16(const u32 mVU_T7[4]) = {0x3cb31652, 0x3cb31652, 0x3cb31652, 0x3cb31652}; +PCSX2_ALIGNED16(const u32 mVU_T8[4]) = {0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7, 0xbb84d7e7}; +PCSX2_ALIGNED16(const u32 mVU_Pi4[4]) = {0x3f490fdb, 0x3f490fdb, 0x3f490fdb, 0x3f490fdb}; PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0}; PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0}; PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0}; @@ -40,7 +50,6 @@ PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = {0.0625f, 0.0625f, 0.0625f, 0.0625f PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625}; PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 
0.000030517578125}; - //------------------------------------------------------------------ // Micro VU - Main Functions //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index a79861e111..af260661be 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -901,7 +901,7 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { //------------------------------------------------------------------ #define getPreg(reg) { \ - mVUunpack_xyzw(reg, xmmPQ, (2 + writeP)); \ + mVUunpack_xyzw(reg, xmmPQ, (2 + readP)); \ /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 3e268e03a4..43ae6e8d80 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -134,9 +134,80 @@ microVUf(void) mVU_RSQRT() { } } -microVUf(void) mVU_EATAN() {} -microVUf(void) mVU_EATANxy() {} -microVUf(void) mVU_EATANxz() {} +#define EATANhelper(addr) { \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ + SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ + SSE_MOVSS_XMM_to_XMM(xmmFt, xmmT1); \ + SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \ + SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \ +} +microVUt(void) mVU_EATAN_() { + microVU* mVU = mVUx; + + // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d) + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_MULSS_M32_to_XMM(xmmPQ, (uptr)mVU_T1); + SSE_MOVSS_XMM_to_XMM(xmmT1, xmmFs); + + EATANhelper(mVU_T2); + EATANhelper(mVU_T3); + EATANhelper(mVU_T4); + EATANhelper(mVU_T5); + EATANhelper(mVU_T6); + EATANhelper(mVU_T7); + EATANhelper(mVU_T8); + + SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_Pi4); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); +} +microVUf(void) mVU_EATAN() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, _Fsf_); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + + // ToDo: Can Be Optimized Further? (takes approximately (~125 cycles + mem access time) on a c2d) + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SUBSS_M32_to_XMM(xmmFs, (uptr)mVU_one); + SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_one); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmPQ); + + mVU_EATAN_(); + } +} +microVUf(void) mVU_EATANxy() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, 1); + getReg5(xmmFt, _Fs_, 0); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_ADDSS_XMM_to_XMM(xmmFt, xmmPQ); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt); + + mVU_EATAN_(); + } +} +microVUf(void) mVU_EATANxz() { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + getReg5(xmmFs, _Fs_, 2); + getReg5(xmmFt, _Fs_, 0); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + + SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); + SSE_SUBSS_XMM_to_XMM(xmmFs, xmmFt); + SSE_ADDSS_XMM_to_XMM(xmmFt, xmmPQ); + SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt); + + mVU_EATAN_(); + } +} microVUf(void) mVU_EEXP() {} microVUf(void) mVU_ELENG() {} microVUf(void) mVU_ERCPR() {} diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index a6dc6233ca..b30db341e9 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -31,6 +31,16 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]); PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]); PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]); PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_one[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T1[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T2[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T3[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T4[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T5[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T6[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_T7[4]); 
+PCSX2_ALIGNED16_EXTERN(const u32 mVU_T8[4]); +PCSX2_ALIGNED16_EXTERN(const u32 mVU_Pi4[4]); //------------------------------------------------------------------ // Helper Macros