diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 5e51b802ec..885c4e6fea 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -19,8 +19,8 @@ // Micro VU recompiler! - author: cottonvibes(@gmail.com) #include "PrecompiledHeader.h" -#ifdef PCSX2_MICROVU #include "microVU.h" +#ifdef PCSX2_MICROVU //------------------------------------------------------------------ // Micro VU - Global Variables @@ -29,9 +29,17 @@ PCSX2_ALIGNED16(microVU microVU0); PCSX2_ALIGNED16(microVU microVU1); +PCSX2_ALIGNED16(const u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; +PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = { 16.0, 16.0, 16.0, 16.0 }; +PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; +PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; +PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; +PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; +PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; + //------------------------------------------------------------------ // Micro VU - Main Functions diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 9e4251c0a3..9e4886820e 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -45,6 +45,9 @@ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } +//------------------------------------------------------------------ +// FMAC1 - Normal FMAC Opcodes +//------------------------------------------------------------------ microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; @@ -79,4 +82,86 @@ microVUt(void) mVUallocFMAC1b(int& Fd) { mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); } +//------------------------------------------------------------------ +// FMAC2 - ABS/FTOI/ITOF Opcodes +//------------------------------------------------------------------ +microVUt(void) mVUallocFMAC2a(int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFs; + if (_XYZW_SS) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + } + else { + if (!_Fs_) { getZero(Fs); } + else { getReg(Fs, _Fs_); } + } +} + +microVUt(void) mVUallocFMAC2b(int& Ft) { + microVU* mVU = mVUx; + if (!_Ft_) return; + //if (CHECK_VU_OVERFLOW) mVUclamp1(Ft, xmmT1, _X_Y_Z_W); + mVUsaveReg(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC3 - BC(xyzw) FMAC Opcodes +//------------------------------------------------------------------ + +#define getReg3SS(reg, _reg_) { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _bc_))); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _bc_))); \ +} + +#define getReg3(reg, _reg_) { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _bc_))); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _bc_))); \ + mVUunpack_xyzw(reg, reg, _bc_); \ +} + +#define getZero3SS(reg) { \ + if (_bc_w) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], 1); } \ + else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ +} + +#define getZero3(reg) { \ + if (_bc_w) { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], 1); \ + mVUunpack_xyzw(reg, reg, _bc_); \ + } \ + else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ +} + +microVUt(void) mVUallocFMAC3a(int& Fd, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + Fd = xmmFs; + if (_XYZW_SS) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if ( (_Ft_ == _Fs_) && ((_X && _bc_x) || (_Y && _bc_y) || (_Z && _bc_w) || (_W && _bc_w)) ) { + Ft = Fs; + } + else { + if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero(Fs); } + else { getReg(Fs, _Fs_); } + + if (!_Ft_) { getZero3(Ft); } + else { getReg3(Ft, _Ft_); } + } +} + +microVUt(void) mVUallocFMAC3b(int& Fd) { + mVUallocFMAC1b(Fd); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 3992088ac0..b5f5870321 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -21,9 +21,16 @@ //------------------------------------------------------------------ // Global Variables //------------------------------------------------------------------ +PCSX2_ALIGNED16_EXTERN(const u32 mVU_absclip[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_signbit[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_minvals[4]); PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_4[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_12[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_FTOI_15[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_4[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_12[4]); +PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); //------------------------------------------------------------------ // Helper Macros @@ -39,7 +46,13 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]); #define _XYZW_SS (_X+_Y+_Z+_W==1) -#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF ) ) +#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF )) + +#define _bc_ (mVU->code & 0x03) +#define _bc_x ((mVU->code & 0x03) == 0) +#define _bc_y ((mVU->code & 0x03) == 1) +#define _bc_z ((mVU->code & 0x03) == 2) +#define _bc_w ((mVU->code & 0x03) == 3) #define _Fsf_ ((mVU->code >> 21) & 0x03) #define _Ftf_ ((mVU->code >> 23) & 0x03) @@ -70,6 +83,7 @@ PCSX2_ALIGNED16_EXTERN(const u32 mVU_maxvals[4]); #define microVUt(aType) template __forceinline aType #define microVUx(aType) template aType #define microVUf(aType) template aType +#define microVUq(aType) template __forceinline aType #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 1b422e0c15..b66891ed91 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -76,32 +76,57 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { // Helper Macros //------------------------------------------------------------------ -#define mVU_FMAC1(operation) { \ - if (isNOP) return; \ - int Fd, Fs, Ft; \ - mVUallocFMAC1a(Fd, Fs, Ft); \ - if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ - else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ - mVUallocFMAC1b(Fd); \ +#define mVU_FMAC1(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC1a(Fd, Fs, Ft); \ + if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC1b(Fd); \ + } \ +} + +#define mVU_FMAC3(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC3a(Fd, Fs, Ft); \ + if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC3b(Fd); \ + } \ } //------------------------------------------------------------------ // Micro VU Micromode Upper instructions //------------------------------------------------------------------ -microVUf(void) mVU_ABS(){} -microVUf(void) mVU_ADD() { +microVUf(void) mVU_ABS() { microVU* mVU = mVUx; if (recPass == 0) {} - else { mVU_FMAC1(ADD); } + else { + int Fs, Ft; + if (isNOP) return; + mVUallocFMAC2a(Fs, Ft); + SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); + mVUallocFMAC1b(Ft); + } } +microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); } microVUf(void) mVU_ADDi(){} microVUf(void) mVU_ADDq(){} -microVUf(void) mVU_ADDx(){} -microVUf(void) mVU_ADDy(){} -microVUf(void) mVU_ADDz(){} -microVUf(void) mVU_ADDw(){} +microVUq(void) mVU_ADDxyzw() { mVU_FMAC3(ADD); } +microVUf(void) mVU_ADDx() { mVU_ADDxyzw(); } +microVUf(void) mVU_ADDy() { mVU_ADDxyzw(); } +microVUf(void) mVU_ADDz() { mVU_ADDxyzw(); } +microVUf(void) mVU_ADDw() { mVU_ADDxyzw(); } microVUf(void) mVU_ADDA(){} microVUf(void) mVU_ADDAi(){} microVUf(void) mVU_ADDAq(){} @@ -109,17 +134,14 @@ microVUf(void) mVU_ADDAx(){} microVUf(void) mVU_ADDAy(){} microVUf(void) mVU_ADDAz(){} microVUf(void) mVU_ADDAw(){} -microVUf(void) mVU_SUB(){ - microVU* mVU = mVUx; - if (recPass == 0) {} - else { mVU_FMAC1(SUB); } -} +microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); } microVUf(void) mVU_SUBi(){} microVUf(void) mVU_SUBq(){} -microVUf(void) mVU_SUBx(){} -microVUf(void) mVU_SUBy(){} -microVUf(void) mVU_SUBz(){} -microVUf(void) mVU_SUBw(){} +microVUq(void) mVU_SUBxyzw() { mVU_FMAC3(SUB); } +microVUf(void) mVU_SUBx() { mVU_SUBxyzw(); } +microVUf(void) mVU_SUBy() { mVU_SUBxyzw(); } +microVUf(void) mVU_SUBz() { mVU_SUBxyzw(); } +microVUf(void) mVU_SUBw() { mVU_SUBxyzw(); } microVUf(void) mVU_SUBA(){} microVUf(void) mVU_SUBAi(){} microVUf(void) mVU_SUBAq(){} @@ -127,17 +149,14 @@ microVUf(void) mVU_SUBAx(){} microVUf(void) mVU_SUBAy(){} microVUf(void) mVU_SUBAz(){} microVUf(void) mVU_SUBAw(){} -microVUf(void) mVU_MUL(){ - microVU* mVU = mVUx; - if (recPass == 0) {} - else { mVU_FMAC1(MUL); } -} +microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); } microVUf(void) mVU_MULi(){} microVUf(void) mVU_MULq(){} -microVUf(void) mVU_MULx(){} -microVUf(void) mVU_MULy(){} -microVUf(void) mVU_MULz(){} -microVUf(void) mVU_MULw(){} +microVUq(void) mVU_MULxyzw() { mVU_FMAC3(MUL); } +microVUf(void) mVU_MULx() { mVU_MULxyzw(); } +microVUf(void) mVU_MULy() { mVU_MULxyzw(); } +microVUf(void) mVU_MULz() { mVU_MULxyzw(); } +microVUf(void) mVU_MULw() { mVU_MULxyzw(); } microVUf(void) mVU_MULA(){} microVUf(void) mVU_MULAi(){} microVUf(void) mVU_MULAq(){} @@ -173,28 +192,67 @@ microVUf(void) mVU_MSUBAx(){} microVUf(void) mVU_MSUBAy(){} microVUf(void) mVU_MSUBAz(){} microVUf(void) mVU_MSUBAw(){} -microVUf(void) mVU_MAX(){} +microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); } microVUf(void) mVU_MAXi(){} -microVUf(void) mVU_MAXx(){} -microVUf(void) mVU_MAXy(){} -microVUf(void) mVU_MAXz(){} -microVUf(void) mVU_MAXw(){} -microVUf(void) mVU_MINI(){} +microVUq(void) mVU_MAXxyzw() { mVU_FMAC3(MAX); } +microVUf(void) mVU_MAXx() { mVU_MAXxyzw(); } +microVUf(void) mVU_MAXy() { mVU_MAXxyzw(); } +microVUf(void) mVU_MAXz() { mVU_MAXxyzw(); } +microVUf(void) mVU_MAXw() { mVU_MAXxyzw(); } +microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); } microVUf(void) mVU_MINIi(){} -microVUf(void) mVU_MINIx(){} -microVUf(void) mVU_MINIy(){} -microVUf(void) mVU_MINIz(){} -microVUf(void) mVU_MINIw(){} +microVUq(void) mVU_MINIxyzw(){ mVU_FMAC3(MIN); } +microVUf(void) mVU_MINIx() { mVU_MINIxyzw(); } +microVUf(void) mVU_MINIy() { mVU_MINIxyzw(); } +microVUf(void) mVU_MINIz() { mVU_MINIxyzw(); } +microVUf(void) mVU_MINIw() { mVU_MINIxyzw(); } microVUf(void) mVU_OPMULA(){} microVUf(void) mVU_OPMSUB(){} microVUf(void) mVU_NOP(){} -microVUf(void) mVU_FTOI0(){} -microVUf(void) mVU_FTOI4(){} -microVUf(void) mVU_FTOI12(){} -microVUf(void) mVU_FTOI15(){} -microVUf(void) mVU_ITOF0(){} -microVUf(void) mVU_ITOF4(){} -microVUf(void) mVU_ITOF12(){} -microVUf(void) mVU_ITOF15(){} +microVUq(void) mVU_FTOIx(uptr addr) { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + int Fs, Ft; + if (isNOP) return; + mVUallocFMAC2a(Fs, Ft); + + // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() + SSE_MOVAPS_XMM_to_XMM(xmmT1, Fs); + if (addr) { SSE_MULPS_M128_to_XMM(Fs, addr); } + SSE2_CVTTPS2DQ_XMM_to_XMM(Fs, Fs); + SSE2_PXOR_M128_to_XMM(xmmT1, (uptr)mVU_signbit); + SSE2_PSRAD_I8_to_XMM(xmmT1, 31); + SSE_MOVAPS_XMM_to_XMM(xmmFt, Fs); + SSE2_PCMPEQD_M128_to_XMM(xmmFt, (uptr)mVU_signbit); + SSE_ANDPS_XMM_to_XMM(xmmT1, xmmFt); + SSE2_PADDD_XMM_to_XMM(Fs, xmmT1); + + mVUallocFMAC1b(Ft); + } +} +microVUf(void) mVU_FTOI0() { mVU_FTOIx(0); } +microVUf(void) mVU_FTOI4() { mVU_FTOIx((uptr)mVU_FTOI_4); } +microVUf(void) mVU_FTOI12() { mVU_FTOIx((uptr)mVU_FTOI_12); } +microVUf(void) mVU_FTOI15() { mVU_FTOIx((uptr)mVU_FTOI_15); } +microVUq(void) mVU_ITOFx(uptr addr) { + microVU* mVU = mVUx; + if (recPass == 0) {} + else { + int Fs, Ft; + if (isNOP) return; + mVUallocFMAC2a(Fs, Ft); + + SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs); + if (addr) { SSE_MULPS_M128_to_XMM(Ft, addr); } + //mVUclamp2(Ft, xmmT1, 15); // Clamp infinities (not sure if this is needed) + + mVUallocFMAC1b(Ft); + } +} +microVUf(void) mVU_ITOF0() { mVU_ITOFx(0); } +microVUf(void) mVU_ITOF4() { mVU_ITOFx((uptr)mVU_ITOF_4); } +microVUf(void) mVU_ITOF12() { mVU_ITOFx((uptr)mVU_ITOF_12); } +microVUf(void) mVU_ITOF15() { mVU_ITOFx((uptr)mVU_ITOF_15); } microVUf(void) mVU_CLIP(){} #endif //PCSX2_MICROVU