From a5d95b75c503f5c7f66cdc36fced7c6e7baee0dd Mon Sep 17 00:00:00 2001
From: cottonvibes
Date: Mon, 9 Mar 2009 11:09:44 +0000
Subject: [PATCH] more lower instructions implemented

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@725 96395faa-99c1-11dd-bbfe-3dabce05a288
---
 pcsx2/x86/microVU_Alloc.inl |  11 +-
 pcsx2/x86/microVU_Lower.inl | 282 ++++++++++++++++++++++++++++--------
 pcsx2/x86/microVU_Misc.h    |   2 +
 3 files changed, 236 insertions(+), 59 deletions(-)

diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index 621d5c976e..a79861e111 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -702,7 +702,7 @@ microVUt(void) mVUallocFMAC21b(int& ACCw, int& ACCr) {
 //------------------------------------------------------------------
 
 #define getQreg(reg) { \
-	mVUunpack_xyzw(reg, xmmPQ, writeQ); \
+	mVUunpack_xyzw(reg, xmmPQ, readQ); \
 	/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \
 }
 
@@ -905,4 +905,13 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) {
 	/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \
 }
 
+//------------------------------------------------------------------
+// Div/Sqrt/Rsqrt Allocator Helpers
+//------------------------------------------------------------------
+
+#define getReg5(reg, _reg_, _fxf_) { \
+	mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \
+	if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _fxf_))); \
+}
+
 #endif //PCSX2_MICROVU
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index fcf95c28f8..3e268e03a4 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -23,29 +23,136 @@
 // Micro VU Micromode Lower instructions
 //------------------------------------------------------------------
 
-microVUf(void) mVU_DIV(){}
-microVUf(void) mVU_SQRT(){}
-microVUf(void) mVU_RSQRT(){}
+microVUf(void) mVU_DIV() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		//u8 *pjmp, *pjmp1;
+		u32 *ajmp32, *bjmp32;
 
-microVUf(void) mVU_EATAN(){}
-microVUf(void) mVU_EATANxy(){}
-microVUf(void) mVU_EATANxz(){}
-microVUf(void) mVU_EEXP(){}
-microVUf(void) mVU_ELENG(){}
-microVUf(void) mVU_ERCPR(){}
-microVUf(void) mVU_ERLENG(){}
-microVUf(void) mVU_ERSADD(){}
-microVUf(void) mVU_ERSQRT(){}
-microVUf(void) mVU_ESADD(){}
-microVUf(void) mVU_ESIN(){}
-microVUf(void) mVU_ESQRT(){}
-microVUf(void) mVU_ESUM(){}
+		getReg5(xmmFs, _Fs_, _Fsf_);
+		getReg5(xmmFt, _Ft_, _Ftf_);
 
-microVUf(void) mVU_FCAND(){}
-microVUf(void) mVU_FCEQ(){}
-microVUf(void) mVU_FCOR(){}
-microVUf(void) mVU_FCSET(){}
-microVUf(void) mVU_FCGET(){}
+		//AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
+
+		// FT can be zero here, so we need to check whether it's zero and set the correct flag.
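+		// (How the zero test below works: CMPEQPS writes an all-ones mask into
+		//  each field of xmmT1 where Ft holds a zero, MOVMSKPS packs the four
+		//  mask sign bits into a GPR, and the AND with 1 keeps only the low
+		//  field, which is the one DIVSS actually divides by.)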
+		SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1);    // Clear xmmT1
+		SSE_CMPEQPS_XMM_to_XMM(xmmT1, xmmFt);  // Set all F's in each field that is zero
+		SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
+
+		AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
+		ajmp32 = JZ32(0);    // Skip if none are
+		//SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1);    // Clear xmmT1
+		//SSE_CMPEQPS_XMM_to_XMM(xmmT1, xmmFs);  // Set all F's if each vector is zero
+		//SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
+
+		//AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
+		//pjmp = JZ8(0);
+		//	OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
+		//	pjmp1 = JMP8(0);
+		//x86SetJ8(pjmp);
+		//	OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
+		//x86SetJ8(pjmp1);
+
+		SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
+		SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
+		SSE_ORPS_M128_to_XMM(xmmFs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax
+
+		bjmp32 = JMP32(0);
+		x86SetJ32(ajmp32);
+
+		SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt);
+		mVUclamp1(xmmFs, xmmFt, 8);
+
+		x86SetJ32(bjmp32);
+
+		mVUunpack_xyzw(xmmFs, xmmFs, 0);
+		mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
+	}
+}
+microVUf(void) mVU_SQRT() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		//u8* pjmp;
+		getReg5(xmmFt, _Ft_, _Ftf_);
+
+		//AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
+		/* Check for negative sqrt */
+		//SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmFt);
+		//AND32ItoR(gprT1, 1); //Check sign
+		//pjmp = JZ8(0);       //Skip if none are
+		//	OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
+		//x86SetJ8(pjmp);
+
+		SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt (sqrt of the absolute value)
+		if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(xmmFt, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
+		SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
+		mVUunpack_xyzw(xmmFt, xmmFt, 0);
+		mVUmergeRegs(xmmPQ, xmmFt, writeQ ? 4 : 8);
+	}
+}
+microVUf(void) mVU_RSQRT() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		u8 *ajmp8, *bjmp8;
+
+		getReg5(xmmFs, _Fs_, _Fsf_);
+		getReg5(xmmFt, _Ft_, _Ftf_);
+
+		//AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
+		/* Check for negative divide */
+		//SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1);
+		//AND32ItoR(gprT1, 1); //Check sign
+		//ajmp8 = JZ8(0);      //Skip if none are
+		//	OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
+		//x86SetJ8(ajmp8);
+
+		SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt (sqrt of the absolute value)
+		SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
+
+		// Ft can still be zero here, so we need to check whether it's zero and set the correct flag.
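+		// (sqrt(|Ft|) is zero only when Ft itself is zero, so the divisor test
+		//  can run on the post-sqrt value. CMPEQSS below compares just the low
+		//  field of a freshly cleared xmmT1 against the divisor, so the
+		//  MOVMSKPS/AND-1 pair extracts exactly that one "is zero" bit.)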
+		SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1);    // Clear xmmT1
+		SSE_CMPEQSS_XMM_to_XMM(xmmT1, xmmFt);  // Set all F's in the low field if it is zero
+
+		SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
+
+		AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
+		ajmp8 = JZ8(0);      // Skip if none are
+		//OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
+		SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
+		SSE_ORPS_M128_to_XMM(xmmFs, (uptr)mVU_maxvals); // xmmFs = +/-Max
+		bjmp8 = JMP8(0);
+		x86SetJ8(ajmp8);
+		SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt);
+		mVUclamp1(xmmFs, xmmFt, 8);
+		x86SetJ8(bjmp8);
+
+		mVUunpack_xyzw(xmmFs, xmmFs, 0);
+		mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
+	}
+}
+
+microVUf(void) mVU_EATAN() {}
+microVUf(void) mVU_EATANxy() {}
+microVUf(void) mVU_EATANxz() {}
+microVUf(void) mVU_EEXP() {}
+microVUf(void) mVU_ELENG() {}
+microVUf(void) mVU_ERCPR() {}
+microVUf(void) mVU_ERLENG() {}
+microVUf(void) mVU_ERSADD() {}
+microVUf(void) mVU_ERSQRT() {}
+microVUf(void) mVU_ESADD() {}
+microVUf(void) mVU_ESIN() {}
+microVUf(void) mVU_ESQRT() {}
+microVUf(void) mVU_ESUM() {}
+
+microVUf(void) mVU_FCAND() {}
+microVUf(void) mVU_FCEQ() {}
+microVUf(void) mVU_FCOR() {}
+microVUf(void) mVU_FCSET() {}
+microVUf(void) mVU_FCGET() {}
 
 microVUf(void) mVU_FMAND() {
 	microVU* mVU = mVUx;
@@ -119,29 +226,89 @@ microVUf(void) mVU_FSSET() {
 	}
 }
 
-microVUf(void) mVU_IADD(){}
-microVUf(void) mVU_IADDI(){}
-microVUf(void) mVU_IADDIU(){}
-microVUf(void) mVU_IAND(){}
-microVUf(void) mVU_IOR(){}
-microVUf(void) mVU_ISUB(){}
-microVUf(void) mVU_ISUBIU(){}
+microVUf(void) mVU_IADD() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		mVUallocVIa(gprT2, _Ft_);
+		ADD16RtoR(gprT1, gprT2);
+		mVUallocVIb(gprT1, _Fd_);
+	}
+}
+microVUf(void) mVU_IADDI() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		ADD16ItoR(gprT1, _Imm5_);
+		mVUallocVIb(gprT1, _Ft_);
+	}
+}
+microVUf(void) mVU_IADDIU() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		ADD16ItoR(gprT1, _Imm12_);
+		mVUallocVIb(gprT1, _Ft_);
+	}
+}
+microVUf(void) mVU_IAND() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		mVUallocVIa(gprT2, _Ft_);
+		AND32RtoR(gprT1, gprT2);
+		mVUallocVIb(gprT1, _Fd_);
+	}
+}
+microVUf(void) mVU_IOR() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		mVUallocVIa(gprT2, _Ft_);
+		OR32RtoR(gprT1, gprT2);
+		mVUallocVIb(gprT1, _Fd_);
+	}
+}
+microVUf(void) mVU_ISUB() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		mVUallocVIa(gprT2, _Ft_);
+		SUB16RtoR(gprT1, gprT2);
+		mVUallocVIb(gprT1, _Fd_);
+	}
+}
+microVUf(void) mVU_ISUBIU() {
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else {
+		mVUallocVIa(gprT1, _Fs_);
+		SUB16ItoR(gprT1, _Imm12_);
+		mVUallocVIb(gprT1, _Ft_);
+	}
+}
 
-microVUf(void) mVU_B(){}
-microVUf(void) mVU_BAL(){}
-microVUf(void) mVU_IBEQ(){}
-microVUf(void) mVU_IBGEZ(){}
-microVUf(void) mVU_IBGTZ(){}
-microVUf(void) mVU_IBLTZ(){}
-microVUf(void) mVU_IBLEZ(){}
-microVUf(void) mVU_IBNE(){}
-microVUf(void) mVU_JR(){}
-microVUf(void) mVU_JALR(){}
+microVUf(void) mVU_B() {}
+microVUf(void) mVU_BAL() {}
+microVUf(void) mVU_IBEQ() {}
+microVUf(void) mVU_IBGEZ() {}
+microVUf(void) mVU_IBGTZ() {}
+microVUf(void) mVU_IBLTZ() {}
+microVUf(void) mVU_IBLEZ() {}
+microVUf(void) mVU_IBNE() {}
+microVUf(void) mVU_JR() {}
+microVUf(void) mVU_JALR() {}
 
-microVUf(void) mVU_ILW(){} -microVUf(void) mVU_ISW(){} -microVUf(void) mVU_ILWR(){} -microVUf(void) mVU_ISWR(){} +microVUf(void) mVU_ILW() {} +microVUf(void) mVU_ISW() {} +microVUf(void) mVU_ILWR() {} +microVUf(void) mVU_ISWR() {} microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; @@ -156,8 +323,7 @@ microVUf(void) mVU_MFIR() { if (recPass == 0) {} else { mVUallocVIa(gprT1, _Fs_); - SHL32ItoR(gprT1, 16); - SAR32ItoR(gprT1, 16); + MOVSX32R16toR(gprT1, gprT1); SSE2_MOVD_R_to_XMM(xmmT1, gprT1); if (!_XYZW_SS) { mVUunpack_xyzw(xmmT1, xmmT1, 0); } mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -189,23 +355,23 @@ microVUf(void) mVU_MR32() { } } -microVUf(void) mVU_LQ(){} -microVUf(void) mVU_LQD(){} -microVUf(void) mVU_LQI(){} -microVUf(void) mVU_SQ(){} -microVUf(void) mVU_SQD(){} -microVUf(void) mVU_SQI(){} -//microVUf(void) mVU_LOI(){} +microVUf(void) mVU_LQ() {} +microVUf(void) mVU_LQD() {} +microVUf(void) mVU_LQI() {} +microVUf(void) mVU_SQ() {} +microVUf(void) mVU_SQD() {} +microVUf(void) mVU_SQI() {} +//microVUf(void) mVU_LOI() {} -microVUf(void) mVU_RINIT(){} -microVUf(void) mVU_RGET(){} -microVUf(void) mVU_RNEXT(){} -microVUf(void) mVU_RXOR(){} +microVUf(void) mVU_RINIT() {} +microVUf(void) mVU_RGET() {} +microVUf(void) mVU_RNEXT() {} +microVUf(void) mVU_RXOR() {} -microVUf(void) mVU_WAITP(){} -microVUf(void) mVU_WAITQ(){} +microVUf(void) mVU_WAITP() {} +microVUf(void) mVU_WAITQ() {} -microVUf(void) mVU_XGKICK(){} +microVUf(void) mVU_XGKICK() {} microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; if (recPass == 0) {} diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 0c215ab820..a6dc6233ca 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -61,6 +61,8 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define _Imm11_ (s32)(mVU->code & 0x400 ? 0xfffffc00 | (mVU->code & 0x3ff) : mVU->code & 0x3ff) #define _UImm11_ (s32)(mVU->code & 0x7ff) #define _Imm12_ (((mVU->code >> 21 ) & 0x1) << 11) | (mVU->code & 0x7ff) +#define _Imm5_ (((mVU->code & 0x400) ? 0xfff0 : 0) | ((mVU->code >> 6) & 0xf)) +#define _Imm15_ (((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff)) #define xmmT1 0 // Temp Reg #define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
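
Note on the DIV zero-divide path above: the XORPS/ANDPS/ORPS sequence builds a
result of +/-fmax whose sign is sign(Fs) XOR sign(Ft). As a rough reference
sketch only, the intended semantics can be written out in plain C++. This
assumes, as the register names suggest, that mVU_signbit holds 0x80000000 and
mVU_maxvals holds 0x7f7fffff in each element; vuDiv is an illustrative helper,
not code from this patch:

#include <cfloat>
#include <cstdint>
#include <cstring>

// Sketch of the special case handled by mVU_DIV: dividing by zero yields
// +/-fmax (0x7f7fffff) with the sign of (Fs ^ Ft), instead of an IEEE
// infinity or NaN; ordinary quotients are clamped into [-fmax, +fmax].
static float vuDiv(float fs, float ft)
{
	if (ft == 0.0f) { // matches CMPEQPS against zero (also true for -0.0f)
		std::uint32_t s, t;
		std::memcpy(&s, &fs, sizeof(s));
		std::memcpy(&t, &ft, sizeof(t));
		const std::uint32_t r = ((s ^ t) & 0x80000000u) | 0x7f7fffffu; // sign | fmax
		float out;
		std::memcpy(&out, &r, sizeof(out));
		return out; // the XORPS/ANDPS(signbit)/ORPS(maxvals) path
	}
	float q = fs / ft;              // the DIVSS path...
	if (q >  FLT_MAX) q =  FLT_MAX; // ...followed by mVUclamp1, which
	if (q < -FLT_MAX) q = -FLT_MAX; // saturates overflow to +/-fmax
	return q;
}

With these semantics, vuDiv(1.0f, 0.0f) returns +FLT_MAX and
vuDiv(-1.0f, 0.0f) returns -FLT_MAX, matching what the recompiled block
leaves in the Q register.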