microVU: some lower opcode changes...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@861 96395faa-99c1-11dd-bbfe-3dabce05a288
cottonvibes 2009-03-30 07:43:37 +00:00
parent e0c3ecbc8f
commit fa134fb61b
1 changed file with 102 additions and 54 deletions


@@ -23,110 +23,108 @@
// Micro VU Micromode Lower instructions
//------------------------------------------------------------------
#define testZero(xmmReg, xmmTemp, gprTemp) { \
SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \
SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if each vector is zero */ \
SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); /* Move the sign bits */ \
TEST32ItoR(gprTemp, 1); /* Test "Is Zero" bit */ \
}
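The new testZero macro packages the zero check that the DIV/RSQRT paths below used to open-code (XORPS, CMPEQPS, MOVMSKPS, then a bit test). In effect the emitted x86 decides whether the scalar in the low lane of xmmReg is zero; a minimal scalar model of that decision (lowLaneIsZero is an illustrative name, not emitter code):

#include <cstdint>

// Model of the check testZero() emits: CMPEQPS against an all-zero register
// turns each lane that equals 0.0f into all ones, MOVMSKPS collects the lane
// sign bits, and TEST ..., 1 then looks only at bit 0 (the x/low lane).
static bool lowLaneIsZero(const float reg[4]) {
    uint32_t mask = 0;
    for (int i = 0; i < 4; i++)
        if (reg[i] == 0.0f) mask |= 1u << i;
    return (mask & 1) != 0; // callers then JZ8 past the zero-handling code when this is false
}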
microVUf(void) mVU_DIV() {
microVU* mVU = mVUx;
if (!recPass) { mVUanalyzeFDIV<vuIndex>(_Fs_, _Fsf_, _Ft_, _Ftf_); }
else {
u8 *pjmp, *pjmp1;
u32 *ajmp32, *bjmp32;
u8 *ajmp, *bjmp, *cjmp, *djmp;
getReg5(xmmFs, _Fs_, _Fsf_);
getReg5(xmmFt, _Ft_, _Ftf_);
// FT can be zero here! So we need to check if it's zero and set the correct flag.
SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1); // Clear xmmT1
SSE_CMPEQPS_XMM_to_XMM(xmmT1, xmmFt); // Set all F's if each vector is zero
SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
testZero(xmmFt, xmmT1, gprT1); // Test if Ft is zero
cjmp = JZ8(0); // Skip if not zero
AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
ajmp32 = JZ32(0); // Skip if none are
SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1); // Clear xmmT1
SSE_CMPEQPS_XMM_to_XMM(xmmT1, xmmFs); // Set all F's if each vector is zero
SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0);
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
ajmp = JZ8(0);
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
pjmp1 = JMP8(0);
x86SetJ8(pjmp);
bjmp = JMP8(0);
x86SetJ8(ajmp);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide (only when not 0/0)
x86SetJ8(pjmp1);
x86SetJ8(bjmp);
SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt);
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // If division by zero, then xmmFs = +/- fmax
bjmp32 = JMP32(0);
x86SetJ32(ajmp32);
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt);
mVUclamp1<vuIndex>(xmmFs, xmmFt, 8);
x86SetJ32(bjmp32);
djmp = JMP8(0);
x86SetJ8(cjmp);
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt);
mVUclamp1<vuIndex>(xmmFs, xmmFt, 8);
x86SetJ8(djmp);
mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFs, writeQ ? 4 : 8);
}
}
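Both the old and the refactored jump layouts above encode the same VU DIV status behaviour: 0/0 sets the invalid flag (0x410), x/0 with a non-zero numerator sets the divide-by-zero flag (0x820) and forces a signed maximum, and otherwise the quotient is computed, clamped, and merged into the Q half of xmmPQ. A rough scalar sketch of that logic; vuDiv and VU_FMAX are stand-in names, with VU_FMAX playing the role of the xmmMax constant:

#include <cmath>
#include <cstdint>

static const float VU_FMAX = 3.4028235e38f; // stand-in for the xmmMax clamp value

// Sketch of the result/flag logic the DIV block above recompiles.
static float vuDiv(float fs, float ft, uint32_t &divFlag) {
    if (ft == 0.0f) {
        divFlag = (fs == 0.0f) ? 0x410  // invalid flag: 0/0
                               : 0x820; // zero-divide flag
        // XORPS / ANDPS signbit / ORPS max: +/-fmax with sign(fs) ^ sign(ft)
        bool neg = std::signbit(fs) != std::signbit(ft);
        return neg ? -VU_FMAX : VU_FMAX;
    }
    divFlag = 0;                        // clear I/D flags
    float q = fs / ft;
    // mVUclamp1-style clamp of the finished quotient
    return std::fmax(std::fmin(q, VU_FMAX), -VU_FMAX);
}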
microVUf(void) mVU_SQRT() {
microVU* mVU = mVUx;
if (!recPass) { mVUanalyzeFDIV<vuIndex>(0, 0, _Ft_, _Ftf_); }
else {
//u8* pjmp;
u8* ajmp;
getReg5(xmmFt, _Ft_, _Ftf_);
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
//AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
/* Check for negative sqrt */
//SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmFt);
//AND32ItoR(gprT1, 1); //Check sign
//pjmp = JZ8(0); //Skip if none are
// OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
//x86SetJ8(pjmp);
SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmFt);
AND32ItoR(gprT1, 1); //Check sign
ajmp = JZ8(0); //Skip if none are
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Invalid Flag - Negative number sqrt
SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt
x86SetJ8(ajmp);
SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt
if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
mVUunpack_xyzw<vuIndex>(xmmFt, xmmFt, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFt, writeQ ? 4 : 8);
}
}
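mVU_SQRT uses the same flag convention: a negative operand raises the invalid flag (0x410), the root is then taken on the absolute value (the mVU_absclip mask), and with CHECK_VU_OVERFLOW the result is clamped against xmmMax. A scalar sketch, again with illustrative names:

#include <cmath>
#include <cstdint>

// Sketch of the SQRT behaviour recompiled above.
static float vuSqrt(float ft, uint32_t &divFlag) {
    divFlag = 0;                            // I/D flags cleared up front
    if (std::signbit(ft))
        divFlag = 0x410;                    // invalid flag: sqrt of a negative number
    float q = std::sqrt(std::fabs(ft));     // "cardinal" sqrt of |ft|
    return std::fmin(q, 3.4028235e38f);     // positive-only clamp (CHECK_VU_OVERFLOW)
}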
microVUf(void) mVU_RSQRT() {
microVU* mVU = mVUx;
if (!recPass) { mVUanalyzeFDIV<vuIndex>(_Fs_, _Fsf_, _Ft_, _Ftf_); }
else {
u8 *ajmp8, *bjmp8;
u8 *ajmp8, *bjmp8, *cjmp8, *djmp8;
getReg5(xmmFs, _Fs_, _Fsf_);
getReg5(xmmFt, _Ft_, _Ftf_);
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
//AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
/* Check for negative divide */
//SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1);
//AND32ItoR(gprT1, 1); //Check sign
//ajmp8 = JZ8(0); //Skip if none are
// OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
//x86SetJ8(ajmp8);
SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1);
AND32ItoR(gprT1, 1); //Check sign
ajmp8 = JZ8(0); //Skip if none are
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Invalid Flag - Negative number sqrt
SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt
x86SetJ8(ajmp8);
SSE_ANDPS_M128_to_XMM(xmmFt, (uptr)mVU_absclip); // Do a cardinal sqrt
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
testZero(xmmFt, xmmT1, gprT1); // Test if Ft is zero
ajmp8 = JZ8(0); // Skip if not zero
// Ft can still be zero here! So we need to check if it's zero and set the correct flag.
SSE_XORPS_XMM_to_XMM(xmmT1, xmmT1); // Clear t1reg
SSE_CMPEQSS_XMM_to_XMM(xmmT1, xmmFt); // Set all F's if each vector is zero
testZero(xmmFs, xmmT1, gprT1); // Test if Fs is zero
bjmp8 = JZ8(0); // Skip if none are
MOV32ItoM((uptr)&mVU->divFlag, 0x410); // Set invalid flag (0/0)
cjmp8 = JMP8(0);
x86SetJ8(bjmp8);
MOV32ItoM((uptr)&mVU->divFlag, 0x820); // Zero divide flag (only when not 0/0)
x86SetJ8(cjmp8);
SSE_MOVMSKPS_XMM_to_R32(gprT1, xmmT1); // Move the sign bits of the previous calculation
AND32ItoR(gprT1, 1); // Grab "Is Zero" bits from the previous calculation
ajmp8 = JZ8(0); // Skip if none are
//OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit);
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // EEREC_TEMP = +/-Max
bjmp8 = JMP8(0);
SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max
djmp8 = JMP8(0);
x86SetJ8(ajmp8);
SSE_DIVSS_XMM_to_XMM(xmmFs, xmmFt);
mVUclamp1<vuIndex>(xmmFs, xmmFt, 8);
x86SetJ8(bjmp8);
x86SetJ8(djmp8);
mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFs, writeQ ? 4 : 8);
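The RSQRT path combines both checks: a negative Ft (the code samples a sign bit before the root) raises the invalid flag, the root is taken on |Ft|, and a zero root then falls into the same 0/0 versus divide-by-zero split as DIV, with the +/-max result keeping Fs's sign. A scalar sketch under the same naming assumptions as the DIV/SQRT models above:

#include <cmath>
#include <cstdint>

// Sketch of the RSQRT (Fs / sqrt(Ft)) behaviour the block above recompiles.
static float vuRsqrt(float fs, float ft, uint32_t &divFlag) {
    divFlag = 0;
    if (std::signbit(ft))
        divFlag = 0x410;                        // invalid flag: sqrt of a negative number
    float root = std::sqrt(std::fabs(ft));
    if (root == 0.0f) {
        divFlag = (fs == 0.0f) ? 0x410 : 0x820; // 0/0 vs. divide-by-zero
        return std::copysign(3.4028235e38f, fs); // +/-max, keeping Fs's sign
    }
    float q = fs / root;
    return std::fmax(std::fmin(q, 3.4028235e38f), -3.4028235e38f); // mVUclamp1-style clamp
}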
@@ -140,6 +138,7 @@ microVUf(void) mVU_RSQRT() {
SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \
}
microVUt(void) mVU_EATAN_() {
microVU* mVU = mVUx;
@@ -159,6 +158,7 @@ microVUt(void) mVU_EATAN_() {
SSE_ADDSS_M32_to_XMM(xmmPQ, (uptr)mVU_Pi4);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6);
}
microVUf(void) mVU_EATAN() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -174,6 +174,7 @@ microVUf(void) mVU_EATAN() {
mVU_EATAN_<vuIndex>();
}
}
microVUf(void) mVU_EATANxy() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -190,6 +191,7 @@ microVUf(void) mVU_EATANxy() {
mVU_EATAN_<vuIndex>();
}
}
microVUf(void) mVU_EATANxz() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -206,12 +208,14 @@ microVUf(void) mVU_EATANxz() {
mVU_EATAN_<vuIndex>();
}
}
#define eexpHelper(addr) { \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \
SSE_MOVAPS_XMM_to_XMM(xmmFt, xmmT1); \
SSE_MULSS_M32_to_XMM(xmmFt, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFt); \
}
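eexpHelper packages one term of a power-series evaluation, and the eatanHelper/esinHelper macros elsewhere in this file follow the same shape: advance the running power held in xmmT1 by one multiply with the operand, scale it by a constant loaded from memory, and accumulate the term into xmmPQ. A scalar model of the pattern; the initial power, initial accumulator, and coefficient table stand in for state the surrounding routines set up outside these hunks:

// Scalar model of one eexpHelper(addr) expansion, repeated over a coefficient table:
//   xmmT1 *= xmmFs;  xmmFt = xmmT1 * (*addr);  xmmPQ += xmmFt;
static float evalSeries(float x, float power0, float acc0, const float *coeff, int terms) {
    float power = power0; // xmmT1's value before the first helper call
    float acc   = acc0;   // running sum in the active lane of xmmPQ
    for (int i = 0; i < terms; i++) {
        power *= x;              // SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs)
        acc += coeff[i] * power; // MULSS by the constant, ADDSS into xmmPQ
    }
    return acc;
}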
microVUf(void) mVU_EEXP() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -243,6 +247,7 @@ microVUf(void) mVU_EEXP() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
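Most of the EFU routines here end with SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6), and the "// Flip back" comments suggest the same shuffle is applied on entry. The immediates appear to swap lane 0 of xmmPQ, where the scalar MULSS/ADDSS work is done, with the lane that holds P for the current writeP phase: 0x27 exchanges lanes 0 and 3, 0xC6 exchanges lanes 0 and 2. A small standalone check of that reading (illustration only, not emitter code):

#include <cstdint>
#include <cstdio>

// Apply a PSHUFD immediate to four lanes: dst[i] = src[(imm >> (2*i)) & 3].
static void pshufd(uint32_t dst[4], const uint32_t src[4], uint8_t imm) {
    for (int i = 0; i < 4; i++)
        dst[i] = src[(imm >> (2 * i)) & 3];
}

int main() {
    uint32_t pq[4] = {0, 1, 2, 3}, out[4];
    pshufd(out, pq, 0x27);                                          // writeP case
    printf("0x27: %u %u %u %u\n", out[0], out[1], out[2], out[3]);  // 3 1 2 0: lanes 0 and 3 swapped
    pshufd(out, pq, 0xC6);                                          // !writeP case
    printf("0xC6: %u %u %u %u\n", out[0], out[1], out[2], out[3]);  // 2 1 0 3: lanes 0 and 2 swapped
    return 0;
}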
microVUt(void) mVU_sumXYZ() {
// regd.x = x ^ 2 + y ^ 2 + z ^ 2
if( cpucaps.hasStreamingSIMD4Extensions ) {
@@ -258,6 +263,7 @@ microVUt(void) mVU_sumXYZ() {
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); // x ^ 2 + y ^ 2 + z ^ 2
}
}
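mVU_sumXYZ is the shared front end for the ELENG/ERLENG/ERSADD/ESADD-style routines that follow: with SSE4 it can use a dot product, and the fallback shown here squares the register and folds the x, y and z lanes into the low lane of xmmPQ. The scalar equivalent:

// Scalar equivalent of the x^2 + y^2 + z^2 sum built into xmmPQ's low lane.
static float sumXYZ(const float vf[4]) {
    return vf[0] * vf[0] + vf[1] * vf[1] + vf[2] * vf[2]; // w is not included
}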
microVUf(void) mVU_ELENG() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -269,6 +275,7 @@ microVUf(void) mVU_ELENG() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERCPR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -282,6 +289,7 @@ microVUf(void) mVU_ERCPR() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERLENG() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -296,6 +304,7 @@ microVUf(void) mVU_ERLENG() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERSADD() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -310,6 +319,7 @@ microVUf(void) mVU_ERSADD() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ERSQRT() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -323,6 +333,7 @@ microVUf(void) mVU_ERSQRT() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ESADD() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -333,12 +344,14 @@ microVUf(void) mVU_ESADD() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
#define esinHelper(addr) { \
SSE_MULSS_XMM_to_XMM(xmmT1, xmmFt); \
SSE_MOVAPS_XMM_to_XMM(xmmFs, xmmT1); \
SSE_MULSS_M32_to_XMM(xmmFs, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmFs); \
}
microVUf(void) mVU_ESIN() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -364,6 +377,7 @@ microVUf(void) mVU_ESIN() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ESQRT() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -374,6 +388,7 @@ microVUf(void) mVU_ESQRT() {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back
}
}
microVUf(void) mVU_ESUM() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -400,6 +415,7 @@ microVUf(void) mVU_FCAND() {
mVUallocVIb<vuIndex>(gprT1, 1);
}
}
microVUf(void) mVU_FCEQ() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -411,6 +427,7 @@ microVUf(void) mVU_FCEQ() {
mVUallocVIb<vuIndex>(gprT1, 1);
}
}
microVUf(void) mVU_FCGET() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -420,6 +437,7 @@ microVUf(void) mVU_FCGET() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FCOR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -431,6 +449,7 @@ microVUf(void) mVU_FCOR() {
mVUallocVIb<vuIndex>(gprT1, 1);
}
}
microVUf(void) mVU_FCSET() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -450,6 +469,7 @@ microVUf(void) mVU_FMAND() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FMEQ() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -462,6 +482,7 @@ microVUf(void) mVU_FMEQ() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FMOR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -482,6 +503,7 @@ microVUf(void) mVU_FSAND() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FSEQ() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -493,6 +515,7 @@ microVUf(void) mVU_FSEQ() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FSOR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -502,6 +525,7 @@ microVUf(void) mVU_FSOR() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_FSSET() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -528,6 +552,7 @@ microVUf(void) mVU_IADD() {
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
microVUf(void) mVU_IADDI() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -537,6 +562,7 @@ microVUf(void) mVU_IADDI() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_IADDIU() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -546,6 +572,7 @@ microVUf(void) mVU_IADDIU() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_IAND() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -558,6 +585,7 @@ microVUf(void) mVU_IAND() {
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
microVUf(void) mVU_IOR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -570,6 +598,7 @@ microVUf(void) mVU_IOR() {
mVUallocVIb<vuIndex>(gprT1, _Fd_);
}
}
microVUf(void) mVU_ISUB() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -586,6 +615,7 @@ microVUf(void) mVU_ISUB() {
else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); }
}
}
microVUf(void) mVU_ISUBIU() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -604,6 +634,7 @@ microVUf(void) mVU_MOVE() {
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MFIR() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
@@ -615,6 +646,7 @@ microVUf(void) mVU_MFIR() {
mVUsaveReg<vuIndex>(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MFP() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
@@ -623,6 +655,7 @@ microVUf(void) mVU_MFP() {
mVUsaveReg<vuIndex>(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W);
}
}
microVUf(void) mVU_MTIR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -631,6 +664,7 @@ microVUf(void) mVU_MTIR() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_MR32() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
@@ -659,6 +693,7 @@ microVUf(void) mVU_ILW() {
}
}
}
microVUf(void) mVU_ILWR() {
microVU* mVU = mVUx;
if (!recPass) { /*If (!_Ft_) nop();*/ }
@@ -676,6 +711,7 @@ microVUf(void) mVU_ILWR() {
}
}
}
microVUf(void) mVU_ISW() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -700,6 +736,7 @@ microVUf(void) mVU_ISW() {
}
}
}
microVUf(void) mVU_ISWR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -740,6 +777,7 @@ microVUf(void) mVU_LQ() {
}
}
}
microVUf(void) mVU_LQD() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -760,6 +798,7 @@ microVUf(void) mVU_LQD() {
}
}
}
microVUf(void) mVU_LQI() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -781,6 +820,7 @@ microVUf(void) mVU_LQI() {
}
}
}
microVUf(void) mVU_SQ() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -798,6 +838,7 @@ microVUf(void) mVU_SQ() {
}
}
}
microVUf(void) mVU_SQD() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -816,6 +857,7 @@ microVUf(void) mVU_SQD() {
}
}
}
microVUf(void) mVU_SQI() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -848,6 +890,7 @@ microVUf(void) mVU_RINIT() {
else MOV32ItoR(gprR, 0x3f800000);
}
}
microVUt(void) mVU_RGET_() {
microVU* mVU = mVUx;
if (_Ft_) {
@@ -857,11 +900,13 @@ microVUt(void) mVU_RGET_() {
if (_W) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[3], gprR);
}
}
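mVU_RGET_ broadcasts the raw 32-bit R register value held in gprR into whichever fields of VF[Ft] the instruction's dest mask enables; only the W store is visible in this hunk, but X/Y/Z follow the same pattern. A sketch of the equivalent operation (the boolean field flags are illustrative stand-ins for _X/_Y/_Z/_W):

#include <cstdint>

// Sketch: copy the raw R register into the destination fields selected
// by the dest mask (_X/_Y/_Z/_W in the recompiler).
static void rget(uint32_t vfFt[4], uint32_t R, bool X, bool Y, bool Z, bool W) {
    if (X) vfFt[0] = R;
    if (Y) vfFt[1] = R;
    if (Z) vfFt[2] = R;
    if (W) vfFt[3] = R;  // matches the visible MOV32RtoM(...VF[_Ft_].UL[3], gprR)
}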
microVUf(void) mVU_RGET() {
microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ }
else { mVU_RGET_<vuIndex>(); }
}
microVUf(void) mVU_RNEXT() {
microVU* mVU = mVUx;
if (!recPass) { /*if (!_Ft_) nop();*/ }
@@ -883,6 +928,7 @@ microVUf(void) mVU_RNEXT() {
mVU_RGET_<vuIndex>();
}
}
microVUf(void) mVU_RXOR() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -900,6 +946,7 @@ microVUf(void) mVU_WAITP() {
if (!recPass) {}
else {}
}
microVUf(void) mVU_WAITQ() {
microVU* mVU = mVUx;
if (!recPass) {}
@@ -914,6 +961,7 @@ microVUf(void) mVU_XTOP() {
mVUallocVIb<vuIndex>(gprT1, _Ft_);
}
}
microVUf(void) mVU_XITOP() {
microVU* mVU = mVUx;
if (!recPass) {}