diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index 7aaa844503..180b27f422 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -50,8 +50,8 @@ extern PCSX2_ALIGNED16_DECL(u32 g_minvals[4]); extern PCSX2_ALIGNED16_DECL(u32 g_maxvals[4]); -static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0, 0, 0 }; -static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0, 0, 0 }; +static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff }; +static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; #define REC_FPUBRANCH(f) \ void f(); \ diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index 443c657e3f..99c75e66ff 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -3665,480 +3665,83 @@ void recVUMI_CLIP(VURegs *VU, int info) /* VU Lower instructions */ /******************************/ PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]); -PCSX2_ALIGNED16(u64 DIV_TEMP_XMM2[2]); void recVUMI_DIV(VURegs *VU, int info) { - int t1reg, t2reg; - u8* pjmp; - u8* pjmp1; - u32* pjmp2; - u32* pjmp32; + int t1reg, t1boolean, vftemp; + u8* pjmp, * pjmp1; + u32* pjmp2, * pjmp32; + //SysPrintf("VU DIV Opcode \n"); AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags + + vftemp = ALLOCTEMPX86(MODE_8BITREG); + if (vftemp < 0) {SysPrintf("VU: SQRT allocation error!!!\n"); vftemp = EAX;} - if( _Fs_ == 0 ) { - - if( _Ft_ == 0 ) { - if( _Fsf_ < 3 ) { // 0/ft - if( _Ftf_ < 3 ) { // 0/0 - //SysPrintf("DIV 0/0\n"); - OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag (only when 0/0) - MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff); - } - else { // 0/1 ----- zero divided by 1 is zero! :p - //SysPrintf("DIV 0/0\n"); - MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x00000000); - } - } - else if( _Ftf_ < 3 ) { // 1/0 - //SysPrintf("DIV 1/0\n"); - OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); //Zero divide (only when not 0/0) - MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff); - } - else { // 1/1 - //SysPrintf("DIV 1/1\n"); - MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x3f800000); - } - return; - } - - if( _Fsf_ == 3 ) // = 1 - { // don't use RCPSS (very bad precision) - if( _Ftf_ != 0 || (xmmregs[EEREC_T].mode & MODE_WRITE) ) - { - if( _Ftf_ ) - { - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) // 1/n ---- needs work, ft can also be zero! - { - //SysPrintf("DIV: Fixed! 1 \n"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - - if (CHECK_EXTRA_OVERFLOW) - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero - - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0) - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1 - SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); // t1reg = 1 / EEREC_TEMP - vuFloat2(t1reg, t1reg, 0x8); // check for overflow - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg - - x86SetJ32(pjmp2); - - _freeXMMreg(t1reg); - - return; - } - else // 1/n ---- needs work, ft can also be zero! - { - //SysPrintf("DIV: Fixed! 2 \n"); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - - if (CHECK_EXTRA_OVERFLOW) - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - - t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero - - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0) - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1 - SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); // t1reg = 1 / EEREC_TEMP - vuFloat2(t1reg, t1reg, 0x8); // check for overflow - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg - - x86SetJ32(pjmp2); - - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg - - return; - } - } - else - { // 1/n ---- (SS) needs work, ft can also be zero! - //SysPrintf("DIV: Fixed! 3 \n"); - - if (CHECK_EXTRA_OVERFLOW) - vuFloat2(EEREC_T, EEREC_TEMP, 0x8); - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero - - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0) - SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // EEREC_TEMP <- EEREC_T - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // EEREC_TEMP = 1 / EEREC_T - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); // check for overflow - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- t1reg - - x86SetJ32(pjmp2); - - return; - } - } - else { // 1/n ---- (SS) needs work, ft can also be zero! - //SysPrintf("DIV: Fixed! 4 \n"); - - t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address - - SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[_Ft_].UL[_Ftf_]); // t1reg.x <- Ft.Ftf - - if (CHECK_EXTRA_OVERFLOW) - vuFloat2(t1reg, EEREC_TEMP, 0x8); - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, t1reg); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero - - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0) - SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(t1reg, (uptr)&g_maxvals[0]); // If 0, then t1reg = +/- fmax - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); // EEREC_TEMP = 1 / t1reg - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); // check for overflow - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- t1reg - - x86SetJ32(pjmp2); - - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg - - return; - } - } - else { // 0/n ---- So result is +/- 0, or +/- Fmax if (FT == 0) - //SysPrintf("FS = 0, FT = n \n"); - - if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - else _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); // EEREC_TEMP.x <- EEREC_T.ftf - - t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); - //t2reg = (EEREC_TEMP <= 1) ? (EEREC_TEMP + 2) : (EEREC_TEMP - 2); - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip if none are - - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0) - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); // If != 0, then EEREC_TEMP = +/- 0 - - x86SetJ32(pjmp2); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data - - return; - } + t1reg = _vuGetTempXMMreg(info); + if( t1reg < 0 ) { + for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S)|| (t1reg == EEREC_T) ); t1reg++) + ; // Makes t1reg not be EEREC_TEMP, EEREC_S, or EEREC_T. + SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address + t1boolean = 1; } - else { // _Fs_ != 0 - if( _Ft_ == 0 ) { - if( _Ftf_ < 3 ) { // needs extra work, fs can also be zero! - //SysPrintf("DIV: FS = n, FT == 0 ---- Finished! \n"); + else t1boolean = 0; - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); // EEREC_TEMP.x <- EEREC_S.fsf + // FT can be zero here! so we need to check if its zero and set the correct flag. + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero - t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address + SSE_MOVMSKPS_XMM_to_R32( vftemp, EEREC_TEMP); // Move the sign bits of the previous calculation - // FS can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero + AND32ItoR( vftemp, (1<<_Ftf_) ); // Grab "Is Zero" bits from the previous calculation + pjmp32 = JZ32(0); // Skip if none are - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // Set all F's if each vector is zero + SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP); // Move the sign bits of the previous calculation - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg - } - else { - //SysPrintf("DIV: FS = n, FT == 1 \n"); - if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - if (CHECK_EXTRA_OVERFLOW) - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - } - - return; - } + AND32ItoR( vftemp, (1<<_Fsf_) ); // Grab "Is Zero" bits from the previous calculation + pjmp = JZ8(0); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) + pjmp1 = JMP8(0); + x86SetJ8(pjmp); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) + x86SetJ8(pjmp1); if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_T); + else _unpackVF_xyzw(t1reg, EEREC_T, _Ftf_); - if( _Ftf_ ) - { - t1reg = _vuGetTempXMMreg(info); + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); + SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - if( t1reg >= 0 ) - { - //SysPrintf("Second Half of DIV Opcode: Fixed 1 \n"); - _unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_); + pjmp2 = JMP32(0); - if (CHECK_EXTRA_OVERFLOW) { - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - vuFloat2(t1reg, t1reg, 0x8); - } - - for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Makes t2reg not be EEREC_TEMP or t1reg. + x86SetJ32(pjmp32); - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address + if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_T); + else _unpackVF_xyzw(t1reg, EEREC_T, _Ftf_); - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, t1reg); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip if none are - - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - - x86SetJ32(pjmp2); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - - SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data - _freeXMMreg(t1reg); // free t1reg - - return; - } - else - { - //SysPrintf("Second Half of DIV Opcode: Fixed 2 \n"); - t1reg = EEREC_T; - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address - _unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_); - - if (CHECK_EXTRA_OVERFLOW) { - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - vuFloat2(t1reg, t1reg, 0x8); - } - - for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Makes t2reg not be EEREC_TEMP or t1reg. - - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, t1reg); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip if none are - - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - - x86SetJ32(pjmp2); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - - SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data - SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data - - return; - } - } - else - { - //SysPrintf("Second Half of DIV Opcode: Fixed 3 \n"); - - if (CHECK_EXTRA_OVERFLOW) { - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - vuFloat2(EEREC_T, EEREC_T, 0x8); - } - - for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == EEREC_T) ); t2reg++) - ; // Makes t2reg not be EEREC_TEMP or EEREC_T. - - SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address - - // FT can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_T); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp32 = JZ32(0); // Skip if none are - - SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg - XOR32RtoR( EAX, EAX ); // Clear EAX - SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - - pjmp2 = JMP32(0); - - x86SetJ32(pjmp32); - - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); - - x86SetJ32(pjmp2); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data - - return; - } + if (CHECK_EXTRA_OVERFLOW) { + vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); + vuFloat2(t1reg, t1reg, 0x8); } - vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); + if (CHECK_OVERFLOW) vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); + + x86SetJ32(pjmp2); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); + + if (t1boolean) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data + else _freeXMMreg(t1reg); // free t1reg + + _freeX86reg(vftemp); // free vftemp } void recVUMI_SQRT( VURegs *VU, int info ) @@ -5187,7 +4790,7 @@ void recVUMI_RNEXT( VURegs *VU, int info ) int rreg, x86temp0, x86temp1; if ( _Ft_ == 0) return; - SysPrintf("VU RNEXT Opcode \n"); + //SysPrintf("VU RNEXT Opcode \n"); rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ);