mirror of https://github.com/PCSX2/pcsx2.git
Nneeve worked a bit on our lovely floating-point CPU's (in)ability to do RSQRT the way the PS2 does.
Should be better now ;) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@822 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
382431ebcb
commit
aea075320b
|
@ -1749,6 +1749,7 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
{
|
||||
u8 *pjmp1, *pjmp2;
|
||||
u32 *pjmp32;
|
||||
u8 *qjmp1, *qjmp2;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
|
||||
//if (t1reg == -1) {Console::Error("FPU: RSQRT Allocation Error!");}
|
||||
|
@ -1756,20 +1757,7 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- Check for zero ---*/
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
|
||||
SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative
|
||||
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
|
||||
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
|
||||
pjmp32 = JMP32(0);
|
||||
x86SetJ8(pjmp1);
|
||||
|
||||
/*--- Check for negative SQRT ---*/
|
||||
/*--- (first) Check for negative SQRT ---*/
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp2 = JZ8(0); //Skip if not set
|
||||
|
@ -1777,6 +1765,30 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
|
||||
x86SetJ8(pjmp2);
|
||||
|
||||
/*--- Check for zero ---*/
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
/*--- Check for 0/0 ---*/
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, regd);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if regd == zero, sign will be set)
|
||||
qjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
qjmp2 = JMP8(0);
|
||||
x86SetJ8(qjmp1); //x/0 but not 0/0
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 )
|
||||
x86SetJ8(qjmp2);
|
||||
|
||||
/*--- Make regd +/- Maximum ---*/
|
||||
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
|
||||
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
|
||||
pjmp32 = JMP32(0);
|
||||
x86SetJ8(pjmp1);
|
||||
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) {
|
||||
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
|
||||
fpuFloat2(regd);
|
||||
|
|
|
@ -886,7 +886,6 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
|||
void recSQRT_S_xmm(int info)
|
||||
{
|
||||
u8 *pjmp;
|
||||
u32 *pjmpx;
|
||||
static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
|
||||
int roundmodeFlag = 0;
|
||||
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
|
||||
|
@ -907,15 +906,8 @@ void recSQRT_S_xmm(int info)
|
|||
|
||||
if (FPU_FLAGS_ID) {
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
|
||||
//--- Check for zero (skip sqrt if zero)
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_D);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1);
|
||||
pjmpx = JNE32(0);
|
||||
|
||||
//--- Check for negative SQRT ---
|
||||
|
||||
//--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say)
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp = JZ8(0); //Skip if none are
|
||||
|
@ -934,9 +926,7 @@ void recSQRT_S_xmm(int info)
|
|||
SSE2_SQRTSD_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
|
||||
ToPS2FPU(EEREC_D, false, t1reg, false);
|
||||
|
||||
x86SetJ32(pjmpx);
|
||||
|
||||
|
||||
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
|
||||
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
|
||||
}
|
||||
|
@ -954,6 +944,7 @@ FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);
|
|||
void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when regd <- Fs and regt <- Ft (Sets correct flags)
|
||||
{
|
||||
u8 *pjmp1, *pjmp2;
|
||||
u8 *qjmp1, *qjmp2;
|
||||
u32 *pjmp32;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
|
||||
|
@ -962,19 +953,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
|
||||
|
||||
//--- Check for zero ---
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, regt);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags (even when 0/0)
|
||||
SSE_XORPS_XMM_to_XMM(regd, regt); // Make regd Positive or Negative
|
||||
SetMaxValue(regd); //clamp to max
|
||||
pjmp32 = JMP32(0);
|
||||
x86SetJ8(pjmp1);
|
||||
|
||||
//--- Check for negative SQRT ---
|
||||
//--- (first) Check for negative SQRT ---
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, regt);
|
||||
AND32ItoR(tempReg, 1); //Check sign
|
||||
pjmp2 = JZ8(0); //Skip if not set
|
||||
|
@ -982,6 +961,29 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
SSE_ANDPS_M128_to_XMM(regt, (uptr)&s_pos[0]); // Make regt Positive
|
||||
x86SetJ8(pjmp2);
|
||||
|
||||
//--- Check for zero ---
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, regt);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
|
||||
//--- Check for 0/0 ---
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, regd);
|
||||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if regd == zero, sign will be set)
|
||||
qjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
qjmp2 = JMP8(0);
|
||||
x86SetJ8(qjmp1); //x/0 but not 0/0
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 )
|
||||
x86SetJ8(qjmp2);
|
||||
|
||||
SetMaxValue(regd); //clamp to max
|
||||
pjmp32 = JMP32(0);
|
||||
x86SetJ8(pjmp1);
|
||||
|
||||
ToDouble(regt); ToDouble(regd);
|
||||
|
||||
SSE2_SQRTSD_XMM_to_XMM(regt, regt);
|
||||
|
|
|
@ -181,6 +181,7 @@ PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2]);
|
|||
void recVUMI_RSQRT(VURegs *VU, int info)
|
||||
{
|
||||
u8 *ajmp8, *bjmp8;
|
||||
u8 *qjmp1, *qjmp2;
|
||||
int t1reg, t1boolean;
|
||||
//SysPrintf("recVUMI_RSQRT()\n");
|
||||
|
||||
|
@ -215,11 +216,24 @@ void recVUMI_RSQRT(VURegs *VU, int info)
|
|||
|
||||
AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation
|
||||
ajmp8 = JZ8(0); // Skip if none are
|
||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
|
||||
|
||||
|
||||
//check for 0/0
|
||||
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
||||
|
||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear EEREC_TEMP
|
||||
SSE_CMPEQPS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
|
||||
|
||||
AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation
|
||||
qjmp1 = JZ8(0);
|
||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
||||
qjmp2 = JMP8(0);
|
||||
x86SetJ8(qjmp1);
|
||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
|
||||
x86SetJ8(qjmp2);
|
||||
|
||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
|
||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // EEREC_TEMP = +/-Max
|
||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
|
||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||
bjmp8 = JMP8(0);
|
||||
x86SetJ8(ajmp8);
|
||||
|
|
Loading…
Reference in New Issue