Nneeve worked a bit on our lovely floating-point CPU's (in)ability to do RSQRT the way the PS2 does.

Should be better now ;)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@822 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
ramapcsx2 2009-03-19 19:47:06 +00:00
parent 382431ebcb
commit aea075320b
3 changed files with 71 additions and 43 deletions

View File

@ -1749,6 +1749,7 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
{
u8 *pjmp1, *pjmp2;
u32 *pjmp32;
u8 *qjmp1, *qjmp2;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
//if (t1reg == -1) {Console::Error("FPU: RSQRT Allocation Error!");}
@ -1756,20 +1757,7 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
/*--- Check for zero ---*/
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
pjmp32 = JMP32(0);
x86SetJ8(pjmp1);
/*--- Check for negative SQRT ---*/
/*--- (first) Check for negative SQRT ---*/
SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg);
AND32ItoR(tempReg, 1); //Check sign
pjmp2 = JZ8(0); //Skip if not set
@ -1777,6 +1765,30 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
x86SetJ8(pjmp2);
/*--- Check for zero ---*/
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
/*--- Check for 0/0 ---*/
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, regd);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if regd == zero, sign will be set)
qjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 )
qjmp2 = JMP8(0);
x86SetJ8(qjmp1); //x/0 but not 0/0
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 )
x86SetJ8(qjmp2);
/*--- Make regd +/- Maximum ---*/
SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
pjmp32 = JMP32(0);
x86SetJ8(pjmp1);
if (CHECK_FPU_EXTRA_OVERFLOW) {
SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
fpuFloat2(regd);

View File

@ -886,7 +886,6 @@ FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
void recSQRT_S_xmm(int info)
{
u8 *pjmp;
u32 *pjmpx;
static u32 PCSX2_ALIGNED16(roundmode_temp[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 };
int roundmodeFlag = 0;
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
@ -907,15 +906,8 @@ void recSQRT_S_xmm(int info)
if (FPU_FLAGS_ID) {
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
//--- Check for zero (skip sqrt if zero)
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_D);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1);
pjmpx = JNE32(0);
//--- Check for negative SQRT ---
//--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say)
SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
AND32ItoR(tempReg, 1); //Check sign
pjmp = JZ8(0); //Skip if none are
@ -934,9 +926,7 @@ void recSQRT_S_xmm(int info)
SSE2_SQRTSD_XMM_to_XMM(EEREC_D, EEREC_D);
ToPS2FPU(EEREC_D, false, t1reg, false);
x86SetJ32(pjmpx);
if (roundmodeFlag == 1) { // Set roundmode back if it was changed
SSE_LDMXCSR ((uptr)&roundmode_temp[1]);
}
@ -954,6 +944,7 @@ FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);
void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when regd <- Fs and regt <- Ft (Sets correct flags)
{
u8 *pjmp1, *pjmp2;
u8 *qjmp1, *qjmp2;
u32 *pjmp32;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
@ -962,19 +953,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
//--- Check for zero ---
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, regt);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags (even when 0/0)
SSE_XORPS_XMM_to_XMM(regd, regt); // Make regd Positive or Negative
SetMaxValue(regd); //clamp to max
pjmp32 = JMP32(0);
x86SetJ8(pjmp1);
//--- Check for negative SQRT ---
//--- (first) Check for negative SQRT ---
SSE_MOVMSKPS_XMM_to_R32(tempReg, regt);
AND32ItoR(tempReg, 1); //Check sign
pjmp2 = JZ8(0); //Skip if not set
@ -982,6 +961,29 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
SSE_ANDPS_M128_to_XMM(regt, (uptr)&s_pos[0]); // Make regt Positive
x86SetJ8(pjmp2);
//--- Check for zero ---
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, regt);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
//--- Check for 0/0 ---
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
SSE_CMPEQSS_XMM_to_XMM(t1reg, regd);
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if regd == zero, sign will be set)
qjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags ( 0/0 )
qjmp2 = JMP8(0);
x86SetJ8(qjmp1); //x/0 but not 0/0
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags ( x/0 )
x86SetJ8(qjmp2);
SetMaxValue(regd); //clamp to max
pjmp32 = JMP32(0);
x86SetJ8(pjmp1);
ToDouble(regt); ToDouble(regd);
SSE2_SQRTSD_XMM_to_XMM(regt, regt);

View File

@ -181,6 +181,7 @@ PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2]);
void recVUMI_RSQRT(VURegs *VU, int info)
{
u8 *ajmp8, *bjmp8;
u8 *qjmp1, *qjmp2;
int t1reg, t1boolean;
//SysPrintf("recVUMI_RSQRT()\n");
@ -215,11 +216,24 @@ void recVUMI_RSQRT(VURegs *VU, int info)
AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation
ajmp8 = JZ8(0); // Skip if none are
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
//check for 0/0
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear EEREC_TEMP
SSE_CMPEQPS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation
qjmp1 = JZ8(0);
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
qjmp2 = JMP8(0);
x86SetJ8(qjmp1);
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
x86SetJ8(qjmp2);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]);
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // EEREC_TEMP = +/-Max
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
bjmp8 = JMP8(0);
x86SetJ8(ajmp8);