mirror of https://github.com/PCSX2/pcsx2.git
Recoded the RSQRT opcode.
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@40 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
17b04bbd0c
commit
e096347d51
|
@ -4225,8 +4225,8 @@ void recVUMI_SQRT( VURegs *VU, int info )
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
||||||
x86SetJ8(pjmp);
|
x86SetJ8(pjmp);
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
if (CHECK_EXTRA_OVERFLOW) // Clamp Infinities to Fmax
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals);
|
||||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||||
|
@ -4235,117 +4235,109 @@ void recVUMI_SQRT( VURegs *VU, int info )
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2];)
|
||||||
|
|
||||||
void recVUMI_RSQRT(VURegs *VU, int info)
|
void recVUMI_RSQRT(VURegs *VU, int info)
|
||||||
{
|
{
|
||||||
int vftemp = ALLOCTEMPX86(MODE_8BITREG);
|
int vftemp = ALLOCTEMPX86(MODE_8BITREG);
|
||||||
u8* njmp;
|
int t1reg;
|
||||||
|
u32* ajmp32;
|
||||||
|
u32* bjmp32;
|
||||||
|
|
||||||
SysPrintf("RSQRT Opcode \n");
|
if( _Ftf_ )
|
||||||
if( _Ftf_ ) {
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
else
|
||||||
_unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Ftf_);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
||||||
}
|
|
||||||
/* Check for negative divide */
|
/* Check for negative divide */
|
||||||
XOR32RtoR(vftemp, vftemp);
|
XOR32RtoR(vftemp, vftemp);
|
||||||
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP);
|
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP);
|
||||||
AND32ItoR(vftemp, 1); //Check sign
|
AND32ItoR(vftemp, 1); //Check sign
|
||||||
njmp = JZ8(0); //Skip if none are
|
ajmp32 = JZ32(0); //Skip if none are
|
||||||
//SysPrintf("Invalid RSQRT\n");
|
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
x86SetJ32(ajmp32);
|
||||||
x86SetJ8(njmp);
|
|
||||||
|
|
||||||
//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
if (CHECK_EXTRA_OVERFLOW) // Clamp Infinities to Fmax
|
||||||
|
SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals);
|
||||||
|
|
||||||
if( _Fs_ == 0 ) {
|
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||||
if( _Fsf_ == 3 ) {
|
|
||||||
if(_Ft_ != 0 ||_Ftf_ == 3 )
|
|
||||||
{
|
|
||||||
//SysPrintf("_Fs_ = 0.3 _Ft_ != 0 || _Ft_ = 0.3 \n");
|
|
||||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); //Dont use RSQRT, terrible accuracy
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
//SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
||||||
_unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
||||||
SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_Q, 0));
|
|
||||||
|
|
||||||
|
t1reg = _vuGetTempXMMreg(info);
|
||||||
|
|
||||||
}
|
if( t1reg >= 0 )
|
||||||
else
|
{
|
||||||
{
|
SysPrintf("RSQRT Opcode Part 1 \n");
|
||||||
//SysPrintf("FS0.3 / 0!\n");
|
// Ft can still be zero here, so we need to check whether it is zero and set the correct flag.
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); //Zero divide (only when not 0/0)
|
XOR32RtoR(vftemp, vftemp);
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff);
|
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
||||||
_freeX86reg(vftemp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}else {
|
|
||||||
if(_Ft_ != 0 || _Ftf_ == 3) {
|
|
||||||
//SysPrintf("FS = 0 FT != 0\n");
|
|
||||||
//SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); //Zero divide (only when not 0/0)
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff); // +MAX, no negative in here :p
|
|
||||||
_freeX86reg(vftemp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//SysPrintf("FS = 0 FT = 0!\n");
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag (only when 0/0)
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0);
|
|
||||||
_freeX86reg(vftemp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
int t1reg;
|
|
||||||
if( _Ft_ == 0 ) {
|
|
||||||
if( _Ftf_ < 3 ) {
|
|
||||||
//SysPrintf("FS != 0 FT = 0!\n");
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag (only when 0/0)
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_S);
|
|
||||||
SHL32ItoR(vftemp, 31); //Check sign
|
|
||||||
OR32ItoR(vftemp, 0x7f7fffff);
|
|
||||||
MOV32RtoM(VU_VI_ADDR(REG_Q, 0), vftemp);
|
|
||||||
_freeX86reg(vftemp);
|
|
||||||
return;
|
|
||||||
}else {
|
|
||||||
//SysPrintf("FS != 0 FT = 1!\n");
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); //Zero divide (only when not 0/0)
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0xff7fffff);
|
|
||||||
_freeX86reg(vftemp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
SSE_MOVMSKPS_XMM_to_R32(vftemp, t1reg); // Move the sign bits of the previous calculation
|
||||||
//SysPrintf("Normal RSQRT\n");
|
|
||||||
|
|
||||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
AND32ItoR( vftemp, 0x01 ); // Grab "Is Zero" bits from the previous calculation
|
||||||
|
ajmp32 = JZ32(0); // Skip if none are
|
||||||
|
|
||||||
t1reg = _vuGetTempXMMreg(info);
|
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
|
||||||
|
|
||||||
if( t1reg >= 0 )
|
|
||||||
{
|
|
||||||
_unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_);
|
|
||||||
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
||||||
_freeXMMreg(t1reg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
||||||
SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_Q, 0));
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
||||||
}
|
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = positive fmax
|
||||||
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||||
|
bjmp32 = JMP32(0);
|
||||||
|
|
||||||
|
x86SetJ32(ajmp32);
|
||||||
|
|
||||||
|
_unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_);
|
||||||
|
if (CHECK_EXTRA_OVERFLOW) // Clamp Infinities
|
||||||
|
vuFloat2(t1reg, t1reg, 0x8);
|
||||||
|
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
||||||
|
vuFloat2(t1reg, t1reg, 0x8);
|
||||||
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg);
|
||||||
|
|
||||||
|
x86SetJ32(bjmp32);
|
||||||
|
|
||||||
|
_freeXMMreg(t1reg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SysPrintf("RSQRT Opcode Part 2 \n");
|
||||||
|
for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S) ); t1reg++)
|
||||||
|
; // Makes t1reg not be EEREC_TEMP or EEREC_S.
|
||||||
|
SSE_MOVAPS_XMM_to_M128( (uptr)&RSQRT_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
||||||
|
|
||||||
|
// Ft can still be zero here, so we need to check whether it is zero and set the correct flag.
|
||||||
|
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
||||||
|
XOR32RtoR(vftemp, vftemp);
|
||||||
|
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
||||||
|
|
||||||
|
SSE_MOVMSKPS_XMM_to_R32(vftemp, t1reg); // Move the sign bits of the previous calculation
|
||||||
|
|
||||||
|
AND32ItoR( vftemp, 0x01 ); // Grab "Is Zero" bits from the previous calculation
|
||||||
|
ajmp32 = JZ32(0); // Skip if none are
|
||||||
|
|
||||||
|
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide flag
|
||||||
|
|
||||||
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
||||||
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
||||||
|
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = positive fmax
|
||||||
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||||
|
bjmp32 = JMP32(0);
|
||||||
|
|
||||||
|
x86SetJ32(ajmp32);
|
||||||
|
|
||||||
|
_unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_);
|
||||||
|
if (CHECK_EXTRA_OVERFLOW) // Clamp Infinities
|
||||||
|
vuFloat2(t1reg, t1reg, 0x8);
|
||||||
|
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
||||||
|
vuFloat2(t1reg, t1reg, 0x8);
|
||||||
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg);
|
||||||
|
|
||||||
|
x86SetJ32(bjmp32);
|
||||||
|
|
||||||
|
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&RSQRT_TEMP_XMM[0] ); // restore t1reg data
|
||||||
}
|
}
|
||||||
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
_freeX86reg(vftemp);
|
_freeX86reg(vftemp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue