mirror of https://github.com/PCSX2/pcsx2.git
optimized SQRT opcode a bit
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@39 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
1c4f8e47e3
commit
17b04bbd0c
|
@ -285,7 +285,8 @@ void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw)
|
||||||
{
|
{
|
||||||
if( cpucaps.hasStreamingSIMD4Extensions ) {
|
if( cpucaps.hasStreamingSIMD4Extensions ) {
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
case 0: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0)); break;
|
case 0: if( dstreg != srcreg ) {
|
||||||
|
SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0));} break;
|
||||||
case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break;
|
case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break;
|
||||||
case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break;
|
case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break;
|
||||||
case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break;
|
case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break;
|
||||||
|
@ -4210,38 +4211,26 @@ void recVUMI_SQRT( VURegs *VU, int info )
|
||||||
SysPrintf("SQRT Opcode \n");
|
SysPrintf("SQRT Opcode \n");
|
||||||
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFDF); //Divide flag cleared regardless of result
|
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFDF); //Divide flag cleared regardless of result
|
||||||
|
|
||||||
if( _Ftf_ ) {
|
if( xmmregs[EEREC_T].mode & MODE_WRITE )
|
||||||
if( xmmregs[EEREC_T].mode & MODE_WRITE ) {
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
|
||||||
//SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
else
|
||||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
||||||
_unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Ftf_);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[_Ft_].UL[_Ftf_]);
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[_Ft_].UL[_Ftf_]);
|
||||||
//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
//SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
||||||
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
||||||
}
|
|
||||||
/* Check for negative divide */
|
/* Check for negative divide */
|
||||||
XOR32RtoR(vftemp, vftemp);
|
XOR32RtoR(vftemp, vftemp);
|
||||||
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP);
|
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP);
|
||||||
AND32ItoR(vftemp, 1); //Check sign
|
AND32ItoR(vftemp, 1); //Check sign
|
||||||
pjmp = JZ8(0); //Skip if none are
|
pjmp = JZ8(0); //Skip if none are
|
||||||
//SysPrintf("Invalid SQRT\n");
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt
|
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
|
||||||
x86SetJ8(pjmp);
|
x86SetJ8(pjmp);
|
||||||
|
|
||||||
//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
if (CHECK_EXTRA_OVERFLOW)
|
||||||
|
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
||||||
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||||
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||||
|
|
||||||
_freeX86reg(vftemp);
|
_freeX86reg(vftemp);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue