Optimized the SQRT opcode a bit

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@39 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-08-15 19:36:45 +00:00 committed by Gregory Hainaut
parent 1c4f8e47e3
commit 17b04bbd0c
1 changed file with 15 additions and 26 deletions

View File

@ -285,10 +285,11 @@ void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw)
{ {
if( cpucaps.hasStreamingSIMD4Extensions ) { if( cpucaps.hasStreamingSIMD4Extensions ) {
switch (xyzw) { switch (xyzw) {
case 0: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0)); break; case 0: if( dstreg != srcreg ) {
case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break; SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0));} break;
case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break; case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break;
case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break; case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break;
case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break;
} }
} }
else { else {
@ -4210,38 +4211,26 @@ void recVUMI_SQRT( VURegs *VU, int info )
SysPrintf("SQRT Opcode \n"); SysPrintf("SQRT Opcode \n");
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFDF); //Divide flag cleared regardless of result AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFDF); //Divide flag cleared regardless of result
if( _Ftf_ ) { if( xmmregs[EEREC_T].mode & MODE_WRITE )
if( xmmregs[EEREC_T].mode & MODE_WRITE ) { _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
//SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); else
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[_Ft_].UL[_Ftf_]);
_unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Ftf_);
}
else {
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[_Ft_].UL[_Ftf_]);
//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
}
}
else {
//SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)const_clip);
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
}
/* Check for negative divide */ /* Check for negative divide */
XOR32RtoR(vftemp, vftemp); XOR32RtoR(vftemp, vftemp);
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP); SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP);
AND32ItoR(vftemp, 1); //Check sign AND32ItoR(vftemp, 1); //Check sign
pjmp = JZ8(0); //Skip if none are pjmp = JZ8(0); //Skip if none are
//SysPrintf("Invalid SQRT\n"); OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag - Negative number sqrt SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); //So we do a cardinal sqrt
x86SetJ8(pjmp); x86SetJ8(pjmp);
//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip); if (CHECK_EXTRA_OVERFLOW)
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
_freeX86reg(vftemp); _freeX86reg(vftemp);
} }