--fixed recCVT_W opcode saturation: it now produces correct results and it's faster :p --fixed recVUMI_FTOI0/recVUMI_FTOI4/recVUMI_FTOI12/recVUMI_FTOI15: they now saturate, but the result is still not 100% correct. It returns 0x7FFFFF80 instead of 0x7FFFFFFF.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@409 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
drkIIRaziel 2008-12-10 21:17:33 +00:00 committed by Gregory Hainaut
parent d5270d22e7
commit 43720a2d27
2 changed files with 58 additions and 36 deletions

View File

@ -925,8 +925,6 @@ void recCVT_S_xmm(int info)
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS);
static u32 s_signbit = 0x80000000;
void recCVT_W()
{
int regs;
@ -934,48 +932,28 @@ void recCVT_W()
regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
if( regs >= 0 )
{
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
_freeXMMreg(t0reg);
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit);
{
SSE_CVTTSS2SI_XMM_to_R32(EAX, regs);
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs);
SSE_MOVMSKPS_XMM_to_R32(EDX,regs); //extract the signs
AND32ItoR(EDX,1); //keep only LSB
}
else
{
SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
MOV32MtoR(EDX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
SHR32ItoR(EDX,31); //mov sign to lsb
}
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
_deleteFPtoXMMreg(_Fd_, 2);
MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
AND32ItoR(ECX, 0x7f800000);
CMP32ItoR(ECX, 0x4E800000);
j8Ptr[0] = JLE8(0);
ADD32ItoR(EDX,0x7FFFFFFF); //0x7FFFFFFF if positive, 0x8000 0000 if negative
// need to detect if reg is positive
/*if( regs >= 0 ) {
SSE_UCOMISS_XMM_to_XMM(regs, t0reg);
j8Ptr[2] = JB8(0);
}
else {*/
TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000);
j8Ptr[2] = JNZ8(0);
//}
CMP32ItoR(EAX,0x80000000); //If the result is indefinitive
CMOVE32RtoR(EAX,EDX); //Saturate it
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff);
j8Ptr[1] = JMP8(0);
x86SetJ8( j8Ptr[2] );
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000);
j8Ptr[1] = JMP8(0);
x86SetJ8( j8Ptr[0] );
//Write the result
MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX);
x86SetJ8( j8Ptr[1] );
}
//------------------------------------------------------------------

View File

@ -3538,17 +3538,59 @@ void recVUMI_NOP( VURegs *VU, int info )
{
}
// Four copies of 0x80000000 (only the top bit of each 32-bit element set).
// NOTE(review): the identifier spells 0x8000000 (seven digits) while the stored
// value is 0x80000000; nothing in the visible code references this table.
static const PCSX2_ALIGNED16(int rec_const_0x8000000[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
//Saturates the result of a packed float -> int32 conversion (FTOI).
//Every 32-bit lane of rec_t that equals 0x80000000 and whose source float in
//rec_s is positive is changed to 0x7FFFFFFF; all other lanes are left as-is.
//  rec_s    : XMM reg holding the original float values
//  rec_t    : XMM reg holding the converted integers, corrected in place
//  rec_tmp1 : scratch XMM reg; may be the same reg as rec_s (rec_s is then destroyed)
//  rec_tmp2 : scratch XMM reg
//NOT tested yet. It needs 2 temp regs, which the caller has to provide;
//this helper does no register allocation of its own.
void recVUMI_FTOI_Saturate(int rec_s,int rec_t,int rec_tmp1,int rec_tmp2)
{
assert(rec_s!=rec_t);
assert(rec_tmp1!=rec_t);
assert(rec_tmp2!=rec_t);
assert(rec_tmp1!=rec_tmp2);
//Duplicate the xor'd sign bit to the whole value
//FFFF FFFF for positive, 0 for negative
//(register-to-register move: rec_s is a register index, not an address)
if (rec_tmp1!=rec_s)
SSE_MOVAPS_XMM_to_XMM(rec_tmp1,rec_s);
SSE2_PXOR_M128_to_XMM(rec_tmp1,(uptr)&const_clip[4]);
//arithmetic shift on 32-bit lanes; a 16-bit (PSRAW) shift would fill each
//half from its own top bit instead of from the float's sign bit
SSE2_PSRAD_I8_to_XMM(rec_tmp1,31);
//Create mask: FFFF FFFF where the converted value ==8000 0000, 0 elsewhere
SSE_MOVAPS_XMM_to_XMM(rec_tmp2,rec_t);
SSE2_PCMPEQD_M128_to_XMM(rec_tmp2,(uptr)&const_clip[4]);
//AND the mask w/ the edit values
SSE_ANDPS_XMM_to_XMM(rec_tmp1,rec_tmp2);
//if v==8000 0000 && positive -> 8000 0000 + FFFF FFFF -> 7FFF FFFF
//if v==8000 0000 && negative -> 8000 0000 + 0 -> 8000 0000
//if v!=8000 0000 -> v+0 (masked from the and)
//Add the values as needed
SSE2_PADDD_XMM_to_XMM(rec_t,rec_tmp1);
}
// Upper clamp applied with MINPS before the float -> int conversion.
// The initialisers are integer constants converted to float: 0x7FFFFF80 is
// 2147483520, which is exactly representable and is the largest
// single-precision value below 2^31.
static const PCSX2_ALIGNED16(float rec_float_max_values[4]) = { 0x7FFFFF80, 0x7FFFFF80, 0x7FFFFF80, 0x7FFFFF80 };
// Handles FTOI0: does nothing when _Ft_ is 0; otherwise the source value is
// clamped against rec_float_max_values with MINPS and then converted with a
// truncating CVTTPS2DQ.
// NOTE(review): _X_Y_Z_W presumably is the destination field mask, with 0xf
// meaning all four fields are written - confirm against its definition.
void recVUMI_FTOI0(VURegs *VU, int info)
{
if ( _Ft_ == 0 ) return;
if (_X_Y_Z_W != 0xf) {
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP,EEREC_S);
SSE_MINPS_M128_to_XMM(EEREC_TEMP,(uptr)&rec_float_max_values[0]); //this is partially wrong: on saturation it returns 0x7FFFFF80 instead of 0x7FFFFFFF
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
// the result is built in EEREC_TEMP and then merged into EEREC_T
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
}
else {
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
}
// copy the source only when it is not already in the destination register
if (EEREC_T != EEREC_S)
SSE_MOVAPS_XMM_to_XMM(EEREC_T,EEREC_S);
SSE_MINPS_M128_to_XMM(EEREC_T,(uptr)&rec_float_max_values[0]); //this is partially wrong: on saturation it returns 0x7FFFFF80 instead of 0x7FFFFFFF
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
}
}
void recVUMI_FTOIX(VURegs *VU, int addr, int info)
@ -3558,6 +3600,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
if (_X_Y_Z_W != 0xf) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
SSE_MINPS_M128_to_XMM(EEREC_TEMP,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
@ -3565,6 +3608,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
else {
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
SSE_MINPS_M128_to_XMM(EEREC_T,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
}
}