mirror of https://github.com/PCSX2/pcsx2.git
-- Fixed recCVT_W opcode saturation: it now produces correct results and it's faster. -- Fixed recVUMI_FTOI0/recVUMI_FTOI4/recVUMI_FTOI12/recVUMI_FTOI15: they now saturate, but the result is still not 100% correct: on saturation they return 0x7FFFFF80 instead of 0x7FFFFFFF.
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@409 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
d5270d22e7
commit
43720a2d27
|
@ -925,8 +925,6 @@ void recCVT_S_xmm(int info)
|
|||
|
||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS);
|
||||
|
||||
static u32 s_signbit = 0x80000000;
|
||||
|
||||
void recCVT_W()
|
||||
{
|
||||
int regs;
|
||||
|
@ -934,48 +932,28 @@ void recCVT_W()
|
|||
regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
||||
|
||||
if( regs >= 0 )
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
_freeXMMreg(t0reg);
|
||||
|
||||
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit);
|
||||
{
|
||||
SSE_CVTTSS2SI_XMM_to_R32(EAX, regs);
|
||||
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs);
|
||||
SSE_MOVMSKPS_XMM_to_R32(EDX,regs); //extract the signs
|
||||
AND32ItoR(EDX,1); //keep only LSB
|
||||
}
|
||||
else
|
||||
{
|
||||
SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
|
||||
MOV32MtoR(EDX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
|
||||
SHR32ItoR(EDX,31); //mov sign to lsb
|
||||
}
|
||||
|
||||
|
||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||
_deleteFPtoXMMreg(_Fd_, 2);
|
||||
|
||||
MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
|
||||
AND32ItoR(ECX, 0x7f800000);
|
||||
CMP32ItoR(ECX, 0x4E800000);
|
||||
j8Ptr[0] = JLE8(0);
|
||||
ADD32ItoR(EDX,0x7FFFFFFF); //0x7FFFFFFF if positive, 0x8000 0000 if negative
|
||||
|
||||
// need to detect if reg is positive
|
||||
/*if( regs >= 0 ) {
|
||||
SSE_UCOMISS_XMM_to_XMM(regs, t0reg);
|
||||
j8Ptr[2] = JB8(0);
|
||||
}
|
||||
else {*/
|
||||
TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000);
|
||||
j8Ptr[2] = JNZ8(0);
|
||||
//}
|
||||
CMP32ItoR(EAX,0x80000000); //If the result is indefinitive
|
||||
CMOVE32RtoR(EAX,EDX); //Saturate it
|
||||
|
||||
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff);
|
||||
j8Ptr[1] = JMP8(0);
|
||||
|
||||
x86SetJ8( j8Ptr[2] );
|
||||
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000);
|
||||
j8Ptr[1] = JMP8(0);
|
||||
|
||||
x86SetJ8( j8Ptr[0] );
|
||||
|
||||
//Write the result
|
||||
MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX);
|
||||
|
||||
x86SetJ8( j8Ptr[1] );
|
||||
}
|
||||
//------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -3538,17 +3538,59 @@ void recVUMI_NOP( VURegs *VU, int info )
|
|||
{
|
||||
}
|
||||
|
||||
static const PCSX2_ALIGNED16(int rec_const_0x8000000[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
|
||||
//Saturates for FTOI
|
||||
//NOT Tested yet, it needs 2 temp regs and i have no idea how to use pcsx2's sse reg alloc functions :p
|
||||
//rec_s can be the same as rec_tmp1
|
||||
void recVUMI_FTOI_Saturate(int rec_s,int rec_t,int rec_tmp1,int rec_tmp2)
|
||||
{
|
||||
assert(rec_s!=rec_t);
|
||||
assert(rec_tmp1!=rec_t);
|
||||
assert(rec_tmp2!=rec_t);
|
||||
assert(rec_tmp1!=rec_tmp2);
|
||||
|
||||
//Duplicate the xor'd sign bit to the whole value
|
||||
//FFFF FFFF for positive, 0 for negative
|
||||
if (rec_tmp1!=rec_s)
|
||||
SSE_MOVAPS_M128_to_XMM(rec_tmp1,rec_s);
|
||||
SSE2_PXOR_M128_to_XMM(rec_tmp1,(uptr)&const_clip[4]);
|
||||
SSE2_PSRAW_I8_to_XMM(rec_tmp1,31);
|
||||
|
||||
//Create mask: 0 where !=8000 0000
|
||||
SSE_MOVAPS_M128_to_XMM(rec_tmp2,(uptr)&const_clip[4]);
|
||||
SSE2_PCMPEQD_M128_to_XMM(rec_tmp2,rec_t);
|
||||
|
||||
//AND the mask w/ the edit values
|
||||
SSE_ANDPS_M128_to_XMM(rec_tmp1,rec_tmp2);
|
||||
|
||||
//if v==8000 0000 && positive -> 8000 0000 + FFFF FFFF -> 7FFF FFFF
|
||||
//if v==8000 0000 && negative -> 8000 0000 + 0 -> 8000 0000
|
||||
//if v!=8000 0000 -> v+0 (masked from the and)
|
||||
|
||||
//Add the values as needed
|
||||
SSE2_PADDD_XMM_to_XMM(rec_t,rec_tmp1);
|
||||
}
|
||||
|
||||
static const PCSX2_ALIGNED16(float rec_float_max_values[4]) = { 0x7FFFFF80, 0x7FFFFF80, 0x7FFFFF80, 0x7FFFFF80 };
|
||||
|
||||
void recVUMI_FTOI0(VURegs *VU, int info)
|
||||
{
|
||||
if ( _Ft_ == 0 ) return;
|
||||
|
||||
if (_X_Y_Z_W != 0xf) {
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP,EEREC_S);
|
||||
SSE_MINPS_M128_to_XMM(EEREC_TEMP,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||
}
|
||||
else {
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||
}
|
||||
if (EEREC_T != EEREC_S)
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_T,EEREC_S);
|
||||
|
||||
SSE_MINPS_M128_to_XMM(EEREC_T,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
void recVUMI_FTOIX(VURegs *VU, int addr, int info)
|
||||
|
@ -3558,6 +3600,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
|
|||
if (_X_Y_Z_W != 0xf) {
|
||||
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
||||
SSE_MINPS_M128_to_XMM(EEREC_TEMP,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
||||
|
||||
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
||||
|
@ -3565,6 +3608,7 @@ void recVUMI_FTOIX(VURegs *VU, int addr, int info)
|
|||
else {
|
||||
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
||||
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
||||
SSE_MINPS_M128_to_XMM(EEREC_T,(uptr)&rec_float_max_values[0]); //this is partialy wrong, will return 0x7FFFFF80 instead of 0x7FFFFFFF on saturate
|
||||
SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue