nneeve fixed min/max FPU opcodes to be more precise when running in "Full" clamp mode.

he also cleaned up some other stuff in iFPUd.cpp

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@744 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-03-10 23:50:17 +00:00
parent 022fdf1bf3
commit 0bcb9cc0e3
1 changed files with 38 additions and 323 deletions

View File

@ -26,16 +26,18 @@
/* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */ /* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */
/* Can be made faster by not converting stuff back and forth between instructions. */
//set overflow flag (set only if FPU_RESULT is 1) //set overflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_OVERFLOW 1 #define FPU_FLAGS_OVERFLOW 1
//set underflow flag (set only if FPU_RESULT is 1) //set underflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_UNDERFLOW 1 #define FPU_FLAGS_UNDERFLOW 1
//if 1, result is not clamped (MORE correct, //if 1, result is not clamped (Gives correct results as in PS2,
//but can cause problems due to insuffecient clamping levels in the VUs) //but can cause problems due to insuffecient clamping levels in the VUs)
#define FPU_RESULT 1 #define FPU_RESULT 1
//also impacts other aspects of DIV/R/SQRT correctness //set I&D flags. also impacts other aspects of DIV/R/SQRT correctness
#define FPU_FLAGS_ID 1 #define FPU_FLAGS_ID 1
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -128,268 +130,6 @@ static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xf
// *FPU Opcodes!* // *FPU Opcodes!*
//------------------------------------------------------------------ //------------------------------------------------------------------
//------------------------------------------------------------------
// CFC1 / CTC1
//------------------------------------------------------------------
void recCFC1(void)
{
if ( !_Rt_ || ( (_Fs_ != 0) && (_Fs_ != 31) ) ) return;
_eeOnWriteReg(_Rt_, 1);
MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] );
_deleteEEreg(_Rt_, 0);
if (_Fs_ == 31)
{
AND32ItoR(EAX, 0x0083c078); //remove always-zero bits
OR32ItoR(EAX, 0x01000001); //set always-one bits
}
if(EEINST_ISLIVE1(_Rt_))
{
CDQ( );
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX );
}
else
{
EEINST_RESETHASLIVE1(_Rt_);
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
}
}
void recCTC1( void )
{
if ( _Fs_ != 31 ) return;
if ( GPR_IS_CONST1(_Rt_) )
{
MOV32ItoM((uptr)&fpuRegs.fprc[ _Fs_ ], g_cpuConstRegs[_Rt_].UL[0]);
}
else
{
int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if( mmreg >= 0 )
{
SSEX_MOVD_XMM_to_M32((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg);
}
else
{
mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ);
if ( mmreg >= 0 )
{
MOVDMMXtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg);
SetMMXstate();
}
else
{
_deleteGPRtoXMMreg(_Rt_, 1);
_deleteMMXreg(MMX_GPR+_Rt_, 1);
MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] );
MOV32RtoM( (uptr)&fpuRegs.fprc[ _Fs_ ], EAX );
}
}
}
}
//------------------------------------------------------------------
//------------------------------------------------------------------
// MFC1
//------------------------------------------------------------------
void recMFC1(void)
{
int regt, regs;
if ( ! _Rt_ ) return;
_eeOnWriteReg(_Rt_, 1);
regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
if( regs >= 0 )
{
_deleteGPRtoXMMreg(_Rt_, 2);
regt = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_WRITE);
if( regt >= 0 )
{
SSE2_MOVDQ2Q_XMM_to_MM(regt, regs);
if(EEINST_ISLIVE1(_Rt_))
_signExtendGPRtoMMX(regt, _Rt_, 0);
else
EEINST_RESETHASLIVE1(_Rt_);
}
else
{
if(EEINST_ISLIVE1(_Rt_))
{
_signExtendXMMtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs, 0);
}
else
{
EEINST_RESETHASLIVE1(_Rt_);
SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs);
}
}
}
else
{
regs = _checkMMXreg(MMX_FPU+_Fs_, MODE_READ);
if( regs >= 0 )
{
// convert to mmx reg
mmxregs[regs].reg = MMX_GPR+_Rt_;
mmxregs[regs].mode |= MODE_READ|MODE_WRITE;
_signExtendGPRtoMMX(regs, _Rt_, 0);
}
else
{
regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if( regt >= 0 )
{
if( xmmregs[regt].mode & MODE_WRITE )
{
SSE_MOVHPS_XMM_to_M64((uptr)&cpuRegs.GPR.r[_Rt_].UL[2], regt);
}
xmmregs[regt].inuse = 0;
}
_deleteEEreg(_Rt_, 0);
MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL );
if(EEINST_ISLIVE1(_Rt_))
{
CDQ( );
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX );
}
else
{
EEINST_RESETHASLIVE1(_Rt_);
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
}
}
}
}
//------------------------------------------------------------------
//------------------------------------------------------------------
// MTC1
//------------------------------------------------------------------
void recMTC1(void)
{
if( GPR_IS_CONST1(_Rt_) )
{
_deleteFPtoXMMreg(_Fs_, 0);
MOV32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, g_cpuConstRegs[_Rt_].UL[0]);
}
else
{
int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
if( mmreg >= 0 )
{
if( g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE )
{
// transfer the reg directly
_deleteGPRtoXMMreg(_Rt_, 2);
_deleteFPtoXMMreg(_Fs_, 2);
_allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE);
}
else
{
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
if( mmreg2 >= 0 )
SSE_MOVSS_XMM_to_XMM(mmreg2, mmreg);
else
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
}
}
else
{
int mmreg2;
mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ);
mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
if( mmreg >= 0 )
{
if( mmreg2 >= 0 )
{
SetMMXstate();
SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg);
}
else
{
SetMMXstate();
MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg);
}
}
else
{
if( mmreg2 >= 0 )
{
SSE_MOVSS_M32_to_XMM(mmreg2, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]);
}
else
{
MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]);
MOV32RtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, EAX);
}
}
}
}
}
//------------------------------------------------------------------
/*#ifndef FPU_RECOMPILE // If FPU_RECOMPILE is not defined, then use the interpreter opcodes. (CFC1, CTC1, MFC1, and MTC1 are special because they work specifically with the EE rec so they're defined above)
REC_FPUFUNC(ABS_S);
REC_FPUFUNC(ADD_S);
REC_FPUFUNC(ADDA_S);
REC_FPUBRANCH(BC1F);
REC_FPUBRANCH(BC1T);
REC_FPUBRANCH(BC1FL);
REC_FPUBRANCH(BC1TL);
REC_FPUFUNC(C_EQ);
REC_FPUFUNC(C_F);
REC_FPUFUNC(C_LE);
REC_FPUFUNC(C_LT);
REC_FPUFUNC(CVT_S);
REC_FPUFUNC(CVT_W);
REC_FPUFUNC(DIV_S);
REC_FPUFUNC(MAX_S);
REC_FPUFUNC(MIN_S);
REC_FPUFUNC(MADD_S);
REC_FPUFUNC(MADDA_S);
REC_FPUFUNC(MOV_S);
REC_FPUFUNC(MSUB_S);
REC_FPUFUNC(MSUBA_S);
REC_FPUFUNC(MUL_S);
REC_FPUFUNC(MULA_S);
REC_FPUFUNC(NEG_S);
REC_FPUFUNC(SUB_S);
REC_FPUFUNC(SUBA_S);
REC_FPUFUNC(SQRT_S);
REC_FPUFUNC(RSQRT_S);
#else // FPU_RECOMPILE*/
//------------------------------------------------------------------ //------------------------------------------------------------------
// PS2 -> DOUBLE // PS2 -> DOUBLE
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -678,7 +418,7 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
// CommutativeOp XMM (used for ADD, MUL, MAX, MIN and SUB opcodes) // CommutativeOp XMM (used for ADD, MUL, MAX, MIN and SUB opcodes)
//------------------------------------------------------------------ //------------------------------------------------------------------
static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
SSE2_ADDSD_XMM_to_XMM, NULL, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM }; SSE2_ADDSD_XMM_to_XMM, NULL, NULL, NULL, SSE2_SUBSD_XMM_to_XMM };
void recFPUOp(int info, int regd, int op, bool acc) void recFPUOp(int info, int regd, int op, bool acc)
{ {
@ -718,48 +458,6 @@ void recADDA_S_xmm(int info)
FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
//------------------------------------------------------------------ //------------------------------------------------------------------
//------------------------------------------------------------------
// BC1x XMM
//------------------------------------------------------------------
/*
static void _setupBranchTest()
{
_eeFlushAllUnused();
// COP1 branch conditionals are based on the following equation:
// (fpuRegs.fprc[31] & 0x00800000)
// BC2F checks if the statement is false, BC2T checks if the statement is true.
MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]);
TEST32ItoR(EAX, FPUflagC);
}
void recBC1F( void )
{
_setupBranchTest();
recDoBranchImm(JNZ32(0));
}
void recBC1T( void )
{
_setupBranchTest();
recDoBranchImm(JZ32(0));
}
void recBC1FL( void )
{
_setupBranchTest();
recDoBranchImm_Likely(JNZ32(0));
}
void recBC1TL( void )
{
_setupBranchTest();
recDoBranchImm_Likely(JZ32(0));
}*/
//------------------------------------------------------------------
//TOKNOW : how does C.??.S behave with denormals?
void recCMP(int info) void recCMP(int info)
{ {
int sreg, treg; int sreg, treg;
@ -788,11 +486,6 @@ void recC_EQ_xmm(int info)
FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT);
/*void recC_F()
{
AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC );
}*/
void recC_LE_xmm(int info ) void recC_LE_xmm(int info )
{ {
recCMP(info); recCMP(info);
@ -806,7 +499,6 @@ void recC_LE_xmm(int info )
} }
FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT);
//REC_FPUFUNC(C_LE);
void recC_LT_xmm(int info) void recC_LT_xmm(int info)
{ {
@ -821,7 +513,6 @@ void recC_LT_xmm(int info)
} }
FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT);
//REC_FPUFUNC(C_LT);
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -840,7 +531,7 @@ void recCVT_S_xmm(int info)
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS);
void recCVT_W() void recCVT_W() //called from iFPU.cpp's recCVT_W
{ {
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
@ -1052,17 +743,42 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X
// MAX / MIN XMM // MAX / MIN XMM
//------------------------------------------------------------------ //------------------------------------------------------------------
//TOKNOW : handles denormals like VU, maybe? static const u32 PCSX2_ALIGNED16(minmax_mask[4]) = {0xffffffff, 0x80000000, 0, 0};
static const u32 PCSX2_ALIGNED16(minmax_mask2[4]) = {0, 0x40000000, 0, 0};
// FPU's MAX/MIN work with all numbers (including "denormals"). Check VU's logical min max for more info.
void recMINMAX(int info, bool ismin)
{
int sreg, treg;
ALLOC_S(sreg); ALLOC_T(treg);
CLEAR_OU_FLAGS;
SSE2_PSHUFD_XMM_to_XMM(sreg, sreg, 0x00);
SSE2_PAND_M128_to_XMM(sreg, (uptr)minmax_mask);
SSE2_POR_M128_to_XMM(sreg, (uptr)minmax_mask2);
SSE2_PSHUFD_XMM_to_XMM(treg, treg, 0x00);
SSE2_PAND_M128_to_XMM(treg, (uptr)minmax_mask);
SSE2_POR_M128_to_XMM(treg, (uptr)minmax_mask2);
if (ismin)
SSE2_MINSD_XMM_to_XMM(sreg, treg);
else
SSE2_MAXSD_XMM_to_XMM(sreg, treg);
SSE_MOVSS_XMM_to_XMM(EEREC_D, sreg);
_freeXMMreg(sreg); _freeXMMreg(treg);
}
void recMAX_S_xmm(int info) void recMAX_S_xmm(int info)
{ {
recFPUOp(info, EEREC_D, 2, false); recMINMAX(info, false);
} }
FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
void recMIN_S_xmm(int info) void recMIN_S_xmm(int info)
{ {
recFPUOp(info, EEREC_D, 3, false); recMINMAX(info, true);
} }
FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
@ -1322,6 +1038,5 @@ void recRSQRT_S_xmm(int info)
FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
//#endif // FPU_RECOMPILE
} } } } } } } } } }