A new VU MIN / MAX code by Nneeve allows more games to use DaZ on the VUs.

We'll test whether any game still needs DaZ off and, if none does, make DaZ default to on.

DaZ is the more correct behavior for the VUs; only bugs prevented it from working as expected.

Thanks to Nneeve for his hard work :)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@461 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
ramapcsx2 2009-02-09 18:50:10 +00:00
parent 25459ba1fc
commit 71c16d061d
3 changed files with 319 additions and 174 deletions

View File

@ -32,7 +32,7 @@
#include "iVUops.h"
#include "iVUzerorec.h"
//------------------------------------------------------------------
// When nonzero, the MAX/MINI recompilers in this file emit their code through MINMAXlogical()
// instead of the clamped SSE MAXPS/MINPS paths kept in their else-branches.
#define MINMAXFIX 1
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
@ -2329,14 +2329,122 @@ void recVUMI_MSUBAw( VURegs *VU, int info )
//------------------------------------------------------------------
// AND mask applied per 64-bit lane by MINMAXlogical: keeps the whole low dword and only the
// top (sign) bit of the high dword.
static const u32 PCSX2_ALIGNED16(special_mask[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
// OR mask applied right after special_mask: sets bit 30 of each high dword (bit 62 of the lane).
static const u32 PCSX2_ALIGNED16(special_mask2[4]) = {0, 0x40000000, 0, 0x40000000};
// 16-byte memory slots where MINMAXlogical saves an XMM register it has to borrow as a
// scratch register (first / second scratch respectively) and from which it restores it.
u32 PCSX2_ALIGNED16(temp_loc[4]);
u32 PCSX2_ALIGNED16(temp_loc2[4]);
//MAX/MINI are non-arithmetic operations that implicitly support numbers with the EXP field being 0 ("denormals").
//
//As such, they are sometimes used for integer move and (positive!) integer max/min, knowing that integers that
//represent denormals will not be flushed to 0.
//
//As such, this implementation performs a non-arithmetic operation that supports "denormals" and "infs/nans".
//There might be an easier way to do it but here, MAX/MIN is performed with MAXPD/MINPD.
//Fake double-precision numbers are constructed by copying the sign of the original numbers, clearing the upper 32 bits,
//setting the 62nd bit to 1 (to ensure double-precision number is "normalized") and having the lower 32bits
//being the same as the original number.
// Emits the x86 code for a VU MAX/MINI that compares raw 32-bit patterns instead of floats.
// For every field selected by _X_Y_Z_W, EEREC_D receives the min/max of EEREC_S and the operand.
//
//   VU, info - recompiler state; the EEREC_* and _X/_Y/_Z/_W macros are evaluated against them
//   min      - nonzero emits MINPD, zero emits MAXPD
//   mode     - 0: operand is EEREC_T, field by field
//              1: operand is the 32-bit value at 'addr', broadcast to all fields (EEREC_T unused)
//              2: operand is field 'xyzw' (0 = x ... 3 = w) of EEREC_T, broadcast to all fields
//   addr     - only read when mode == 1
//   xyzw     - only read when mode == 2
void MINMAXlogical(VURegs *VU, int info, int min, int mode, uptr addr = 0, int xyzw = 0)
{
	// First scratch register: use an allocator temp if there is one, otherwise borrow the first
	// XMM register that is not an operand of this instruction and spill its contents to temp_loc.
	int t1regbool = 0; // 0 = allocated temp (freed at the end), 1 = borrowed (restored at the end)
	int t1reg = _vuGetTempXMMreg(info);
	if (t1reg < 0)
	{
		t1regbool = 1;
		for (t1reg = 0; ( (t1reg == EEREC_D) || (t1reg == EEREC_S) || (mode != 1 && t1reg == EEREC_T)
			|| (t1reg == EEREC_TEMP) ); t1reg++); // Find unused reg (For first temp reg)
		SSE_MOVAPS_XMM_to_M128((uptr)temp_loc, t1reg); // Backup t1reg XMM reg
	}

	// Second scratch register: EEREC_TEMP unless it aliases one of the operands, in which case
	// the same allocate-or-borrow scheme is used (spill slot: temp_loc2).
	int t2regbool = -1; // -1 = EEREC_TEMP (nothing to release), 0 = allocated temp, 1 = borrowed
	int t2reg = EEREC_TEMP;
	if (EEREC_TEMP == EEREC_D || EEREC_TEMP == EEREC_S || (mode != 1 && EEREC_TEMP == EEREC_T))
	{
		t2regbool = 0;
		t2reg = _vuGetTempXMMreg(info);
		if (t2reg < 0)
		{
			t2regbool = 1;
			for (t2reg = 0; ( (t2reg == EEREC_D) || (t2reg == EEREC_S) || (mode != 1 && t2reg == EEREC_T) ||
				(t2reg == t1reg) || (t2reg == EEREC_TEMP) ); t2reg++); // Find unused reg (For second temp reg)
			SSE_MOVAPS_XMM_to_M128((uptr)temp_loc2, t2reg); // Backup t2reg XMM reg
		}
	}

	// The result is produced in two halves (x/y and z/w) and each half is merged into EEREC_D
	// before the other one is computed. In mode 2 the operand is re-broadcast from EEREC_T for
	// every half, so if EEREC_D is the same register as EEREC_T, the half holding the broadcast
	// field has to be emitted last; otherwise the second half would compare against a field that
	// the first merge has already overwritten.
	const bool zwFirst = (mode == 2) && (EEREC_D == EEREC_T) && (xyzw < 2);

	for (int pass = 0; pass < 2; pass++)
	{
		const bool zwHalf = (pass == 0) ? zwFirst : !zwFirst;
		const bool wanted = zwHalf ? (_Z || _W) : (_X || _Y);
		if (!wanted)
			continue;

		// PSHUFD selector that duplicates each field of the half into both dwords of a
		// 64-bit lane: 0x50 -> {x, x, y, y}, 0xfa -> {z, z, w, w}.
		const int spread = zwHalf ? 0xfa : 0x50;
		// Destination fields written by this half.
		const int mergeMask = (zwHalf ? 0x3 : 0xc) & _X_Y_Z_W;

		// Source operand -> fake doubles: low dword = original bits, high dword = sign | bit 30.
		SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, spread);
		SSE2_PAND_M128_to_XMM(t1reg, (uptr)special_mask);
		SSE2_POR_M128_to_XMM(t1reg, (uptr)special_mask2);

		// Second operand, loaded according to 'mode', then converted the same way.
		if (mode == 0)
			SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, spread);
		else if (mode == 1)
		{
			SSE2_MOVD_M32_to_XMM(t2reg, addr);
			SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0x00);
		}
		else if (mode == 2)
			_unpackVF_xyzw(t2reg, EEREC_T, xyzw);
		SSE2_PAND_M128_to_XMM(t2reg, (uptr)special_mask);
		SSE2_POR_M128_to_XMM(t2reg, (uptr)special_mask2);

		if (min)
			SSE2_MINPD_XMM_to_XMM(t1reg, t2reg);
		else
			SSE2_MAXPD_XMM_to_XMM(t1reg, t2reg);

		// 0x88 picks the low dword of each lane (the original 32-bit patterns) and repeats the
		// pair, so the two results sit in both the x/y and the z/w positions before merging.
		SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0x88);
		VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, mergeMask);
	}

	// Release the scratch registers the same way they were obtained.
	if (t1regbool == 0)
		_freeXMMreg(t1reg);
	else if (t1regbool == 1)
		SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)temp_loc); // Restore t1reg XMM reg
	if (t2regbool == 0)
		_freeXMMreg(t2reg);
	else if (t2regbool == 1)
		SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)temp_loc2); // Restore t2reg XMM reg
}
//------------------------------------------------------------------
// MAX
//------------------------------------------------------------------
void recVUMI_MAX(VURegs *VU, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MAX \n");
if (MINMAXFIX)
MINMAXlogical(VU, info, 0, 0);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W );
@ -2362,13 +2470,18 @@ void recVUMI_MAX(VURegs *VU, int info)
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
}
}
void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MAX_iq \n");
if (MINMAXFIX)
MINMAXlogical(VU, info, 0, 1, addr);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
vuFloat3(addr);
@ -2420,18 +2533,17 @@ void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info)
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
}
}
}
void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MAX_xyzw \n");
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_ == 0 && _Ft_ == 0)
{
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {
if( _Fs_ == 0 && _Ft_ == 0 ) {
if( xyzw < 3 ) {
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
@ -2441,7 +2553,29 @@ void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
else if (_X_Y_Z_W != 0xf) {
if( xyzw < 3 ) {
if( _X_Y_Z_W & 1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); // w included, so insert the whole reg
else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // w not included, can zero out
}
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_fones);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else {
if( xyzw < 3 ) SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D);
else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_fones);
}
return;
}
if (MINMAXFIX)
MINMAXlogical(VU, info, 0, 2, 0, xyzw);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) {
if( xyzw == 0 ) {
if( EEREC_D == EEREC_S ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T);
else if( EEREC_D == EEREC_T ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S);
@ -2456,26 +2590,11 @@ void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
}
else if (_X_Y_Z_W != 0xf) {
if( _Fs_ == 0 && _Ft_ == 0 ) {
if( xyzw < 3 ) {
if( _X_Y_Z_W & 1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); // w included, so insert the whole reg
else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // w not included, can zero out
}
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_fones);
}
else {
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
}
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else {
if( _Fs_ == 0 && _Ft_ == 0 ) {
if( xyzw < 3 ) SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D);
else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_fones);
}
else {
if (EEREC_D == EEREC_S) {
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
@ -2503,8 +2622,13 @@ void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); }
void recVUMI_MINI(VURegs *VU, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MINI\n");
if (MINMAXFIX)
MINMAXlogical(VU, info, 1, 0);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W );
@ -2536,13 +2660,19 @@ void recVUMI_MINI(VURegs *VU, int info)
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T);
}
}
}
}
void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MINI_iq \n");
if (MINMAXFIX)
MINMAXlogical(VU, info, 1, 1, addr);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
vuFloat3(addr);
@ -2594,13 +2724,18 @@ void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info)
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
}
}
}
void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info)
{
if ( _Fd_ == 0 ) return;
//SysPrintf ("recVUMI_MINI_xyzw \n");
if (MINMAXFIX)
MINMAXlogical(VU, info, 1, 2, 0, xyzw);
else
{
if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping
if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
@ -2634,6 +2769,7 @@ void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info)
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
}
}
}
// MINIi: forwards to the shared I/Q implementation, passing the address obtained from
// VU_VI_ADDR(REG_I, 1) as the broadcast operand.
void recVUMI_MINIi(VURegs *VU, int info)
{
	recVUMI_MINI_iq(VU, VU_VI_ADDR(REG_I, 1), info);
}

View File

@ -1357,10 +1357,14 @@ extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from );
// Emitter prototypes (defined elsewhere). Naming: <ISA>_<INSTR>_<source>_to_XMM, where the
// source is either another XMM register or a memory address (M32 / M128).
extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
// Packed double-precision maximum; the register form is used by the VU MAX recompiler.
extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from );
extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
// Packed double-precision minimum; the register form is used by the VU MINI recompiler.
extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from );
extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from );
@ -1490,7 +1494,6 @@ extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
// Emitter prototypes for the PADDQ (memory source) and PMADDWD (register source) SSE2
// instructions; defined elsewhere.
extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from );
extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
//**********************************************************************************/
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word
//**********************************************************************************

View File

@ -553,6 +553,9 @@ __forceinline void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
// MAXPS emitters (memory and register source). The SSEMtoR / SSERtoR macros are defined
// elsewhere and pick up the 'to' / 'from' parameters by name.
__forceinline void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
__forceinline void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }
// MAXPD emitters: same 0x5f0f opcode word as MAXPS, but emitted through the *66 macro variants
// (presumably these prepend the 0x66 prefix that selects the packed-double form -- confirm
// against the macro definitions).
__forceinline void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); }
__forceinline void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); }
/////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MAXSS: Return Scalar Single-Precision FP Maximum *
@ -613,6 +616,9 @@ __forceinline void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType fr
// MINPS emitters (memory and register source). The SSEMtoR / SSERtoR macros are defined
// elsewhere and pick up the 'to' / 'from' parameters by name.
__forceinline void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); }
__forceinline void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); }
// MINPD emitters: same 0x5d0f opcode word as MINPS, but emitted through the *66 macro variants
// (presumably these prepend the 0x66 prefix that selects the packed-double form -- confirm
// against the macro definitions).
__forceinline void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); }
__forceinline void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); }
//////////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//MINSS: Return Scalar Single-Precision FP Minimum *