Nneeve patch for "Full" FPU, improves some clamping and adds missing values to the savestate info.

Savestates: Pcsx2 now errors when it encounters a savestate made by a newer version of pcsx2.

Added a new SafeList type to the SafeArray collection (not well tested yet).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@701 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-03-06 19:01:30 +00:00
parent 801d71f7f0
commit 97cdf187e5
8 changed files with 280 additions and 242 deletions

View File

@ -165,8 +165,6 @@ extern s32 psxCycleEE; // tracks IOP's current sych status with the EE
#define _JumpTarget_ ((_Target_ << 2) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
#define _BranchTarget_ (((s32)(s16)_Imm_ * 4) + _PC_) // Calculates the target during a branch instruction
//#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
//#define _BranchTarget_ ((short)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction
#define _SetLink(x) psxRegs.GPR.r[x] = _PC_ + 4; // Sets the return address in the link register

View File

@ -135,6 +135,7 @@ struct fpuRegisters {
FPRreg fpr[32]; // 32bit floating point registers
u32 fprc[32]; // 32bit floating point control registers
FPRreg ACC; // 32 bit accumulator
u32 ACCflag; // an internal accumulator overflow flag
};
struct tlbs

View File

@ -67,10 +67,10 @@ extern void pcsx2_aligned_free(void* pmem);
}
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// Handy little class for allocating a resizable memory block, complete with
// exception-based error handling and automatic cleanup.
//
template< typename T >
class SafeArray : public NoncopyableObject
{
@ -86,11 +86,12 @@ protected:
int m_size; // size of the allocation of memory
const static std::string m_str_Unnamed;
protected:
// Internal contructor for use by derrived classes. This allws a derrived class to
// Internal constructor for use by derived classes. This allows a derived class to
// use its own memory allocation (with an aligned memory, for example).
// Throws:
// Exception::OutOfMemory if the allocated_mem pointr is NULL.
// Exception::OutOfMemory if the allocated_mem pointer is NULL.
explicit SafeArray( const std::string& name, T* allocated_mem, int initSize ) :
Name( name )
, ChunkSize( DefaultChunkSize )
@ -139,8 +140,6 @@ public:
// amount requested. The memory allocation is not resized smaller.
void MakeRoomFor( int blockSize )
{
std::string temp;
if( blockSize > m_size )
{
const uint newalloc = blockSize + ChunkSize;
@ -158,12 +157,12 @@ public:
}
// Gets a pointer to the requested allocation index.
// DevBuilds : Throws std::out_of_range() if the index is invalid.
// DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
T *GetPtr( uint idx=0 ) { return _getPtr( idx ); }
const T *GetPtr( uint idx=0 ) const { return _getPtr( idx ); }
// Gets an element of this memory allocation much as if it were an array.
// DevBuilds : Throws std::out_of_range() if the index is invalid.
// DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
T& operator[]( int idx ) { return *_getPtr( (uint)idx ); }
const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); }
@ -180,7 +179,7 @@ protected:
// Performance Considerations: This function adds quite a bit of overhead
// to array indexing and thus should be done infrequently if used in
// time-critical situations. Instead of using it from inside loops, cache
// the pointer into a local variable and use stad (unsafe) C indexes.
// the pointer into a local variable and use std (unsafe) C indexes.
T* _getPtr( uint i ) const
{
#ifdef PCSX2_DEVBUILD
@ -199,7 +198,138 @@ protected:
};
//////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
//
//
// A growable list of T.  Unlike a plain buffer, the list tracks the number of
// active items (m_length) separately from the capacity of its backing
// allocation (m_allocsize); both are measured in items, not bytes.
//
// NOTE(review): SafeList derives from SafeArray<T> but declares its own Name,
// ChunkSize and m_ptr, which hide any same-named members of the base.  All of
// the methods defined here operate on SafeList's own members only.  Inherited
// accessors (GetPtr, etc.) presumably still refer to the base's storage --
// confirm before mixing the two APIs.
template< typename T >
class SafeList : public SafeArray<T>
{
public:
    // Default value for ChunkSize.
    // NOTE(review): MakeRoomFor adds ChunkSize to an item count, so the
    // sizeof(T) factor here looks unintended -- confirm before changing.
    static const int DefaultChunkSize = 0x80 * sizeof(T);

public:
    const std::string Name;     // user-assigned block name
    int ChunkSize;              // assigned DefaultChunkSize on init, reconfigurable at any time.

protected:
    T* m_ptr;                   // backing allocation owned by this list (malloc/realloc/free)
    int m_allocsize;            // capacity of the allocation, in items
    uint m_length;              // length of the array (active items, not buffer allocation)
    const static std::string m_str_Unnamed;

protected:
    // Validates an item index against the active length.
    // DevBuilds : asserts and throws Exception::IndexBoundsFault() if the index
    // is invalid.  Does nothing when IsDevBuild is false.
    // Declared const so that the const accessors below are able to call it.
    void _boundsCheck( uint i ) const
    {
        if( IsDevBuild && i >= (uint)m_length )
        {
            assert( 0 );    // makes debugging easier sometimes. :)
            throw Exception::IndexBoundsFault(
                "Index out of bounds on SafeArray: " + Name +
                " (index=" + to_string(i) +
                ", length=" + to_string(m_length) + ")"
            );
        }
    }

public:
    // Releases the backing allocation.  free(NULL) is a no-op, so a list that
    // never allocated anything is handled as well.
    virtual ~SafeList()
    {
        free( m_ptr );
    }

    // Creates an empty list with no backing allocation.
    explicit SafeList( const std::string& name="Unnamed" ) :
        Name( name )
    ,   ChunkSize( DefaultChunkSize )
    ,   m_ptr( NULL )
    ,   m_allocsize( 0 )
    ,   m_length( 0 )
    {
    }

    // Creates an empty list with room reserved for initialSize items.
    // Throws:
    //   Exception::OutOfMemory if the allocation fails.
    explicit SafeList( int initialSize, const std::string& name="Unnamed" ) :
        Name( name )
    ,   ChunkSize( DefaultChunkSize )
    ,   m_ptr( (T*)malloc( initialSize * sizeof(T) ) )
    ,   m_allocsize( initialSize )
    ,   m_length( 0 )
    {
        if( m_ptr == NULL )
            throw Exception::OutOfMemory();
    }

    // Returns the size of the list, as according to the array type.  This includes
    // mapped items only.  The actual size of the allocation may differ.
    int GetLength() const { return m_length; }

    // Returns the size of the list, in bytes.  This includes mapped items only.
    // The actual size of the allocation may differ.
    int GetSizeInBytes() const { return m_length * sizeof(T); }

    // Ensures that the allocation is large enough to hold blockSize items.
    // The memory allocation is never resized smaller.
    // Throws:
    //   Exception::OutOfMemory if the buffer could not be grown.  The list and
    //   its existing contents remain valid in that case.
    void MakeRoomFor( int blockSize )
    {
        if( blockSize <= m_allocsize ) return;

        const uint newalloc = blockSize + ChunkSize;

        // Grow this list's own buffer.  The result is held in a temporary so that
        // a failed realloc does not overwrite (and leak) the still-valid block.
        T* const resized = (T*)realloc( m_ptr, newalloc * sizeof(T) );
        if( resized == NULL )
        {
            throw Exception::OutOfMemory(
                "Out-of-memory on list re-allocation.  "
                "Old size: " + to_string( m_allocsize ) + " bytes, "
                "New size: " + to_string( newalloc ) + " bytes"
            );
        }
        m_ptr = resized;
        m_allocsize = newalloc;
    }

    // Gets an element of this memory allocation much as if it were an array.
    // DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
    T& operator[]( int idx ) { return *_getPtr( (uint)idx ); }
    const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); }

    // Appends a copy of src to the end of the list, growing the allocation when
    // needed.  Returns the index of the newly added item.
    int Add( const T& src )
    {
        MakeRoomFor( m_length + 1 );
        m_ptr[m_length] = src;
        return m_length++;
    }

    // Performs a standard array-copy removal of the given item.  All items past the
    // given item are shifted down by one slot and the length shrinks by one.
    // Throws Exception::IndexBoundsFault() if the index is invalid (devbuilds only);
    // other builds ignore an invalid index.
    void Remove( int index )
    {
        _boundsCheck( index );
        if( (uint)index >= m_length ) return;

        // Source and destination overlap, so shift front-to-back one item at a
        // time rather than using a block copy.
        for( uint i = (uint)index; i + 1 < m_length; ++i )
            m_ptr[i] = m_ptr[i + 1];

        --m_length;
    }

    // Returns a newly allocated list holding a copy of this list's active items.
    // The caller owns the returned object.
    virtual SafeList<T>* Clone() const
    {
        // malloc(0) is allowed to return NULL, which the sized constructor
        // reports as out-of-memory; an empty list needs no allocation at all.
        if( m_length == 0 )
            return new SafeList<T>();

        SafeList<T>* retval = new SafeList<T>( m_length );
        memcpy_fast( retval->m_ptr, m_ptr, sizeof(T) * m_length );
        retval->m_length = m_length;
        return retval;
    }

protected:
    // A safe array index fetcher.  Throws an exception if the array index
    // is outside the bounds of the array (devbuilds only).
    // Performance Considerations: This function adds quite a bit of overhead
    // to array indexing and thus should be done infrequently if used in
    // time-critical situations.  Instead of using it from inside loops, cache
    // the pointer into a local variable and use standard (unsafe) C indexes.
    T* _getPtr( uint i ) const
    {
        _boundsCheck( i );
        return &m_ptr[i];
    }
};
//////////////////////////////////////////////////////////////////////////////////////////
// Handy little class for allocating a resizable memory block, complete with
// exception-based error handling and automatic cleanup.
// This one supports aligned data allocations too!
@ -214,7 +344,7 @@ protected:
}
// Appends "(align: xx)" to the name of the allocation in devel builds.
// Maybe useful,maybe not... no harm in atatching it. :D
// Maybe useful,maybe not... no harm in attaching it. :D
string _getName( const string& src )
{
#ifdef PCSX2_DEVBUILD

View File

@ -92,7 +92,14 @@ void SaveState::FreezeAll()
Freeze(cpuRegs); // cpu regs + COP0
Freeze(psxRegs); // iop regs
Freeze(fpuRegs); // fpu regs
if (GetVersion() >= 0x6)
Freeze(fpuRegs);
else
{
// Old versions didn't save the ACCflag...
FreezeLegacy(fpuRegs, sizeof(u32)); // fpu regs
fpuRegs.ACCflag = 0;
}
Freeze(tlb); // tlbs
Freeze(EEsCycle);
@ -190,15 +197,12 @@ gzLoadingState::gzLoadingState( const string& filename ) :
gzread( m_file, &m_version, 4 );
if( m_version != g_SaveVersion )
if( m_version < g_SaveVersion )
{
if( ( m_version >> 16 ) == 0x7a30 )
{
Console::Error(
"Savestate load aborted:\n"
"\tVTLB edition cannot safely load savestates created by the VM edition." );
throw Exception::UnsupportedStateVersion( m_version );
}
Console::Error(
"Savestate load aborted:\n"
"\tThe savestate was created with a newer version of Pcsx2. I don't know how to load it!" );
throw Exception::UnsupportedStateVersion( m_version );
}
_testCdvdCrc();

View File

@ -27,14 +27,14 @@
// Savestate Versioning!
// If you make changes to the savestate version, please increment the value below.
static const u32 g_SaveVersion = 0x8b400005;
static const u32 g_SaveVersion = 0x8b400006;
// this function is meant to be sued in the place of GSfreeze, and provides a safe layer
// this function is meant to be used in the place of GSfreeze, and provides a safe layer
// between the GS saving function and the MTGS's needs. :)
extern s32 CALLBACK gsSafeFreeze( int mode, freezeData *data );
// This class provides the base API for both loading and saving savestates.
// Normally you'll want to use one of the four "functional" derrived classes rather
// Normally you'll want to use one of the four "functional" derived classes rather
// than this class directly: gzLoadingState, gzSavingState (gzipped disk-saved
// states), and memLoadingState, memSavingState (uncompressed memory states).
class SaveState

View File

@ -71,6 +71,7 @@ void recC_EQ_xmm(int info);
void recC_LE_xmm(int info);
void recC_LT_xmm(int info);
void recCVT_S_xmm(int info);
void recCVT_W();
void recDIV_S_xmm(int info);
void recMADD_S_xmm(int info);
void recMADDA_S_xmm(int info);
@ -149,6 +150,12 @@ void recCFC1(void)
MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] );
_deleteEEreg(_Rt_, 0);
if (_Fs_ == 31)
{
AND32ItoR(EAX, 0x0083c078); //remove always-zero bits
OR32ItoR(EAX, 0x01000001); //set always-one bits
}
if(EEINST_ISLIVE1(_Rt_))
{
CDQ( );
@ -952,6 +959,12 @@ FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS);
void recCVT_W()
{
if (CHECK_FPU_FULL)
{
DOUBLE::recCVT_W();
return;
}
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
if( regs >= 0 )

View File

@ -23,6 +23,8 @@
#include "ix86/ix86.h"
#include "iR5900.h"
#include "iFPU.h"
/* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */
//set overflow flag (set only if FPU_RESULT is 1)
#define FPU_FLAGS_OVERFLOW 1
@ -33,9 +35,6 @@
//but can cause problems due to insufficient clamping levels in the VUs)
#define FPU_RESULT 1
//should be more correct when 1. see madd/msub documentation
#define FPU_CLAMPISH_MADD_MSUB 1
//also impacts other aspects of DIV/R/SQRT correctness
#define FPU_FLAGS_ID 1
@ -138,6 +137,12 @@ void recCFC1(void)
MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] );
_deleteEEreg(_Rt_, 0);
if (_Fs_ == 31)
{
AND32ItoR(EAX, 0x0083c078); //remove always-zero bits
OR32ItoR(EAX, 0x01000001); //set always-one bits
}
if(EEINST_ISLIVE1(_Rt_))
{
@ -150,6 +155,7 @@ void recCFC1(void)
EEINST_RESETHASLIVE1(_Rt_);
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
}
}
void recCTC1( void )
@ -439,8 +445,13 @@ void ToDouble(int reg)
// converts really large normal numbers to PS2 signed max
// converts really small normal numbers to zero (flush)
// doesn't handle inf/nan/denormal
void ToPS2FPU_Full(int reg, int flags, int absreg)
void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
{
if (flags)
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU));
if (flags && acc)
AND32ItoM((uptr)&fpuRegs.ACCflag, ~1);
SSE_MOVAPS_XMM_to_XMM(absreg, reg);
SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos);
@ -467,6 +478,8 @@ void ToPS2FPU_Full(int reg, int flags, int absreg)
SSE_ORPS_M128_to_XMM(reg, (uptr)&s_pos); //clamp
if (flags && FPU_FLAGS_OVERFLOW)
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO));
if (flags && FPU_FLAGS_OVERFLOW && acc)
OR32ItoM((uptr)&fpuRegs.ACCflag, 1);
u8 *end3 = JMP8(0);
x86SetJ8(to_underflow);
@ -488,10 +501,10 @@ void ToPS2FPU_Full(int reg, int flags, int absreg)
x86SetJ8(end3);
}
void ToPS2FPU(int reg, int flags, int absreg)
void ToPS2FPU(int reg, bool flags, int absreg, bool acc)
{
if (FPU_RESULT)
ToPS2FPU_Full(reg, flags, absreg);
ToPS2FPU_Full(reg, flags, absreg, acc);
else
{
SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //clamp
@ -530,9 +543,7 @@ void SetMaxValue(int regd)
#define ALLOC_ACC(areg) { areg = _allocTempXMMreg(XMMT_FPS, -1); GET_ACC(areg); }
//doesn't hurt to clear flags even if not emulated
#define CLEAR_OU_FLAGS { if (FPU_FLAGS_OVERFLOW || FPU_FLAGS_UNDERFLOW) \
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); }
#define CLEAR_OU_FLAGS { AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU)); }
//------------------------------------------------------------------
@ -638,7 +649,7 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
SSE2_ADDSD_XMM_to_XMM, SSE2_MULSD_XMM_to_XMM, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM };
void recFPUOp(int info, int regd, int op)
void recFPUOp(int info, int regd, int op, bool acc)
{
int sreg, treg;
ALLOC_S(sreg); ALLOC_T(treg);
@ -648,11 +659,9 @@ void recFPUOp(int info, int regd, int op)
ToDouble(sreg); ToDouble(treg);
CLEAR_OU_FLAGS;
recFPUOpXMM_to_XMM[op](sreg, treg);
ToPS2FPU(sreg, 1, treg);
ToPS2FPU(sreg, true, treg, acc);
SSE_MOVSS_XMM_to_XMM(regd, sreg);
_freeXMMreg(sreg); _freeXMMreg(treg);
@ -665,14 +674,14 @@ void recFPUOp(int info, int regd, int op)
//------------------------------------------------------------------
void recADD_S_xmm(int info)
{
recFPUOp(info, EEREC_D, 0);
recFPUOp(info, EEREC_D, 0, false);
}
FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
void recADDA_S_xmm(int info)
{
recFPUOp(info, EEREC_ACC, 0);
recFPUOp(info, EEREC_ACC, 0, true);
}
FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
@ -681,7 +690,7 @@ FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
//------------------------------------------------------------------
// BC1x XMM
//------------------------------------------------------------------
/*
static void _setupBranchTest()
{
_eeFlushAllUnused();
@ -716,7 +725,7 @@ void recBC1TL( void )
{
_setupBranchTest();
recDoBranchImm_Likely(JZ32(0));
}
}*/
//------------------------------------------------------------------
//TOKNOW : how does C.??.S behave with denormals?
@ -747,13 +756,11 @@ void recC_EQ_xmm(int info)
}
FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT);
//REC_FPUFUNC(C_EQ);
void recC_F()
/*void recC_F()
{
AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC );
}
//REC_FPUFUNC(C_F);
}*/
void recC_LE_xmm(int info )
{
@ -878,7 +885,7 @@ void recDIVhelper1(int regd, int regt) // Sets flags
SSE2_DIVSD_XMM_to_XMM(regd, regt);
ToPS2FPU(regd, 0, regt);
ToPS2FPU(regd, false, regt, false);
x86SetJ32(bjmp32);
@ -892,7 +899,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
SSE2_DIVSD_XMM_to_XMM(regd, regt);
ToPS2FPU(regd, 0, regt);
ToPS2FPU(regd, false, regt, false);
}
void recDIV_S_xmm(int info)
@ -934,147 +941,79 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
//------------------------------------------------------------------
// MADD/MSUB XMM
//------------------------------------------------------------------
// currently : flags might not be set too correctly (underflows)
void recMaddsub(int info, int regd, int op)
// Unlike what the documentation implies, it seems that MADD/MSUB support all numbers just like other operations
// The complex overflow conditions the document describes apparently test whether the multiplication's result
// has overflowed and whether the last operation that used ACC as a destination has overflowed.
// For example, { adda.s -MAX, 0.0 ; madd.s fd, MAX, 1.0 } -> fd = 0
// while { adda.s -MAX, -MAX ; madd.s fd, MAX, 1.0 } -> fd = -MAX
// (where MAX is 0x7fffffff and -MAX is 0xffffffff)
void recMaddsub(int info, int regd, int op, bool acc)
{
if (FPU_CLAMPISH_MADD_MSUB)
{
int sreg, treg;
ALLOC_S(sreg); ALLOC_T(treg);
ToDouble(sreg); ToDouble(treg);
//CLEAR_OU_FLAGS; //no point, done later again
SSE2_MULSD_XMM_to_XMM(sreg, treg);
ToPS2FPU(sreg, 1, treg);
CLEAR_OU_FLAGS; //again
GET_ACC(treg);
if (FPU_ADD_SUB_HACK) //ADD or SUB
FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!!
// TEST RESULT AND ACCUMULATOR FOR "INFINITIES", RETURN CORRECTLY-SIGNED "INFINITY" IF NEEDED.
// OTHERWISE, CONVERT TO DOUBLE (NO NEED FOR SPECIAL CONVERSION)
SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&pos_inf); //sets ZF if equal or uncomparable
u8 *mulovf = JE8(0);
SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&neg_inf);
u8 *mulovf2 = JE8(0);
SSE2_CVTSS2SD_XMM_to_XMM(sreg, sreg); //else, simply convert
SSE_UCOMISS_M32_to_XMM(treg, (uptr)&pos_inf); //sets ZF if equal or uncomparable
u8 *accovf = JE8(0);
SSE_UCOMISS_M32_to_XMM(treg, (uptr)&neg_inf);
u8 *accovf2 = JE8(0);
SSE2_CVTSS2SD_XMM_to_XMM(treg, treg); //else, simply convert
u8 *operation = JMP8(0);
x86SetJ8(mulovf);
x86SetJ8(mulovf2);
if (op == 1) //sub
SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg);
SSE_MOVAPS_XMM_to_XMM(treg, sreg);
SetMaxValue(treg); //clamp and continue to set ovf flag below
x86SetJ8(accovf);
x86SetJ8(accovf2);
if (FPU_FLAGS_OVERFLOW) //do not SetMaxValue (maybe...)
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO);
u32 *skipall = JMP32(0);
x86SetJ8(operation);
// PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE (DIFFERENT THAN USUAL)
if (op == 1)
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
else
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
//test for overflow and underflow (different than usual)
int absreg = sreg;
SSE_MOVAPS_XMM_to_XMM(absreg, treg);
SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos);
SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_cvt_overflow);
u8 *to_overflow = JAE8(0);
SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_underflow);
u8 *to_underflow = JB8(0);
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg); //simply convert
u8 *end = JMP8(0);
x86SetJ8(to_overflow);
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg);
SetMaxValue(treg);
if (FPU_FLAGS_OVERFLOW)
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO));
u8 *end2 = JMP8(0);
x86SetJ8(to_underflow);
if (FPU_FLAGS_UNDERFLOW) //set underflow flags if not zero
{
SSE2_XORPD_XMM_to_XMM(absreg, absreg);
SSE2_UCOMISD_XMM_to_XMM(treg, absreg);
u8 *is_zero = JE8(0);
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagU | FPUflagSU));
x86SetJ8(is_zero);
}
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg);
SSE_ANDPS_M128_to_XMM(treg, (uptr)&s_neg); //flush to zero
x86SetJ8(end);
x86SetJ8(end2);
x86SetJ32(skipall);
SSE_MOVSS_XMM_to_XMM(regd, treg);
_freeXMMreg(sreg); _freeXMMreg(treg);
}
int sreg, treg;
ALLOC_S(sreg); ALLOC_T(treg);
ToDouble(sreg); ToDouble(treg);
SSE2_MULSD_XMM_to_XMM(sreg, treg);
ToPS2FPU(sreg, true, treg, false);
GET_ACC(treg);
if (FPU_ADD_SUB_HACK) //ADD or SUB
FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!!
// TEST FOR ACC/MUL OVERFLOWS, PROPAGATE THEM IF THEY OCCUR
TEST32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO);
u8 *mulovf = JNZ8(0);
ToDouble(sreg); //else, convert
TEST32ItoM((uptr)&fpuRegs.ACCflag, 1);
u8 *accovf = JNZ8(0);
ToDouble(treg); //else, convert
u8 *operation = JMP8(0);
x86SetJ8(mulovf);
if (op == 1) //sub
SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg);
SSE_MOVAPS_XMM_to_XMM(treg, sreg); //fall through below
x86SetJ8(accovf);
SetMaxValue(treg); //just in case... I think it has to be a MaxValue already here
CLEAR_OU_FLAGS; //clear U flag
if (FPU_FLAGS_OVERFLOW)
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO);
if (FPU_FLAGS_OVERFLOW && acc)
OR32ItoM((uptr)&fpuRegs.ACCflag, 1);
u32 *skipall = JMP32(0);
// PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE
x86SetJ8(operation);
if (op == 1)
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
else
{
int sreg, treg;
ALLOC_S(sreg); ALLOC_T(treg);
ToDouble(sreg); ToDouble(treg);
CLEAR_OU_FLAGS;
SSE2_MULSD_XMM_to_XMM(sreg, treg);
GET_ACC(treg); ToDouble(treg);
if (op == 1)
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
else
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
ToPS2FPU(treg, 1, sreg);
SSE_MOVSS_XMM_to_XMM(regd, treg);
_freeXMMreg(sreg); _freeXMMreg(treg);
}
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
ToPS2FPU(treg, true, sreg, acc);
x86SetJ32(skipall);
SSE_MOVSS_XMM_to_XMM(regd, treg);
_freeXMMreg(sreg); _freeXMMreg(treg);
}
void recMADD_S_xmm(int info)
{
recMaddsub(info, EEREC_D, 0);
recMaddsub(info, EEREC_D, 0, false);
}
FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
void recMADDA_S_xmm(int info)
{
recMaddsub(info, EEREC_ACC, 0);
recMaddsub(info, EEREC_ACC, 0, true);
}
FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
@ -1088,14 +1027,14 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X
//TOKNOW : handles denormals like VU, maybe?
void recMAX_S_xmm(int info)
{
recFPUOp(info, EEREC_D, 2);
recFPUOp(info, EEREC_D, 2, false);
}
FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
void recMIN_S_xmm(int info)
{
recFPUOp(info, EEREC_D, 3);
recFPUOp(info, EEREC_D, 3, false);
}
FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
@ -1120,33 +1059,32 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS);
void recMSUB_S_xmm(int info)
{
recMaddsub(info, EEREC_D, 1);
recMaddsub(info, EEREC_D, 1, false);
}
FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
void recMSUBA_S_xmm(int info)
{
recMaddsub(info, EEREC_ACC, 1);
recMaddsub(info, EEREC_ACC, 1, true);
}
FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
//------------------------------------------------------------------
//------------------------------------------------------------------
// MUL XMM
//------------------------------------------------------------------
void recMUL_S_xmm(int info)
{
recFPUOp(info, EEREC_D, 1);
recFPUOp(info, EEREC_D, 1, false);
}
FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
void recMULA_S_xmm(int info)
{
recFPUOp(info, EEREC_ACC, 1);
recFPUOp(info, EEREC_ACC, 1, true);
}
FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
@ -1175,7 +1113,7 @@ FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS);
void recSUB_S_xmm(int info)
{
recFPUOp(info, EEREC_D, 4);
recFPUOp(info, EEREC_D, 4, false);
}
FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
@ -1183,7 +1121,7 @@ FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
void recSUBA_S_xmm(int info)
{
recFPUOp(info, EEREC_ACC, 4);
recFPUOp(info, EEREC_ACC, 4, true);
}
FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
@ -1243,7 +1181,7 @@ void recSQRT_S_xmm(int info)
SSE2_SQRTSD_XMM_to_XMM(EEREC_D, EEREC_D);
ToPS2FPU(EEREC_D, 0, t1reg);
ToPS2FPU(EEREC_D, false, t1reg, false);
x86SetJ32(pjmpx);
@ -1278,7 +1216,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags (even when 0/0)
SSE_XORPS_XMM_to_XMM(regd, regt); // Make regd Positive or Negative
SetMaxValue(regd); //clamp to max
pjmp32 = JMP32(0);
@ -1297,7 +1235,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
SSE2_SQRTSD_XMM_to_XMM(regt, regt);
SSE2_DIVSD_XMM_to_XMM(regd, regt);
ToPS2FPU(regd, 0, regt);
ToPS2FPU(regd, false, regt, false);
x86SetJ32(pjmp32);
_freeXMMreg(t1reg);
@ -1313,7 +1251,7 @@ void recRSQRThelper2(int regd, int regt) // Preforms the RSQRT function when reg
SSE2_SQRTSD_XMM_to_XMM(regt, regt);
SSE2_DIVSD_XMM_to_XMM(regd, regt);
ToPS2FPU(regd, 0, regt);
ToPS2FPU(regd, false, regt, false);
}
void recRSQRT_S_xmm(int info)

View File

@ -53,8 +53,6 @@ uptr psxhwLUT[0x10000];
#define MAPBASE 0x48000000
#define RECMEM_SIZE (8*1024*1024)
#define PSX_MEMMASK 0x5fffffff // mask when comparing two pcs
// R3000A statics
int psxreclog = 0;
@ -572,7 +570,11 @@ void recResetIOP()
for (int i = 0; i < 0x10000; i++)
recLUT_SetPage(psxRecLUT, 0, 0, 0, i, 0);
// IOP knows 64k pages, hence for the 0x10000's
// The bottom 2 bits of PC are always zero, so we <<14 to "compress"
// the pc indexer into its lowest common denominator.
// We're only mapping 20 pages here in 4 places.
// 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4
for (int i=0; i<0x80; i++)
@ -619,54 +621,6 @@ static void recShutdown()
#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code
/*
static __forceinline void R3000AExecute()
{
BASEBLOCK* pblock;
pblock = PSX_GETBLOCK(psxRegs.pc);
if ( !pblock->GetFnptr() || (pblock->GetStartPC()&PSX_MEMMASK) != (psxRegs.pc&PSX_MEMMASK) ) {
psxRecRecompile(psxRegs.pc);
}
assert( pblock->GetFnptr() != 0 );
#ifdef _DEBUG
fnptr = (u8*)pblock->GetFnptr();
#ifdef _MSC_VER
__asm {
// save data
mov oldesi, esi;
mov s_uSaveESP, esp;
sub s_uSaveESP, 8;
push ebp;
call fnptr; // jump into function
// restore data
pop ebp;
mov esi, oldesi;
}
#else // linux
__asm__("movl %%esi, %0\n"
"movl %%esp, %1\n"
"sub $8, %%esp\n"
"push %%ebp\n"
"call *%2\n"
"pop %%ebp\n"
"movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) : );
#endif // _MSC_VER
#else
((R3000AFNPTR)pblock->GetFnptr())();
#endif
}*/
u32 g_psxlastpc = 0;
#ifdef _MSC_VER