mirror of https://github.com/PCSX2/pcsx2.git
Nneeve patch for "Full" FPU, improves some clamping and adds missing values to the savestate info.
Savestates: Pcsx2 now errors when it encounters a savestate made by a newer version of pcsx2. Added a new SafeList type to the SafeArray collection (not well tested yet). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@701 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
801d71f7f0
commit
97cdf187e5
|
@ -165,8 +165,6 @@ extern s32 psxCycleEE; // tracks IOP's current sych status with the EE
|
|||
|
||||
#define _JumpTarget_ ((_Target_ << 2) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
|
||||
#define _BranchTarget_ (((s32)(s16)_Imm_ * 4) + _PC_) // Calculates the target during a branch instruction
|
||||
//#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
|
||||
//#define _BranchTarget_ ((short)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction
|
||||
|
||||
#define _SetLink(x) psxRegs.GPR.r[x] = _PC_ + 4; // Sets the return address in the link register
|
||||
|
||||
|
|
|
@ -135,6 +135,7 @@ struct fpuRegisters {
|
|||
FPRreg fpr[32]; // 32bit floating point registers
|
||||
u32 fprc[32]; // 32bit floating point control registers
|
||||
FPRreg ACC; // 32 bit accumulator
|
||||
u32 ACCflag; // an internal accumulator overflow flag
|
||||
};
|
||||
|
||||
struct tlbs
|
||||
|
|
|
@ -67,10 +67,10 @@ extern void pcsx2_aligned_free(void* pmem);
|
|||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Handy little class for allocating a resizable memory block, complete with
|
||||
// exception-based error handling and automatic cleanup.
|
||||
|
||||
//
|
||||
template< typename T >
|
||||
class SafeArray : public NoncopyableObject
|
||||
{
|
||||
|
@ -86,11 +86,12 @@ protected:
|
|||
int m_size; // size of the allocation of memory
|
||||
|
||||
const static std::string m_str_Unnamed;
|
||||
|
||||
protected:
|
||||
// Internal contructor for use by derrived classes. This allws a derrived class to
|
||||
// Internal constructor for use by derived classes. This allows a derived class to
|
||||
// use its own memory allocation (with an aligned memory, for example).
|
||||
// Throws:
|
||||
// Exception::OutOfMemory if the allocated_mem pointr is NULL.
|
||||
// Exception::OutOfMemory if the allocated_mem pointer is NULL.
|
||||
explicit SafeArray( const std::string& name, T* allocated_mem, int initSize ) :
|
||||
Name( name )
|
||||
, ChunkSize( DefaultChunkSize )
|
||||
|
@ -139,8 +140,6 @@ public:
|
|||
// amount requested. The memory allocation is not resized smaller.
|
||||
void MakeRoomFor( int blockSize )
|
||||
{
|
||||
std::string temp;
|
||||
|
||||
if( blockSize > m_size )
|
||||
{
|
||||
const uint newalloc = blockSize + ChunkSize;
|
||||
|
@ -158,12 +157,12 @@ public:
|
|||
}
|
||||
|
||||
// Gets a pointer to the requested allocation index.
|
||||
// DevBuilds : Throws std::out_of_range() if the index is invalid.
|
||||
// DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
|
||||
T *GetPtr( uint idx=0 ) { return _getPtr( idx ); }
|
||||
const T *GetPtr( uint idx=0 ) const { return _getPtr( idx ); }
|
||||
|
||||
// Gets an element of this memory allocation much as if it were an array.
|
||||
// DevBuilds : Throws std::out_of_range() if the index is invalid.
|
||||
// DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
|
||||
T& operator[]( int idx ) { return *_getPtr( (uint)idx ); }
|
||||
const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); }
|
||||
|
||||
|
@ -180,7 +179,7 @@ protected:
|
|||
// Performance Considerations: This function adds quite a bit of overhead
|
||||
// to array indexing and thus should be done infrequently if used in
|
||||
// time-critical situations. Instead of using it from inside loops, cache
|
||||
// the pointer into a local variable and use stad (unsafe) C indexes.
|
||||
// the pointer into a local variable and use std (unsafe) C indexes.
|
||||
T* _getPtr( uint i ) const
|
||||
{
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
|
@ -199,7 +198,138 @@ protected:
|
|||
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
//
|
||||
template< typename T >
|
||||
class SafeList : public SafeArray<T>
|
||||
{
|
||||
public:
|
||||
static const int DefaultChunkSize = 0x80 * sizeof(T);
|
||||
|
||||
public:
|
||||
const std::string Name; // user-assigned block name
|
||||
int ChunkSize; // assigned DefaultChunkSize on init, reconfigurable at any time.
|
||||
|
||||
protected:
|
||||
T* m_ptr;
|
||||
int m_allocsize; // size of the allocation of memory
|
||||
uint m_length; // length of the array (active items, not buffer allocation)
|
||||
|
||||
const static std::string m_str_Unnamed;
|
||||
|
||||
protected:
|
||||
void _boundsCheck( uint i )
|
||||
{
|
||||
if( IsDevBuild && i >= (uint)m_length )
|
||||
{
|
||||
assert( 0 ); // makes debugging easier sometimes. :)
|
||||
throw Exception::IndexBoundsFault(
|
||||
"Index out of bounds on SafeArray: " + Name +
|
||||
" (index=" + to_string(i) +
|
||||
", length=" + to_string(m_length) + ")"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
virtual ~SafeList()
|
||||
{
|
||||
}
|
||||
|
||||
explicit SafeList( const std::string& name="Unnamed" ) :
|
||||
Name( name )
|
||||
, ChunkSize( DefaultChunkSize )
|
||||
, m_ptr( NULL )
|
||||
, m_allocsize( 0 )
|
||||
, m_length( 0 )
|
||||
{
|
||||
}
|
||||
|
||||
explicit SafeList( int initialSize, const std::string& name="Unnamed" ) :
|
||||
Name( name )
|
||||
, ChunkSize( DefaultChunkSize )
|
||||
, m_ptr( (T*)malloc( initialSize * sizeof(T) ) )
|
||||
, m_allocsize( initialSize )
|
||||
, m_length( 0 )
|
||||
{
|
||||
if( m_ptr == NULL )
|
||||
throw Exception::OutOfMemory();
|
||||
}
|
||||
|
||||
// Returns the size of the list, as according to the array type. This includes
|
||||
// mapped items only. The actual size of the allocation may differ.
|
||||
int GetLength() const { return m_length; }
|
||||
|
||||
// Returns the size of the list, in bytes. This includes mapped items only.
|
||||
// The actual size of the allocation may differ.
|
||||
int GetSizeInBytes() const { return m_length * sizeof(T); }
|
||||
|
||||
// Ensures that the allocation is large enough to fit data of the
|
||||
// amount requested. The memory allocation is not resized smaller.
|
||||
void MakeRoomFor( int blockSize )
|
||||
{
|
||||
if( blockSize > m_allocsize )
|
||||
{
|
||||
const uint newalloc = blockSize + ChunkSize;
|
||||
m_ptr = _virtual_realloc( newalloc );
|
||||
if( m_ptr == NULL )
|
||||
{
|
||||
throw Exception::OutOfMemory(
|
||||
"Out-of-memory on list re-allocation. "
|
||||
"Old size: " + to_string( m_allocsize ) + " bytes, "
|
||||
"New size: " + to_string( newalloc ) + " bytes"
|
||||
);
|
||||
}
|
||||
m_allocsize = newalloc;
|
||||
}
|
||||
}
|
||||
|
||||
// Gets an element of this memory allocation much as if it were an array.
|
||||
// DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid.
|
||||
T& operator[]( int idx ) { return *_getPtr( (uint)idx ); }
|
||||
const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); }
|
||||
|
||||
int Add( const T& src )
|
||||
{
|
||||
MakeRoomFor( m_length + 1 );
|
||||
m_ptr[m_length] = src;
|
||||
return m_length++;
|
||||
}
|
||||
|
||||
// Performs a standard array-copy removal of the given item. All items past the
|
||||
// given item are copied over. Throws Exception::IndexBoundsFault() if the index
|
||||
// is invalid (devbuilds only)
|
||||
void Remove( int index )
|
||||
{
|
||||
_boundsCheck( index );
|
||||
int copylen = m_length - index;
|
||||
if( copylen > 0 )
|
||||
memcpy_fast( &m_ptr[index], &m_ptr[index+1], copylen );
|
||||
}
|
||||
|
||||
virtual SafeList<T>* Clone() const
|
||||
{
|
||||
SafeList<T>* retval = new SafeList<T>( m_length );
|
||||
memcpy_fast( retval->GetPtr(), m_ptr, sizeof(T) * m_length );
|
||||
return retval;
|
||||
}
|
||||
|
||||
protected:
|
||||
// A safe array index fetcher. Throws an exception if the array index
|
||||
// is outside the bounds of the array.
|
||||
// Performance Considerations: This function adds quite a bit of overhead
|
||||
// to array indexing and thus should be done infrequently if used in
|
||||
// time-critical situations. Instead of using it from inside loops, cache
|
||||
// the pointer into a local variable and use std (unsafe) C indexes.
|
||||
T* _getPtr( uint i ) const
|
||||
{
|
||||
_boundsCheck( i );
|
||||
return &m_ptr[i];
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Handy little class for allocating a resizable memory block, complete with
|
||||
// exception-based error handling and automatic cleanup.
|
||||
// This one supports aligned data allocations too!
|
||||
|
@ -214,7 +344,7 @@ protected:
|
|||
}
|
||||
|
||||
// Appends "(align: xx)" to the name of the allocation in devel builds.
|
||||
// Maybe useful,maybe not... no harm in atatching it. :D
|
||||
// Maybe useful,maybe not... no harm in attaching it. :D
|
||||
string _getName( const string& src )
|
||||
{
|
||||
#ifdef PCSX2_DEVBUILD
|
||||
|
|
|
@ -92,7 +92,14 @@ void SaveState::FreezeAll()
|
|||
|
||||
Freeze(cpuRegs); // cpu regs + COP0
|
||||
Freeze(psxRegs); // iop regs
|
||||
Freeze(fpuRegs); // fpu regs
|
||||
if (GetVersion() >= 0x6)
|
||||
Freeze(fpuRegs);
|
||||
else
|
||||
{
|
||||
// Old versions didn't save the ACCflags...
|
||||
FreezeLegacy(fpuRegs, sizeof(u32)); // fpu regs
|
||||
fpuRegs.ACCflag = 0;
|
||||
}
|
||||
Freeze(tlb); // tlbs
|
||||
|
||||
Freeze(EEsCycle);
|
||||
|
@ -190,15 +197,12 @@ gzLoadingState::gzLoadingState( const string& filename ) :
|
|||
|
||||
gzread( m_file, &m_version, 4 );
|
||||
|
||||
if( m_version != g_SaveVersion )
|
||||
if( m_version < g_SaveVersion )
|
||||
{
|
||||
if( ( m_version >> 16 ) == 0x7a30 )
|
||||
{
|
||||
Console::Error(
|
||||
"Savestate load aborted:\n"
|
||||
"\tVTLB edition cannot safely load savestates created by the VM edition." );
|
||||
throw Exception::UnsupportedStateVersion( m_version );
|
||||
}
|
||||
Console::Error(
|
||||
"Savestate load aborted:\n"
|
||||
"\tThe savestate was created with a newer version of Pcsx2. I don't know how to load it!" );
|
||||
throw Exception::UnsupportedStateVersion( m_version );
|
||||
}
|
||||
|
||||
_testCdvdCrc();
|
||||
|
|
|
@ -27,14 +27,14 @@
|
|||
// Savestate Versioning!
|
||||
// If you make changes to the savestate version, please increment the value below.
|
||||
|
||||
static const u32 g_SaveVersion = 0x8b400005;
|
||||
static const u32 g_SaveVersion = 0x8b400006;
|
||||
|
||||
// this function is meant to be sued in the place of GSfreeze, and provides a safe layer
|
||||
// this function is meant to be used in the place of GSfreeze, and provides a safe layer
|
||||
// between the GS saving function and the MTGS's needs. :)
|
||||
extern s32 CALLBACK gsSafeFreeze( int mode, freezeData *data );
|
||||
|
||||
// This class provides the base API for both loading and saving savestates.
|
||||
// Normally you'll want to use one of the four "functional" derrived classes rather
|
||||
// Normally you'll want to use one of the four "functional" derived classes rather
|
||||
// than this class directly: gzLoadingState, gzSavingState (gzipped disk-saved
|
||||
// states), and memLoadingState, memSavingState (uncompressed memory states).
|
||||
class SaveState
|
||||
|
|
|
@ -71,6 +71,7 @@ void recC_EQ_xmm(int info);
|
|||
void recC_LE_xmm(int info);
|
||||
void recC_LT_xmm(int info);
|
||||
void recCVT_S_xmm(int info);
|
||||
void recCVT_W();
|
||||
void recDIV_S_xmm(int info);
|
||||
void recMADD_S_xmm(int info);
|
||||
void recMADDA_S_xmm(int info);
|
||||
|
@ -149,6 +150,12 @@ void recCFC1(void)
|
|||
MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] );
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
|
||||
if (_Fs_ == 31)
|
||||
{
|
||||
AND32ItoR(EAX, 0x0083c078); //remove always-zero bits
|
||||
OR32ItoR(EAX, 0x01000001); //set always-one bits
|
||||
}
|
||||
|
||||
if(EEINST_ISLIVE1(_Rt_))
|
||||
{
|
||||
CDQ( );
|
||||
|
@ -952,6 +959,12 @@ FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS);
|
|||
|
||||
void recCVT_W()
|
||||
{
|
||||
if (CHECK_FPU_FULL)
|
||||
{
|
||||
DOUBLE::recCVT_W();
|
||||
return;
|
||||
}
|
||||
|
||||
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
||||
|
||||
if( regs >= 0 )
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
#include "ix86/ix86.h"
|
||||
#include "iR5900.h"
|
||||
#include "iFPU.h"
|
||||
|
||||
/* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */
|
||||
|
||||
//set overflow flag (set only if FPU_RESULT is 1)
|
||||
#define FPU_FLAGS_OVERFLOW 1
|
||||
|
@ -33,9 +35,6 @@
|
|||
//but can cause problems due to insufficient clamping levels in the VUs)
|
||||
#define FPU_RESULT 1
|
||||
|
||||
//should be more correct when 1. see madd/msub documentation
|
||||
#define FPU_CLAMPISH_MADD_MSUB 1
|
||||
|
||||
//also impacts other aspects of DIV/R/SQRT correctness
|
||||
#define FPU_FLAGS_ID 1
|
||||
|
||||
|
@ -138,6 +137,12 @@ void recCFC1(void)
|
|||
|
||||
MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] );
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
|
||||
if (_Fs_ == 31)
|
||||
{
|
||||
AND32ItoR(EAX, 0x0083c078); //remove always-zero bits
|
||||
OR32ItoR(EAX, 0x01000001); //set always-one bits
|
||||
}
|
||||
|
||||
if(EEINST_ISLIVE1(_Rt_))
|
||||
{
|
||||
|
@ -150,6 +155,7 @@ void recCFC1(void)
|
|||
EEINST_RESETHASLIVE1(_Rt_);
|
||||
MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void recCTC1( void )
|
||||
|
@ -439,8 +445,13 @@ void ToDouble(int reg)
|
|||
// converts really large normal numbers to PS2 signed max
|
||||
// converts really small normal numbers to zero (flush)
|
||||
// doesn't handle inf/nan/denormal
|
||||
void ToPS2FPU_Full(int reg, int flags, int absreg)
|
||||
void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
|
||||
{
|
||||
if (flags)
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU));
|
||||
if (flags && acc)
|
||||
AND32ItoM((uptr)&fpuRegs.ACCflag, ~1);
|
||||
|
||||
SSE_MOVAPS_XMM_to_XMM(absreg, reg);
|
||||
SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos);
|
||||
|
||||
|
@ -467,6 +478,8 @@ void ToPS2FPU_Full(int reg, int flags, int absreg)
|
|||
SSE_ORPS_M128_to_XMM(reg, (uptr)&s_pos); //clamp
|
||||
if (flags && FPU_FLAGS_OVERFLOW)
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO));
|
||||
if (flags && FPU_FLAGS_OVERFLOW && acc)
|
||||
OR32ItoM((uptr)&fpuRegs.ACCflag, 1);
|
||||
u8 *end3 = JMP8(0);
|
||||
|
||||
x86SetJ8(to_underflow);
|
||||
|
@ -488,10 +501,10 @@ void ToPS2FPU_Full(int reg, int flags, int absreg)
|
|||
x86SetJ8(end3);
|
||||
}
|
||||
|
||||
void ToPS2FPU(int reg, int flags, int absreg)
|
||||
void ToPS2FPU(int reg, bool flags, int absreg, bool acc)
|
||||
{
|
||||
if (FPU_RESULT)
|
||||
ToPS2FPU_Full(reg, flags, absreg);
|
||||
ToPS2FPU_Full(reg, flags, absreg, acc);
|
||||
else
|
||||
{
|
||||
SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //clamp
|
||||
|
@ -530,9 +543,7 @@ void SetMaxValue(int regd)
|
|||
|
||||
#define ALLOC_ACC(areg) { areg = _allocTempXMMreg(XMMT_FPS, -1); GET_ACC(areg); }
|
||||
|
||||
//doesn't hurt to clear flags even if not emulated
|
||||
#define CLEAR_OU_FLAGS { if (FPU_FLAGS_OVERFLOW || FPU_FLAGS_UNDERFLOW) \
|
||||
AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); }
|
||||
#define CLEAR_OU_FLAGS { AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU)); }
|
||||
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -638,7 +649,7 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
|
|||
static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
|
||||
SSE2_ADDSD_XMM_to_XMM, SSE2_MULSD_XMM_to_XMM, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM };
|
||||
|
||||
void recFPUOp(int info, int regd, int op)
|
||||
void recFPUOp(int info, int regd, int op, bool acc)
|
||||
{
|
||||
int sreg, treg;
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
|
@ -648,11 +659,9 @@ void recFPUOp(int info, int regd, int op)
|
|||
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
||||
CLEAR_OU_FLAGS;
|
||||
|
||||
recFPUOpXMM_to_XMM[op](sreg, treg);
|
||||
|
||||
ToPS2FPU(sreg, 1, treg);
|
||||
ToPS2FPU(sreg, true, treg, acc);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, sreg);
|
||||
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
|
@ -665,14 +674,14 @@ void recFPUOp(int info, int regd, int op)
|
|||
//------------------------------------------------------------------
|
||||
void recADD_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 0);
|
||||
recFPUOp(info, EEREC_D, 0, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recADDA_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_ACC, 0);
|
||||
recFPUOp(info, EEREC_ACC, 0, true);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -681,7 +690,7 @@ FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
|||
//------------------------------------------------------------------
|
||||
// BC1x XMM
|
||||
//------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
static void _setupBranchTest()
|
||||
{
|
||||
_eeFlushAllUnused();
|
||||
|
@ -716,7 +725,7 @@ void recBC1TL( void )
|
|||
{
|
||||
_setupBranchTest();
|
||||
recDoBranchImm_Likely(JZ32(0));
|
||||
}
|
||||
}*/
|
||||
//------------------------------------------------------------------
|
||||
|
||||
//TOKNOW : how does C.??.S behave with denormals?
|
||||
|
@ -747,13 +756,11 @@ void recC_EQ_xmm(int info)
|
|||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT);
|
||||
//REC_FPUFUNC(C_EQ);
|
||||
|
||||
void recC_F()
|
||||
/*void recC_F()
|
||||
{
|
||||
AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC );
|
||||
}
|
||||
//REC_FPUFUNC(C_F);
|
||||
}*/
|
||||
|
||||
void recC_LE_xmm(int info )
|
||||
{
|
||||
|
@ -878,7 +885,7 @@ void recDIVhelper1(int regd, int regt) // Sets flags
|
|||
|
||||
SSE2_DIVSD_XMM_to_XMM(regd, regt);
|
||||
|
||||
ToPS2FPU(regd, 0, regt);
|
||||
ToPS2FPU(regd, false, regt, false);
|
||||
|
||||
x86SetJ32(bjmp32);
|
||||
|
||||
|
@ -892,7 +899,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags
|
|||
|
||||
SSE2_DIVSD_XMM_to_XMM(regd, regt);
|
||||
|
||||
ToPS2FPU(regd, 0, regt);
|
||||
ToPS2FPU(regd, false, regt, false);
|
||||
}
|
||||
|
||||
void recDIV_S_xmm(int info)
|
||||
|
@ -934,147 +941,79 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
|||
//------------------------------------------------------------------
|
||||
// MADD/MSUB XMM
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// currently : flags might not be set too correctly (underflows)
|
||||
void recMaddsub(int info, int regd, int op)
|
||||
|
||||
// Unlike what the documentation implies, it seems that MADD/MSUB support all numbers just like other operations
|
||||
// The complex overflow conditions the document describes apparently test whether the multiplication's result
|
||||
// has overflowed and whether the last operation that used ACC as a destination has overflowed.
|
||||
// For example, { adda.s -MAX, 0.0 ; madd.s fd, MAX, 1.0 } -> fd = 0
|
||||
// while { adda.s -MAX, -MAX ; madd.s fd, MAX, 1.0 } -> fd = -MAX
|
||||
// (where MAX is 0x7fffffff and -MAX is 0xffffffff)
|
||||
void recMaddsub(int info, int regd, int op, bool acc)
|
||||
{
|
||||
if (FPU_CLAMPISH_MADD_MSUB)
|
||||
{
|
||||
int sreg, treg;
|
||||
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
||||
//CLEAR_OU_FLAGS; //no point, done later again
|
||||
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
|
||||
ToPS2FPU(sreg, 1, treg);
|
||||
CLEAR_OU_FLAGS; //again
|
||||
|
||||
GET_ACC(treg);
|
||||
|
||||
if (FPU_ADD_SUB_HACK) //ADD or SUB
|
||||
FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!!
|
||||
|
||||
|
||||
// TEST RESULT AND ACCUMULATOR FOR "INFINITIES", RETURN CORRECTLY-SIGNED "INFINITY" IF NEEDED.
|
||||
// OTHERWISE, CONVERT TO DOUBLE (NO NEED FOR SPECIAL CONVERSION)
|
||||
|
||||
SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&pos_inf); //sets ZF if equal or uncomparable
|
||||
u8 *mulovf = JE8(0);
|
||||
SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&neg_inf);
|
||||
u8 *mulovf2 = JE8(0);
|
||||
SSE2_CVTSS2SD_XMM_to_XMM(sreg, sreg); //else, simply convert
|
||||
|
||||
SSE_UCOMISS_M32_to_XMM(treg, (uptr)&pos_inf); //sets ZF if equal or uncomparable
|
||||
u8 *accovf = JE8(0);
|
||||
SSE_UCOMISS_M32_to_XMM(treg, (uptr)&neg_inf);
|
||||
u8 *accovf2 = JE8(0);
|
||||
SSE2_CVTSS2SD_XMM_to_XMM(treg, treg); //else, simply convert
|
||||
u8 *operation = JMP8(0);
|
||||
|
||||
x86SetJ8(mulovf);
|
||||
x86SetJ8(mulovf2);
|
||||
if (op == 1) //sub
|
||||
SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg);
|
||||
SSE_MOVAPS_XMM_to_XMM(treg, sreg);
|
||||
SetMaxValue(treg); //clamp and continue to set ovf flag below
|
||||
|
||||
x86SetJ8(accovf);
|
||||
x86SetJ8(accovf2);
|
||||
if (FPU_FLAGS_OVERFLOW) //do not SetMaxValue (maybe...)
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO);
|
||||
u32 *skipall = JMP32(0);
|
||||
|
||||
x86SetJ8(operation);
|
||||
|
||||
|
||||
// PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE (DIFFERENT THAN USUAL)
|
||||
|
||||
if (op == 1)
|
||||
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
|
||||
else
|
||||
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
|
||||
|
||||
//test for overflow and underflow (different than usual)
|
||||
int absreg = sreg;
|
||||
|
||||
SSE_MOVAPS_XMM_to_XMM(absreg, treg);
|
||||
SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos);
|
||||
|
||||
SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_cvt_overflow);
|
||||
u8 *to_overflow = JAE8(0);
|
||||
|
||||
SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_underflow);
|
||||
u8 *to_underflow = JB8(0);
|
||||
|
||||
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg); //simply convert
|
||||
u8 *end = JMP8(0);
|
||||
|
||||
x86SetJ8(to_overflow);
|
||||
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg);
|
||||
SetMaxValue(treg);
|
||||
if (FPU_FLAGS_OVERFLOW)
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO));
|
||||
u8 *end2 = JMP8(0);
|
||||
|
||||
x86SetJ8(to_underflow);
|
||||
if (FPU_FLAGS_UNDERFLOW) //set underflow flags if not zero
|
||||
{
|
||||
SSE2_XORPD_XMM_to_XMM(absreg, absreg);
|
||||
SSE2_UCOMISD_XMM_to_XMM(treg, absreg);
|
||||
u8 *is_zero = JE8(0);
|
||||
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagU | FPUflagSU));
|
||||
|
||||
x86SetJ8(is_zero);
|
||||
}
|
||||
SSE2_CVTSD2SS_XMM_to_XMM(treg, treg);
|
||||
SSE_ANDPS_M128_to_XMM(treg, (uptr)&s_neg); //flush to zero
|
||||
|
||||
x86SetJ8(end);
|
||||
x86SetJ8(end2);
|
||||
x86SetJ32(skipall);
|
||||
|
||||
SSE_MOVSS_XMM_to_XMM(regd, treg);
|
||||
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
}
|
||||
int sreg, treg;
|
||||
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
|
||||
ToPS2FPU(sreg, true, treg, false);
|
||||
GET_ACC(treg);
|
||||
|
||||
if (FPU_ADD_SUB_HACK) //ADD or SUB
|
||||
FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!!
|
||||
|
||||
// TEST FOR ACC/MUL OVERFLOWS, PROPAGATE THEM IF THEY OCCUR
|
||||
|
||||
TEST32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO);
|
||||
u8 *mulovf = JNZ8(0);
|
||||
ToDouble(sreg); //else, convert
|
||||
|
||||
TEST32ItoM((uptr)&fpuRegs.ACCflag, 1);
|
||||
u8 *accovf = JNZ8(0);
|
||||
ToDouble(treg); //else, convert
|
||||
u8 *operation = JMP8(0);
|
||||
|
||||
x86SetJ8(mulovf);
|
||||
if (op == 1) //sub
|
||||
SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg);
|
||||
SSE_MOVAPS_XMM_to_XMM(treg, sreg); //fall through below
|
||||
|
||||
x86SetJ8(accovf);
|
||||
SetMaxValue(treg); //just in case... I think it has to be a MaxValue already here
|
||||
CLEAR_OU_FLAGS; //clear U flag
|
||||
if (FPU_FLAGS_OVERFLOW)
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO);
|
||||
if (FPU_FLAGS_OVERFLOW && acc)
|
||||
OR32ItoM((uptr)&fpuRegs.ACCflag, 1);
|
||||
u32 *skipall = JMP32(0);
|
||||
|
||||
// PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE
|
||||
|
||||
x86SetJ8(operation);
|
||||
if (op == 1)
|
||||
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
|
||||
else
|
||||
{
|
||||
int sreg, treg;
|
||||
ALLOC_S(sreg); ALLOC_T(treg);
|
||||
ToDouble(sreg); ToDouble(treg);
|
||||
|
||||
CLEAR_OU_FLAGS;
|
||||
|
||||
SSE2_MULSD_XMM_to_XMM(sreg, treg);
|
||||
|
||||
GET_ACC(treg); ToDouble(treg);
|
||||
|
||||
if (op == 1)
|
||||
SSE2_SUBSD_XMM_to_XMM(treg, sreg);
|
||||
else
|
||||
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
|
||||
|
||||
ToPS2FPU(treg, 1, sreg);
|
||||
SSE_MOVSS_XMM_to_XMM(regd, treg);
|
||||
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
}
|
||||
SSE2_ADDSD_XMM_to_XMM(treg, sreg);
|
||||
|
||||
ToPS2FPU(treg, true, sreg, acc);
|
||||
x86SetJ32(skipall);
|
||||
|
||||
SSE_MOVSS_XMM_to_XMM(regd, treg);
|
||||
|
||||
_freeXMMreg(sreg); _freeXMMreg(treg);
|
||||
}
|
||||
|
||||
void recMADD_S_xmm(int info)
|
||||
{
|
||||
recMaddsub(info, EEREC_D, 0);
|
||||
recMaddsub(info, EEREC_D, 0, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recMADDA_S_xmm(int info)
|
||||
{
|
||||
recMaddsub(info, EEREC_ACC, 0);
|
||||
recMaddsub(info, EEREC_ACC, 0, true);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -1088,14 +1027,14 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X
|
|||
//TOKNOW : handles denormals like VU, maybe?
|
||||
void recMAX_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 2);
|
||||
recFPUOp(info, EEREC_D, 2, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recMIN_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 3);
|
||||
recFPUOp(info, EEREC_D, 3, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -1120,33 +1059,32 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS);
|
|||
|
||||
void recMSUB_S_xmm(int info)
|
||||
{
|
||||
recMaddsub(info, EEREC_D, 1);
|
||||
recMaddsub(info, EEREC_D, 1, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recMSUBA_S_xmm(int info)
|
||||
{
|
||||
recMaddsub(info, EEREC_ACC, 1);
|
||||
recMaddsub(info, EEREC_ACC, 1, true);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT);
|
||||
//------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// MUL XMM
|
||||
//------------------------------------------------------------------
|
||||
void recMUL_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 1);
|
||||
recFPUOp(info, EEREC_D, 1, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
||||
void recMULA_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_ACC, 1);
|
||||
recFPUOp(info, EEREC_ACC, 1, true);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -1175,7 +1113,7 @@ FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS);
|
|||
|
||||
void recSUB_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_D, 4);
|
||||
recFPUOp(info, EEREC_D, 4, false);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -1183,7 +1121,7 @@ FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
|
|||
|
||||
void recSUBA_S_xmm(int info)
|
||||
{
|
||||
recFPUOp(info, EEREC_ACC, 4);
|
||||
recFPUOp(info, EEREC_ACC, 4, true);
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);
|
||||
|
@ -1243,7 +1181,7 @@ void recSQRT_S_xmm(int info)
|
|||
|
||||
SSE2_SQRTSD_XMM_to_XMM(EEREC_D, EEREC_D);
|
||||
|
||||
ToPS2FPU(EEREC_D, 0, t1reg);
|
||||
ToPS2FPU(EEREC_D, false, t1reg, false);
|
||||
|
||||
x86SetJ32(pjmpx);
|
||||
|
||||
|
@ -1278,7 +1216,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
|
||||
AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
|
||||
OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags (even when 0/0)
|
||||
SSE_XORPS_XMM_to_XMM(regd, regt); // Make regd Positive or Negative
|
||||
SetMaxValue(regd); //clamp to max
|
||||
pjmp32 = JMP32(0);
|
||||
|
@ -1297,7 +1235,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
SSE2_SQRTSD_XMM_to_XMM(regt, regt);
|
||||
SSE2_DIVSD_XMM_to_XMM(regd, regt);
|
||||
|
||||
ToPS2FPU(regd, 0, regt);
|
||||
ToPS2FPU(regd, false, regt, false);
|
||||
x86SetJ32(pjmp32);
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1313,7 +1251,7 @@ void recRSQRThelper2(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
SSE2_SQRTSD_XMM_to_XMM(regt, regt);
|
||||
SSE2_DIVSD_XMM_to_XMM(regd, regt);
|
||||
|
||||
ToPS2FPU(regd, 0, regt);
|
||||
ToPS2FPU(regd, false, regt, false);
|
||||
}
|
||||
|
||||
void recRSQRT_S_xmm(int info)
|
||||
|
|
|
@ -53,8 +53,6 @@ uptr psxhwLUT[0x10000];
|
|||
#define MAPBASE 0x48000000
|
||||
#define RECMEM_SIZE (8*1024*1024)
|
||||
|
||||
#define PSX_MEMMASK 0x5fffffff // mask when comparing two pcs
|
||||
|
||||
// R3000A statics
|
||||
int psxreclog = 0;
|
||||
|
||||
|
@ -572,7 +570,11 @@ void recResetIOP()
|
|||
|
||||
for (int i = 0; i < 0x10000; i++)
|
||||
recLUT_SetPage(psxRecLUT, 0, 0, 0, i, 0);
|
||||
|
||||
// IOP knows 64k pages, hence for the 0x10000's
|
||||
|
||||
// The bottom 2 bits of PC are always zero, so we <<14 to "compress"
|
||||
// the pc indexer into its lowest common denominator.
|
||||
|
||||
// We're only mapping 20 pages here in 4 places.
|
||||
// 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4
|
||||
for (int i=0; i<0x80; i++)
|
||||
|
@ -619,54 +621,6 @@ static void recShutdown()
|
|||
|
||||
#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code
|
||||
|
||||
/*
|
||||
static __forceinline void R3000AExecute()
|
||||
{
|
||||
BASEBLOCK* pblock;
|
||||
|
||||
pblock = PSX_GETBLOCK(psxRegs.pc);
|
||||
|
||||
if ( !pblock->GetFnptr() || (pblock->GetStartPC()&PSX_MEMMASK) != (psxRegs.pc&PSX_MEMMASK) ) {
|
||||
psxRecRecompile(psxRegs.pc);
|
||||
}
|
||||
|
||||
assert( pblock->GetFnptr() != 0 );
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
fnptr = (u8*)pblock->GetFnptr();
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
__asm {
|
||||
// save data
|
||||
mov oldesi, esi;
|
||||
mov s_uSaveESP, esp;
|
||||
sub s_uSaveESP, 8;
|
||||
push ebp;
|
||||
|
||||
call fnptr; // jump into function
|
||||
// restore data
|
||||
pop ebp;
|
||||
mov esi, oldesi;
|
||||
}
|
||||
|
||||
#else // linux
|
||||
|
||||
__asm__("movl %%esi, %0\n"
|
||||
"movl %%esp, %1\n"
|
||||
"sub $8, %%esp\n"
|
||||
"push %%ebp\n"
|
||||
"call *%2\n"
|
||||
"pop %%ebp\n"
|
||||
"movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) : );
|
||||
#endif // _MSC_VER
|
||||
|
||||
#else
|
||||
((R3000AFNPTR)pblock->GetFnptr())();
|
||||
#endif
|
||||
}*/
|
||||
|
||||
u32 g_psxlastpc = 0;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
|
Loading…
Reference in New Issue