diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 333c3d60d9..2c6f1cadcd 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -165,8 +165,6 @@ extern s32 psxCycleEE; // tracks IOP's current sych status with the EE #define _JumpTarget_ ((_Target_ << 2) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction #define _BranchTarget_ (((s32)(s16)_Imm_ * 4) + _PC_) // Calculates the target during a branch instruction -//#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction -//#define _BranchTarget_ ((short)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction #define _SetLink(x) psxRegs.GPR.r[x] = _PC_ + 4; // Sets the return address in the link register diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 9c14139a94..0117b1d8af 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -135,6 +135,7 @@ struct fpuRegisters { FPRreg fpr[32]; // 32bit floating point registers u32 fprc[32]; // 32bit floating point control registers FPRreg ACC; // 32 bit accumulator + u32 ACCflag; // an internal accumulator overflow flag }; struct tlbs diff --git a/pcsx2/SafeArray.h b/pcsx2/SafeArray.h index cd893f5ed3..11f156f3c6 100644 --- a/pcsx2/SafeArray.h +++ b/pcsx2/SafeArray.h @@ -67,10 +67,10 @@ extern void pcsx2_aligned_free(void* pmem); } -////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// // Handy little class for allocating a resizable memory block, complete with // exception-based error handling and automatic cleanup. - +// template< typename T > class SafeArray : public NoncopyableObject { @@ -86,11 +86,12 @@ protected: int m_size; // size of the allocation of memory const static std::string m_str_Unnamed; + protected: - // Internal contructor for use by derrived classes. This allws a derrived class to + // Internal constructor for use by derived classes. 
This allows a derived class to // use its own memory allocation (with an aligned memory, for example). // Throws: - // Exception::OutOfMemory if the allocated_mem pointr is NULL. + // Exception::OutOfMemory if the allocated_mem pointer is NULL. explicit SafeArray( const std::string& name, T* allocated_mem, int initSize ) : Name( name ) , ChunkSize( DefaultChunkSize ) @@ -139,8 +140,6 @@ public: // amount requested. The memory allocation is not resized smaller. void MakeRoomFor( int blockSize ) { - std::string temp; - if( blockSize > m_size ) { const uint newalloc = blockSize + ChunkSize; @@ -158,12 +157,12 @@ public: } // Gets a pointer to the requested allocation index. - // DevBuilds : Throws std::out_of_range() if the index is invalid. + // DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid. T *GetPtr( uint idx=0 ) { return _getPtr( idx ); } const T *GetPtr( uint idx=0 ) const { return _getPtr( idx ); } // Gets an element of this memory allocation much as if it were an array. - // DevBuilds : Throws std::out_of_range() if the index is invalid. + // DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid. T& operator[]( int idx ) { return *_getPtr( (uint)idx ); } const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); } @@ -180,7 +179,7 @@ protected: // Performance Considerations: This function adds quite a bit of overhead // to array indexing and thus should be done infrequently if used in // time-critical situations. Instead of using it from inside loops, cache - // the pointer into a local variable and use stad (unsafe) C indexes. + // the pointer into a local variable and use std (unsafe) C indexes. 
T* _getPtr( uint i ) const { #ifdef PCSX2_DEVBUILD @@ -199,7 +198,138 @@ protected: }; -////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// +// +// +template< typename T > +class SafeList : public SafeArray +{ +public: + static const int DefaultChunkSize = 0x80 * sizeof(T); + +public: + const std::string Name; // user-assigned block name + int ChunkSize; // assigned DefaultChunkSize on init, reconfigurable at any time. + +protected: + T* m_ptr; + int m_allocsize; // size of the allocation of memory + uint m_length; // length of the array (active items, not buffer allocation) + + const static std::string m_str_Unnamed; + +protected: + void _boundsCheck( uint i ) + { + if( IsDevBuild && i >= (uint)m_length ) + { + assert( 0 ); // makes debugging easier sometimes. :) + throw Exception::IndexBoundsFault( + "Index out of bounds on SafeArray: " + Name + + " (index=" + to_string(i) + + ", length=" + to_string(m_length) + ")" + ); + } + } + +public: + virtual ~SafeList() + { + } + + explicit SafeList( const std::string& name="Unnamed" ) : + Name( name ) + , ChunkSize( DefaultChunkSize ) + , m_ptr( NULL ) + , m_allocsize( 0 ) + , m_length( 0 ) + { + } + + explicit SafeList( int initialSize, const std::string& name="Unnamed" ) : + Name( name ) + , ChunkSize( DefaultChunkSize ) + , m_ptr( (T*)malloc( initialSize * sizeof(T) ) ) + , m_allocsize( initialSize ) + , m_length( 0 ) + { + if( m_ptr == NULL ) + throw Exception::OutOfMemory(); + } + + // Returns the size of the list, as according to the array type. This includes + // mapped items only. The actual size of the allocation may differ. + int GetLength() const { return m_length; } + + // Returns the size of the list, in bytes. This includes mapped items only. + // The actual size of the allocation may differ. 
+ int GetSizeInBytes() const { return m_length * sizeof(T); } + + // Ensures that the allocation is large enough to fit data of the + // amount requested. The memory allocation is not resized smaller. + void MakeRoomFor( int blockSize ) + { + if( blockSize > m_allocsize ) + { + const uint newalloc = blockSize + ChunkSize; + m_ptr = _virtual_realloc( newalloc ); + if( m_ptr == NULL ) + { + throw Exception::OutOfMemory( + "Out-of-memory on list re-allocation. " + "Old size: " + to_string( m_allocsize ) + " bytes, " + "New size: " + to_string( newalloc ) + " bytes" + ); + } + m_allocsize = newalloc; + } + } + + // Gets an element of this memory allocation much as if it were an array. + // DevBuilds : Throws Exception::IndexBoundsFault() if the index is invalid. + T& operator[]( int idx ) { return *_getPtr( (uint)idx ); } + const T& operator[]( int idx ) const { return *_getPtr( (uint)idx ); } + + int Add( const T& src ) + { + MakeRoomFor( m_length + 1 ); + m_ptr[m_length] = src; + return m_length++; + } + + // Performs a standard array-copy removal of the given item. All items past the + // given item are copied over. Throws Exception::IndexBoundsFault() if the index + // is invalid (devbuilds only) + void Remove( int index ) + { + _boundsCheck( index ); + int copylen = m_length - index; + if( copylen > 0 ) + memcpy_fast( &m_ptr[index], &m_ptr[index+1], copylen ); + } + + virtual SafeList* Clone() const + { + SafeList* retval = new SafeList( m_length ); + memcpy_fast( retval->GetPtr(), m_ptr, sizeof(T) * m_length ); + return retval; + } + +protected: + // A safe array index fetcher. Throws an exception if the array index + // is outside the bounds of the array. + // Performance Considerations: This function adds quite a bit of overhead + // to array indexing and thus should be done infrequently if used in + // time-critical situations. Instead of using it from inside loops, cache + // the pointer into a local variable and use std (unsafe) C indexes. 
+ T* _getPtr( uint i ) const + { + _boundsCheck( i ); + return &m_ptr[i]; + } +}; + +////////////////////////////////////////////////////////////////////////////////////////// // Handy little class for allocating a resizable memory block, complete with // exception-based error handling and automatic cleanup. // This one supports aligned data allocations too! @@ -214,7 +344,7 @@ protected: } // Appends "(align: xx)" to the name of the allocation in devel builds. - // Maybe useful,maybe not... no harm in atatching it. :D + // Maybe useful,maybe not... no harm in attaching it. :D string _getName( const string& src ) { #ifdef PCSX2_DEVBUILD diff --git a/pcsx2/SaveState.cpp b/pcsx2/SaveState.cpp index 5af572ef15..73b0e83193 100644 --- a/pcsx2/SaveState.cpp +++ b/pcsx2/SaveState.cpp @@ -92,7 +92,14 @@ void SaveState::FreezeAll() Freeze(cpuRegs); // cpu regs + COP0 Freeze(psxRegs); // iop regs - Freeze(fpuRegs); // fpu regs + if (GetVersion() >= 0x6) + Freeze(fpuRegs); + else + { + // Old versions didn't save the ACCflag... + FreezeLegacy(fpuRegs, sizeof(u32)); // fpu regs + fpuRegs.ACCflag = 0; + } Freeze(tlb); // tlbs Freeze(EEsCycle); @@ -190,15 +197,12 @@ gzLoadingState::gzLoadingState( const string& filename ) : gzread( m_file, &m_version, 4 ); - if( m_version != g_SaveVersion ) + if( m_version < g_SaveVersion ) { - if( ( m_version >> 16 ) == 0x7a30 ) - { - Console::Error( - "Savestate load aborted:\n" - "\tVTLB edition cannot safely load savestates created by the VM edition." ); - throw Exception::UnsupportedStateVersion( m_version ); - } + Console::Error( + "Savestate load aborted:\n" + "\tThe savestate was created with a newer version of Pcsx2. I don't know how to load it!" ); + throw Exception::UnsupportedStateVersion( m_version ); } _testCdvdCrc(); diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index ac26d1f086..0a4bc57927 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -27,14 +27,14 @@ // Savestate Versioning! 
// If you make changes to the savestate version, please increment the value below. -static const u32 g_SaveVersion = 0x8b400005; +static const u32 g_SaveVersion = 0x8b400006; -// this function is meant to be sued in the place of GSfreeze, and provides a safe layer +// this function is meant to be used in the place of GSfreeze, and provides a safe layer // between the GS saving function and the MTGS's needs. :) extern s32 CALLBACK gsSafeFreeze( int mode, freezeData *data ); // This class provides the base API for both loading and saving savestates. -// Normally you'll want to use one of the four "functional" derrived classes rather +// Normally you'll want to use one of the four "functional" derived classes rather // than this class directly: gzLoadingState, gzSavingState (gzipped disk-saved // states), and memLoadingState, memSavingState (uncompressed memory states). class SaveState diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index d48167be46..4623989b62 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -71,6 +71,7 @@ void recC_EQ_xmm(int info); void recC_LE_xmm(int info); void recC_LT_xmm(int info); void recCVT_S_xmm(int info); +void recCVT_W(); void recDIV_S_xmm(int info); void recMADD_S_xmm(int info); void recMADDA_S_xmm(int info); @@ -149,6 +150,12 @@ void recCFC1(void) MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] ); _deleteEEreg(_Rt_, 0); + if (_Fs_ == 31) + { + AND32ItoR(EAX, 0x0083c078); //remove always-zero bits + OR32ItoR(EAX, 0x01000001); //set always-one bits + } + if(EEINST_ISLIVE1(_Rt_)) { CDQ( ); @@ -952,6 +959,12 @@ FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); void recCVT_W() { + if (CHECK_FPU_FULL) + { + DOUBLE::recCVT_W(); + return; + } + int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); if( regs >= 0 ) diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index 86f52f919e..d563881610 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -23,6 +23,8 @@ #include "ix86/ix86.h" #include "iR5900.h" 
#include "iFPU.h" + +/* Version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */ //set overflow flag (set only if FPU_RESULT is 1) #define FPU_FLAGS_OVERFLOW 1 @@ -33,9 +35,6 @@ //but can cause problems due to insuffecient clamping levels in the VUs) #define FPU_RESULT 1 -//should be more correct when 1. see madd/msub documentation -#define FPU_CLAMPISH_MADD_MSUB 1 - //also impacts other aspects of DIV/R/SQRT correctness #define FPU_FLAGS_ID 1 @@ -138,6 +137,12 @@ void recCFC1(void) MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] ); _deleteEEreg(_Rt_, 0); + + if (_Fs_ == 31) + { + AND32ItoR(EAX, 0x0083c078); //remove always-zero bits + OR32ItoR(EAX, 0x01000001); //set always-one bits + } if(EEINST_ISLIVE1(_Rt_)) { @@ -150,6 +155,7 @@ void recCFC1(void) EEINST_RESETHASLIVE1(_Rt_); MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); } + } void recCTC1( void ) @@ -439,8 +445,13 @@ void ToDouble(int reg) // converts really large normal numbers to PS2 signed max // converts really small normal numbers to zero (flush) // doesn't handle inf/nan/denormal -void ToPS2FPU_Full(int reg, int flags, int absreg) +void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc) { + if (flags) + AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU)); + if (flags && acc) + AND32ItoM((uptr)&fpuRegs.ACCflag, ~1); + SSE_MOVAPS_XMM_to_XMM(absreg, reg); SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos); @@ -467,6 +478,8 @@ void ToPS2FPU_Full(int reg, int flags, int absreg) SSE_ORPS_M128_to_XMM(reg, (uptr)&s_pos); //clamp if (flags && FPU_FLAGS_OVERFLOW) OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO)); + if (flags && FPU_FLAGS_OVERFLOW && acc) + OR32ItoM((uptr)&fpuRegs.ACCflag, 1); u8 *end3 = JMP8(0); x86SetJ8(to_underflow); @@ -488,10 +501,10 @@ void ToPS2FPU_Full(int reg, int flags, int absreg) x86SetJ8(end3); } -void ToPS2FPU(int reg, int flags, int absreg) +void ToPS2FPU(int reg, bool flags, int absreg, bool acc) { if (FPU_RESULT) - 
ToPS2FPU_Full(reg, flags, absreg); + ToPS2FPU_Full(reg, flags, absreg, acc); else { SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //clamp @@ -530,9 +543,7 @@ void SetMaxValue(int regd) #define ALLOC_ACC(areg) { areg = _allocTempXMMreg(XMMT_FPS, -1); GET_ACC(areg); } -//doesn't hurt to clear flags even if not emulated -#define CLEAR_OU_FLAGS { if (FPU_FLAGS_OVERFLOW || FPU_FLAGS_UNDERFLOW) \ - AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); } +#define CLEAR_OU_FLAGS { AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU)); } //------------------------------------------------------------------ @@ -638,7 +649,7 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { SSE2_ADDSD_XMM_to_XMM, SSE2_MULSD_XMM_to_XMM, SSE2_MAXSD_XMM_to_XMM, SSE2_MINSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM }; -void recFPUOp(int info, int regd, int op) +void recFPUOp(int info, int regd, int op, bool acc) { int sreg, treg; ALLOC_S(sreg); ALLOC_T(treg); @@ -648,11 +659,9 @@ void recFPUOp(int info, int regd, int op) ToDouble(sreg); ToDouble(treg); - CLEAR_OU_FLAGS; - recFPUOpXMM_to_XMM[op](sreg, treg); - ToPS2FPU(sreg, 1, treg); + ToPS2FPU(sreg, true, treg, acc); SSE_MOVSS_XMM_to_XMM(regd, sreg); _freeXMMreg(sreg); _freeXMMreg(treg); @@ -665,14 +674,14 @@ void recFPUOp(int info, int regd, int op) //------------------------------------------------------------------ void recADD_S_xmm(int info) { - recFPUOp(info, EEREC_D, 0); + recFPUOp(info, EEREC_D, 0, false); } FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recADDA_S_xmm(int info) { - recFPUOp(info, EEREC_ACC, 0); + recFPUOp(info, EEREC_ACC, 0, true); } FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); @@ -681,7 +690,7 @@ FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); //------------------------------------------------------------------ // BC1x XMM 
//------------------------------------------------------------------ - + /* static void _setupBranchTest() { _eeFlushAllUnused(); @@ -716,7 +725,7 @@ void recBC1TL( void ) { _setupBranchTest(); recDoBranchImm_Likely(JZ32(0)); -} +}*/ //------------------------------------------------------------------ //TOKNOW : how does C.??.S behave with denormals? @@ -747,13 +756,11 @@ void recC_EQ_xmm(int info) } FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_EQ); -void recC_F() +/*void recC_F() { AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); -} -//REC_FPUFUNC(C_F); +}*/ void recC_LE_xmm(int info ) { @@ -878,7 +885,7 @@ void recDIVhelper1(int regd, int regt) // Sets flags SSE2_DIVSD_XMM_to_XMM(regd, regt); - ToPS2FPU(regd, 0, regt); + ToPS2FPU(regd, false, regt, false); x86SetJ32(bjmp32); @@ -892,7 +899,7 @@ void recDIVhelper2(int regd, int regt) // Doesn't sets flags SSE2_DIVSD_XMM_to_XMM(regd, regt); - ToPS2FPU(regd, 0, regt); + ToPS2FPU(regd, false, regt, false); } void recDIV_S_xmm(int info) @@ -934,147 +941,79 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); //------------------------------------------------------------------ // MADD/MSUB XMM //------------------------------------------------------------------ - -// currently : flags might not be set too correctly (underflows) -void recMaddsub(int info, int regd, int op) + +// Unlike what the documentation implies, it seems that MADD/MSUB support all numbers just like other operations +// The complex overflow conditions the document describes apparently test whether the multiplication's result +// has overflowed and whether the last operation that used ACC as a destination has overflowed. 
+// For example, { adda.s -MAX, 0.0 ; madd.s fd, MAX, 1.0 } -> fd = 0 +// while { adda.s -MAX, -MAX ; madd.s fd, MAX, 1.0 } -> fd = -MAX +// (where MAX is 0x7fffffff and -MAX is 0xffffffff) +void recMaddsub(int info, int regd, int op, bool acc) { - if (FPU_CLAMPISH_MADD_MSUB) - { - int sreg, treg; - - ALLOC_S(sreg); ALLOC_T(treg); - ToDouble(sreg); ToDouble(treg); - - //CLEAR_OU_FLAGS; //no point, done later again - - SSE2_MULSD_XMM_to_XMM(sreg, treg); - - ToPS2FPU(sreg, 1, treg); - CLEAR_OU_FLAGS; //again - - GET_ACC(treg); - - if (FPU_ADD_SUB_HACK) //ADD or SUB - FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!! - - - // TEST RESULT AND ACCUMULATOR FOR "INFINITIES", RETURN CORRECTLY-SIGNED "INFINITY" IF NEEDED. - // OTHERWISE, CONVERT TO DOUBLE (NO NEED FOR SPECIAL CONVERSION) - - SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&pos_inf); //sets ZF if equal or uncomparable - u8 *mulovf = JE8(0); - SSE_UCOMISS_M32_to_XMM(sreg, (uptr)&neg_inf); - u8 *mulovf2 = JE8(0); - SSE2_CVTSS2SD_XMM_to_XMM(sreg, sreg); //else, simply convert - - SSE_UCOMISS_M32_to_XMM(treg, (uptr)&pos_inf); //sets ZF if equal or uncomparable - u8 *accovf = JE8(0); - SSE_UCOMISS_M32_to_XMM(treg, (uptr)&neg_inf); - u8 *accovf2 = JE8(0); - SSE2_CVTSS2SD_XMM_to_XMM(treg, treg); //else, simply convert - u8 *operation = JMP8(0); - - x86SetJ8(mulovf); - x86SetJ8(mulovf2); - if (op == 1) //sub - SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg); - SSE_MOVAPS_XMM_to_XMM(treg, sreg); - SetMaxValue(treg); //clamp and continue to set ovf flag below - - x86SetJ8(accovf); - x86SetJ8(accovf2); - if (FPU_FLAGS_OVERFLOW) //do not SetMaxValue (maybe...) - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO); - u32 *skipall = JMP32(0); - - x86SetJ8(operation); - - - // PERFORM THE ACCUMULATION AND TEST RESULT. 
CONVERT TO SINGLE (DIFFERENT THAN USUAL) - - if (op == 1) - SSE2_SUBSD_XMM_to_XMM(treg, sreg); - else - SSE2_ADDSD_XMM_to_XMM(treg, sreg); - - //test for overflow and underflow (different than usual) - int absreg = sreg; - - SSE_MOVAPS_XMM_to_XMM(absreg, treg); - SSE2_ANDPD_M128_to_XMM(absreg, (uptr)&dbl_s_pos); - - SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_cvt_overflow); - u8 *to_overflow = JAE8(0); - - SSE2_UCOMISD_M64_to_XMM(absreg, (uptr)&dbl_underflow); - u8 *to_underflow = JB8(0); - - SSE2_CVTSD2SS_XMM_to_XMM(treg, treg); //simply convert - u8 *end = JMP8(0); - - x86SetJ8(to_overflow); - SSE2_CVTSD2SS_XMM_to_XMM(treg, treg); - SetMaxValue(treg); - if (FPU_FLAGS_OVERFLOW) - OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagO | FPUflagSO)); - u8 *end2 = JMP8(0); - - x86SetJ8(to_underflow); - if (FPU_FLAGS_UNDERFLOW) //set underflow flags if not zero - { - SSE2_XORPD_XMM_to_XMM(absreg, absreg); - SSE2_UCOMISD_XMM_to_XMM(treg, absreg); - u8 *is_zero = JE8(0); - - OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagU | FPUflagSU)); - - x86SetJ8(is_zero); - } - SSE2_CVTSD2SS_XMM_to_XMM(treg, treg); - SSE_ANDPS_M128_to_XMM(treg, (uptr)&s_neg); //flush to zero - - x86SetJ8(end); - x86SetJ8(end2); - x86SetJ32(skipall); - - SSE_MOVSS_XMM_to_XMM(regd, treg); - - _freeXMMreg(sreg); _freeXMMreg(treg); - } + int sreg, treg; + + ALLOC_S(sreg); ALLOC_T(treg); + ToDouble(sreg); ToDouble(treg); + + SSE2_MULSD_XMM_to_XMM(sreg, treg); + + ToPS2FPU(sreg, true, treg, false); + GET_ACC(treg); + + if (FPU_ADD_SUB_HACK) //ADD or SUB + FPU_ADD_SUB(treg, sreg); //might be problematic for something!!!! 
+ + // TEST FOR ACC/MUL OVERFLOWS, PROPAGATE THEM IF THEY OCCUR + + TEST32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO); + u8 *mulovf = JNZ8(0); + ToDouble(sreg); //else, convert + + TEST32ItoM((uptr)&fpuRegs.ACCflag, 1); + u8 *accovf = JNZ8(0); + ToDouble(treg); //else, convert + u8 *operation = JMP8(0); + + x86SetJ8(mulovf); + if (op == 1) //sub + SSE_XORPS_M128_to_XMM(sreg, (uptr)&s_neg); + SSE_MOVAPS_XMM_to_XMM(treg, sreg); //fall through below + + x86SetJ8(accovf); + SetMaxValue(treg); //just in case... I think it has to be a MaxValue already here + CLEAR_OU_FLAGS; //clear U flag + if (FPU_FLAGS_OVERFLOW) + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagO | FPUflagSO); + if (FPU_FLAGS_OVERFLOW && acc) + OR32ItoM((uptr)&fpuRegs.ACCflag, 1); + u32 *skipall = JMP32(0); + + // PERFORM THE ACCUMULATION AND TEST RESULT. CONVERT TO SINGLE + + x86SetJ8(operation); + if (op == 1) + SSE2_SUBSD_XMM_to_XMM(treg, sreg); else - { - int sreg, treg; - ALLOC_S(sreg); ALLOC_T(treg); - ToDouble(sreg); ToDouble(treg); - - CLEAR_OU_FLAGS; - - SSE2_MULSD_XMM_to_XMM(sreg, treg); - - GET_ACC(treg); ToDouble(treg); - - if (op == 1) - SSE2_SUBSD_XMM_to_XMM(treg, sreg); - else - SSE2_ADDSD_XMM_to_XMM(treg, sreg); - - ToPS2FPU(treg, 1, sreg); - SSE_MOVSS_XMM_to_XMM(regd, treg); - - _freeXMMreg(sreg); _freeXMMreg(treg); - } + SSE2_ADDSD_XMM_to_XMM(treg, sreg); + + ToPS2FPU(treg, true, sreg, acc); + x86SetJ32(skipall); + + SSE_MOVSS_XMM_to_XMM(regd, treg); + + _freeXMMreg(sreg); _freeXMMreg(treg); } void recMADD_S_xmm(int info) { - recMaddsub(info, EEREC_D, 0); + recMaddsub(info, EEREC_D, 0, false); } FPURECOMPILE_CONSTCODE(MADD_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); void recMADDA_S_xmm(int info) { - recMaddsub(info, EEREC_ACC, 0); + recMaddsub(info, EEREC_ACC, 0, true); } FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); @@ -1088,14 +1027,14 @@ FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|X 
//TOKNOW : handles denormals like VU, maybe? void recMAX_S_xmm(int info) { - recFPUOp(info, EEREC_D, 2); + recFPUOp(info, EEREC_D, 2, false); } FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recMIN_S_xmm(int info) { - recFPUOp(info, EEREC_D, 3); + recFPUOp(info, EEREC_D, 3, false); } FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); @@ -1120,33 +1059,32 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); void recMSUB_S_xmm(int info) { - recMaddsub(info, EEREC_D, 1); + recMaddsub(info, EEREC_D, 1, false); } FPURECOMPILE_CONSTCODE(MSUB_S, XMMINFO_WRITED|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); void recMSUBA_S_xmm(int info) { - recMaddsub(info, EEREC_ACC, 1); + recMaddsub(info, EEREC_ACC, 1, true); } FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); //------------------------------------------------------------------ - - + //------------------------------------------------------------------ // MUL XMM //------------------------------------------------------------------ void recMUL_S_xmm(int info) { - recFPUOp(info, EEREC_D, 1); + recFPUOp(info, EEREC_D, 1, false); } FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recMULA_S_xmm(int info) { - recFPUOp(info, EEREC_ACC, 1); + recFPUOp(info, EEREC_ACC, 1, true); } FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); @@ -1175,7 +1113,7 @@ FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); void recSUB_S_xmm(int info) { - recFPUOp(info, EEREC_D, 4); + recFPUOp(info, EEREC_D, 4, false); } FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); @@ -1183,7 +1121,7 @@ FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); void recSUBA_S_xmm(int info) { - recFPUOp(info, EEREC_ACC, 4); + recFPUOp(info, EEREC_ACC, 4, true); } FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); @@ 
-1243,7 +1181,7 @@ void recSQRT_S_xmm(int info) SSE2_SQRTSD_XMM_to_XMM(EEREC_D, EEREC_D); - ToPS2FPU(EEREC_D, 0, t1reg); + ToPS2FPU(EEREC_D, false, t1reg, false); x86SetJ32(pjmpx); @@ -1278,7 +1216,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg); AND32ItoR(tempReg, 1); //Check sign (if regt == zero, sign will be set) pjmp1 = JZ8(0); //Skip if not set - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags (even when 0/0) SSE_XORPS_XMM_to_XMM(regd, regt); // Make regd Positive or Negative SetMaxValue(regd); //clamp to max pjmp32 = JMP32(0); @@ -1297,7 +1235,7 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg SSE2_SQRTSD_XMM_to_XMM(regt, regt); SSE2_DIVSD_XMM_to_XMM(regd, regt); - ToPS2FPU(regd, 0, regt); + ToPS2FPU(regd, false, regt, false); x86SetJ32(pjmp32); _freeXMMreg(t1reg); @@ -1313,7 +1251,7 @@ void recRSQRThelper2(int regd, int regt) // Preforms the RSQRT function when reg SSE2_SQRTSD_XMM_to_XMM(regt, regt); SSE2_DIVSD_XMM_to_XMM(regd, regt); - ToPS2FPU(regd, 0, regt); + ToPS2FPU(regd, false, regt, false); } void recRSQRT_S_xmm(int info) diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 9d3ed2cbd8..aed618e8e8 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -53,8 +53,6 @@ uptr psxhwLUT[0x10000]; #define MAPBASE 0x48000000 #define RECMEM_SIZE (8*1024*1024) -#define PSX_MEMMASK 0x5fffffff // mask when comparing two pcs - // R3000A statics int psxreclog = 0; @@ -572,7 +570,11 @@ void recResetIOP() for (int i = 0; i < 0x10000; i++) recLUT_SetPage(psxRecLUT, 0, 0, 0, i, 0); - + // IOP knows 64k pages, hence for the 0x10000's + + // The bottom 2 bits of PC are always zero, so we <<14 to "compress" + // the pc indexer into its lowest common denominator. + // We're only mapping 20 pages here in 4 places. 
// 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4 for (int i=0; i<0x80; i++) @@ -619,54 +621,6 @@ static void recShutdown() #pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code -/* -static __forceinline void R3000AExecute() -{ - BASEBLOCK* pblock; - - pblock = PSX_GETBLOCK(psxRegs.pc); - - if ( !pblock->GetFnptr() || (pblock->GetStartPC()&PSX_MEMMASK) != (psxRegs.pc&PSX_MEMMASK) ) { - psxRecRecompile(psxRegs.pc); - } - - assert( pblock->GetFnptr() != 0 ); - -#ifdef _DEBUG - - fnptr = (u8*)pblock->GetFnptr(); - -#ifdef _MSC_VER - - __asm { - // save data - mov oldesi, esi; - mov s_uSaveESP, esp; - sub s_uSaveESP, 8; - push ebp; - - call fnptr; // jump into function - // restore data - pop ebp; - mov esi, oldesi; - } - -#else // linux - - __asm__("movl %%esi, %0\n" - "movl %%esp, %1\n" - "sub $8, %%esp\n" - "push %%ebp\n" - "call *%2\n" - "pop %%ebp\n" - "movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) : ); -#endif // _MSC_VER - -#else - ((R3000AFNPTR)pblock->GetFnptr())(); -#endif -}*/ - u32 g_psxlastpc = 0; #ifdef _MSC_VER