From 4b69e809e5dcff6c95b94d138e6f4f2193bc0b32 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Tue, 12 Aug 2008 08:03:07 +0000 Subject: [PATCH] Stuff that was changed: - Fixed up pcsx2_2005.sln to not give errors for missing plugin files. - PCSX2 now exits properly with ESC key and GSdx :p - Updated my VU Skip to the latest version I had. - Added/Modified a lot of speed hacks. - Added a few SSE and x86 instructions coded by DiablosOf. - Working on VUs: Added some code that fixes SPS in some games, but runs slow. Turn off this code with the speedhack "Disable Extra Overflow Checks" to get back the speed. - Also regarding the VUs, it seems MGS3 doesn't like something I coded. But I'm not sure what it is yet; hopefully I'll figure out the problem ^^ - Some other minor changes I probably forgot to mention :p git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@6 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/Counters.c | 25 +- pcsx2/FPU.c | 406 ++++++++++--- pcsx2/Misc.c | 32 +- pcsx2/Misc.h | 11 +- pcsx2/windows/CpuDlg.c | 8 +- pcsx2/windows/VCprojects/pcsx2_2005.sln | 72 --- pcsx2/windows/WinMain.c | 63 +- pcsx2/windows/ini.c | 19 +- pcsx2/windows/pcsx2.rc | 33 +- pcsx2/windows/resource.h | 31 +- pcsx2/x86/iFPU.c | 4 +- pcsx2/x86/iR3000A.cpp | 12 +- pcsx2/x86/iVUmicro.c | 777 ++++++++++++++++++++---- pcsx2/x86/iVUmicro.h | 13 +- pcsx2/x86/ix86-32/iR5900-32.c | 34 +- pcsx2/x86/ix86/ix86.c | 154 ++--- 16 files changed, 1168 insertions(+), 526 deletions(-) diff --git a/pcsx2/Counters.c b/pcsx2/Counters.c index f7659791e6..04a9fc286b 100644 --- a/pcsx2/Counters.c +++ b/pcsx2/Counters.c @@ -5,15 +5,15 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -31,7 +31,9 @@ extern u8 psxhblankgate; int hblankend = 0; Counter counters[6]; u32 nextCounter, nextsCounter; +static void (*s_prevExecuteVU0Block)() = NULL; static void (*s_prevExecuteVU1Block)() = NULL; + LARGE_INTEGER lfreq; // its so it doesnt keep triggering an interrupt once its reached its target @@ -126,7 +128,9 @@ void rcntInit() { rcntSet(); assert(Cpu != NULL && Cpu->ExecuteVU1Block != NULL ); + s_prevExecuteVU0Block = Cpu->ExecuteVU0Block; s_prevExecuteVU1Block = Cpu->ExecuteVU1Block; + } // debug code, used for stats @@ -246,8 +250,11 @@ void VSync() // swap the vsync field u32 newfield = (*(u32*)(PS2MEM_GS+0x1000)&0x2000) ? 
0 : 0x2000; *(u32*)(PS2MEM_GS+0x1000) = (*(u32*)(PS2MEM_GS+0x1000) & ~(1<<13)) | newfield; + + iFrame++; + // wait until GS stops if( CHECK_MULTIGS ) { GSRingBufSimplePacket(GS_RINGTYPE_VSYNC, newfield, 0, 0); @@ -303,7 +310,10 @@ void VSync() if( CHECK_MULTIGS ) GSRingBufSimplePacket(GS_RINGTYPE_FRAMESKIP, 1, 0, 0); else GSsetFrameSkip(1); if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_VUSKIP ) + { Cpu->ExecuteVU1Block = DummyExecuteVU1Block; + //Cpu->ExecuteVU0Block = NULL; + } bOkayToSkip = noSkipFrames; bKeepSkipping = yesSkipFrames; } @@ -312,8 +322,11 @@ void VSync() { if (bKeepSkipping <= 1) { //first set VU1 to enabled THEN unfreeze GS regs - if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_VUSKIP ) + if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_VUSKIP ) + { Cpu->ExecuteVU1Block = s_prevExecuteVU1Block; + //Cpu->ExecuteVU0Block = s_prevExecuteVU0Block; + } if( CHECK_MULTIGS ) GSRingBufSimplePacket(GS_RINGTYPE_FRAMESKIP, 0, 0, 0); else GSsetFrameSkip(0); bOkayToSkip--; @@ -614,15 +627,11 @@ void rcntWmode(int index, u32 value) } //if(change != 0) SysPrintf("Weee\n"); //counters[index].sCycleT = cpuRegs.cycle - ((cpuRegs.cycle - counters[index].sCycleT) % counters[index].rate); -#ifdef PCSX2_DEVBUILD if(!(value & 0x80)) SysPrintf("Stopping\n"); -#endif } else { -#ifdef PCSX2_DEVBUILD SysPrintf("Counter %d not running c%x s%x c%x\n", index, counters[index].count, counters[index].sCycleT, cpuRegs.cycle); if(value & 0x80) SysPrintf("Starting %d, v%x\n", index, value); -#endif counters[index].sCycleT = cpuRegs.cycle; } //if((value & 0x80) && !(counters[index].mode & 0x80)) rcntUpd(index); //Counter wasnt started, so set the start cycle diff --git a/pcsx2/FPU.c b/pcsx2/FPU.c index a0e7e86202..862dd3ba61 100644 --- a/pcsx2/FPU.c +++ b/pcsx2/FPU.c @@ -5,12 +5,12 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -23,23 +23,129 @@ // Helper Macros //**************************************************************** + +// IEEE 754 Values +#define PosInfinity 0x7f800000 +#define NegInfinity 0xff800000 +#define posFmax 0x7F7FFFFF +#define negFmax 0xFF7FFFFF + + +/* Used in compare function to compensate for differences between IEEE 754 and the FPU. + Setting it to ~0x00000000 = Compares Exact Value. (comment out this macro for faster Exact Compare method) + Setting it to ~0x00000001 = Discards the least significant bit when comparing. + Setting it to ~0x00000003 = Discards the least 2 significant bits when comparing... etc.. 
*/ +#define comparePrecision ~0x00000001 + +// Operands #define _Ft_ ( ( cpuRegs.code >> 16 ) & 0x1F ) #define _Fs_ ( ( cpuRegs.code >> 11 ) & 0x1F ) #define _Fd_ ( ( cpuRegs.code >> 6 ) & 0x1F ) +// Floats #define _FtValf_ fpuRegs.fpr[ _Ft_ ].f #define _FsValf_ fpuRegs.fpr[ _Fs_ ].f #define _FdValf_ fpuRegs.fpr[ _Fd_ ].f #define _FAValf_ fpuRegs.ACC.f -#define _ContVal_ fpuRegs.fprc[ 31 ] - -// Testing +// U32's #define _FtValUl_ fpuRegs.fpr[ _Ft_ ].UL #define _FsValUl_ fpuRegs.fpr[ _Fs_ ].UL #define _FdValUl_ fpuRegs.fpr[ _Fd_ ].UL #define _FAValUl_ fpuRegs.ACC.UL +// FPU Control Reg (FCR31) +#define _ContVal_ fpuRegs.fprc[ 31 ] + +// FCR31 Flags +#define FPUflagC 0X00800000 +#define FPUflagI 0X00020000 +#define FPUflagD 0X00010000 +#define FPUflagO 0X00008000 +#define FPUflagU 0X00004000 +#define FPUflagSI 0X00000040 +#define FPUflagSD 0X00000020 +#define FPUflagSO 0X00000010 +#define FPUflagSU 0X00000008 + +//**************************************************************** + +// If we have an infinity value, then Overflow has occurred. +#define checkOverflow(xReg, cFlagsToSet, shouldReturn) { \ + if ( ( xReg & ~0x80000000 ) == PosInfinity ) { \ + /*SysPrintf( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ \ + xReg = ( xReg & 0x80000000 ) | posFmax; \ + _ContVal_ |= cFlagsToSet; \ + if ( shouldReturn ) { return; } \ + } \ +} + +// If we have a denormal value, then Underflow has occurred. +#define checkUnderflow(xReg, cFlagsToSet, shouldReturn) { \ + if ( ( ( xReg & 0x7F800000 ) == 0 ) && ( ( xReg & 0x007FFFFF ) != 0 ) ) { \ + /*SysPrintf( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ \ + xReg &= 0x80000000; \ + _ContVal_ |= cFlagsToSet; \ + if ( shouldReturn ) { return; } \ + } \ +} + +/* Checks if Divide by Zero will occur. 
(z/y = x) + cFlagsToSet1 = Flags to set if (z != 0) + cFlagsToSet2 = Flags to set if (z == 0) + ( Denormals are counted as "0" ) +*/ +#define checkDivideByZero(xReg, yDivisorReg, zDividendReg, cFlagsToSet1, cFlagsToSet2, shouldReturn) { \ + if ( ( yDivisorReg & 0x7F800000 ) == 0 ) { \ + _ContVal_ |= ( ( zDividendReg & 0x7F800000 ) == 0 ) ? cFlagsToSet2 : cFlagsToSet1; \ + xReg = ( ( yDivisorReg ^ zDividendReg ) & 0x80000000 ) | posFmax; \ + if ( shouldReturn ) { return; } \ + } \ +} + +/* Clears the "Cause Flags" of the Control/Status Reg + The "EE Core Users Manual" implies that all the Cause flags are cleared every instruction... + But, the "EE Core Instruction Set Manual" says that only certain Cause Flags are cleared + for specific instructions... I'm just setting them to clear when the Instruction Set Manual + says to... (cottonvibes) +*/ +#define clearFPUFlags(cFlags) { \ + _ContVal_ &= ~( cFlags ) ; \ +} + +#ifdef comparePrecision +// This compare discards the least-significant bit(s) in order to solve some rounding issues. + #define C_cond_S(cond) { \ + FPRreg tempA, tempB; \ + tempA.UL = _FsValUl_ & comparePrecision; \ + tempB.UL = _FtValUl_ & comparePrecision; \ + _ContVal_ = ( ( tempA.f ) cond ( tempB.f ) ) ? \ + ( _ContVal_ | FPUflagC ) : \ + ( _ContVal_ & ~FPUflagC ); \ + } +#else +// Used for Comparing; This compares if the floats are exactly the same. + #define C_cond_S(cond) { \ + _ContVal_ = ( _FsValf_ cond _FtValf_ ) ? 
\ + ( _ContVal_ | FPUflagC ) : \ + ( _ContVal_ & ~FPUflagC ); \ + } +#endif + +// Conditional Branch +#define BC1(cond) \ + if ( ( _ContVal_ & FPUflagC ) cond 0 ) { \ + intDoBranch( _BranchTarget_ ); \ + } + +// Conditional Branch +#define BC1L(cond) \ + if ( ( _ContVal_ & FPUflagC ) cond 0 ) { \ + intDoBranch( _BranchTarget_ ); \ + } else cpuRegs.pc += 4; + +//**************************************************************** +// Used to manage FPU Opcodes //**************************************************************** void COP1() { @@ -49,26 +155,6 @@ void COP1() { Int_COP1PrintTable[_Rs_](); } -/********************************************************* -* Load and store for COP1 * -* Format: OP rt, offset(base) * -*********************************************************/ - - - -void LWC1() { - s32 addr; - - addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)(cpuRegs.code);// ((cpuRegs.code & 0x8000 ? 0xFFFF8000 : 0)| (cpuRegs.code & 0x7fff)); - memRead32(addr, &fpuRegs.fpr[_Rt_].UL); -} - -void SWC1() { - s32 addr; - addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)(cpuRegs.code);//((cpuRegs.code & 0x8000 ? 
0xFFFF8000 : 0)| (cpuRegs.code & 0x7fff)); - memWrite32(addr, fpuRegs.fpr[_Rt_].UL); -} - void COP1_BC1() { Int_COP1BC1PrintTable[_Rt_](); } @@ -87,88 +173,226 @@ void COP1_Unknown() { #endif } +//**************************************************************** +// FPU Opcodes +//**************************************************************** +float fpuDouble(u32 f) +{ + switch(f & 0x7f800000){ + case 0x0: + f &= 0x80000000; + return *(float*)&f; + break; + case 0x7f800000: + f = (f & 0x80000000)|0x7f7fffff; + return *(float*)&f; + break; + default: + return *(float*)&f; + break; + } +} -void MFC1() { - if ( !_Rt_ ) return; - cpuRegs.GPR.r[_Rt_].UD[0] = (s32)_FsValUl_; +void ABS_S() { + _FdValf_ = fpufabsf( fpuDouble( _FsValUl_ ) ); + clearFPUFlags( FPUflagO | FPUflagU ); +} + +void ADD_S() { + _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + checkOverflow( _FdValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void ADDA_S() { + _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + checkOverflow( _FAValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void BC1F() { + BC1(==); +} + +void BC1FL() { + BC1L(==); // Equal to 0 +} + +void BC1T() { + BC1(!=); +} + +void BC1TL() { + BC1L(!=); // different from 0 +} + +void C_EQ() { + C_cond_S(==); +} + +void C_F() { + clearFPUFlags( FPUflagC ); //clears C regardless +} + +void C_LE() { + C_cond_S(<=); +} + +void C_LT() { + C_cond_S(<); } void CFC1() { - if ( !_Rt_ || ( _Fs_ != 0 && _Fs_ != 31 ) ) return; + if ( !_Rt_ || ( (_Fs_ != 0) && (_Fs_ != 31) ) ) return; cpuRegs.GPR.r[_Rt_].UD[0] = (s32)fpuRegs.fprc[_Fs_]; } -void MTC1() { - _FsValUl_ = cpuRegs.GPR.r[_Rt_].UL[0]; -} - void CTC1() { if(_Fs_!=31) return; fpuRegs.fprc[_Fs_] = cpuRegs.GPR.r[_Rt_].UL[0]; } -#define C_cond_S(cond) \ - _ContVal_ = ( _FsValf_ cond _FtValf_ ) ? 
\ - ( _ContVal_ | 0x00800000 ) : \ - ( _ContVal_ & ~0x00800000 ); - -void C_F() { _ContVal_ &= ~0x00800000;} //clears C regardless -void C_EQ() { C_cond_S(==); } -void C_LT() { C_cond_S(<); } -void C_LE() { C_cond_S(<=); } - -#define BC1(cond) \ - if ( ( _ContVal_ & 0x00800000 ) cond 0 ) { \ - intDoBranch( _BranchTarget_ ); \ - } -void BC1F() { BC1(==); } -void BC1T() { BC1(!=); } - -#define BC1L(cond) \ - if ( ( _ContVal_ & 0x00800000 ) cond 0 ) { \ - intDoBranch( _BranchTarget_ ); \ - } else cpuRegs.pc += 4; -void BC1FL() { BC1L(==); } // Equal to 0 -void BC1TL() { BC1L(!=); } // different from 0 - - -void ADD_S() { _FdValf_ = _FsValf_ + _FtValf_; } -void SUB_S() { _FdValf_ = _FsValf_ - _FtValf_; } -void MUL_S() { _FdValf_ = _FsValf_ * _FtValf_; } -void MOV_S() { _FdValf_ = _FsValf_; } -void NEG_S() { _FdValf_ = -_FsValf_; } -void ADDA_S() { _FAValf_ = _FsValf_ + _FtValf_; } -void SUBA_S() { _FAValf_ = _FsValf_ - _FtValf_; } -void MULA_S() { _FAValf_ = _FsValf_ * _FtValf_; } -void MADD_S() { _FdValf_ = _FAValf_ + ( _FsValf_ * _FtValf_ ); } -void MSUB_S() { _FdValf_ = _FAValf_ - ( _FsValf_ * _FtValf_ ); } -void MADDA_S() { _FAValf_ += _FsValf_ * _FtValf_; } -void MSUBA_S() { _FAValf_ -= _FsValf_ * _FtValf_; } -void ABS_S() { _FdValf_ = fpufabsf(_FsValf_); } -void MAX_S() { _FdValf_ = max( _FsValf_, _FtValf_ ); } -void MIN_S() { _FdValf_ = min( _FsValf_, _FtValf_ ); } -void DIV_S() { if ( _FtValf_ ) { _FdValf_ = _FsValf_ / _FtValf_; } } -void SQRT_S() { - //if ( _FtValf_ >= 0.0f ) { _FdValf_ = fpusqrtf( _FtValf_ ); } - //else { - _FdValf_ = fpusqrtf(fpufabsf(_FtValf_)); - //} -} -void CVT_S() { _FdValf_ = (float)(s32)_FsValUl_; } -void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValUl_ = (s32)(float)_FsValf_; } - else if ( _FsValUl_ & 0x80000000 ) { _FdValUl_ = 0x80000000; } - else { _FdValUl_ = 0x7fffffff; } +void CVT_S() { + _FdValf_ = (float)(s32)_FsValUl_; + _FdValf_ = fpuDouble( _FdValUl_ ); } +void CVT_W() { + if ( ( _FsValUl_ & 
0x7F800000 ) <= 0x4E800000 ) { _FdValUl_ = (s32)(float)_FsValf_; } + else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } + else { _FdValUl_ = 0x80000000; } +} + +void DIV_S() { + checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI, 1 ); + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + checkOverflow( _FdValUl_, 0, 1); + checkUnderflow( _FdValUl_, 0, 1 ); +} + +void LWC1() { + u32 addr; + addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s32)(s16)(cpuRegs.code); + if (addr & 0x00000003) { SysPrintf( "FPU (LWC1 Opcode): Invalid Memory Address\n" ); return; } // Should signal an exception? + memRead32(addr, &fpuRegs.fpr[_Rt_].UL); +} + +/* The Instruction Set manual has an overly complicated way of + determining the flags that are set. Hopefully this shorter + method provides a similar outcome and is faster. (cottonvibes) +*/ +void MADD_S() { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + checkOverflow( _FdValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void MADDA_S() { + _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + checkOverflow( _FAValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void MAX_S() { + _FdValf_ = max( _FsValf_, _FtValf_ ); + clearFPUFlags( FPUflagO | FPUflagU ); +} + +void MFC1() { + if ( !_Rt_ ) return; + cpuRegs.GPR.r[_Rt_].UD[0] = (s32)_FsValUl_; +} + +void MIN_S() { + _FdValf_ = min( _FsValf_, _FtValf_ ); + clearFPUFlags( FPUflagO | FPUflagU ); +} + +void MOV_S() { + _FdValUl_ = _FsValUl_; +} + +void MSUB_S() { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + checkOverflow( _FdValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void MSUBA_S() { + _FAValf_ -= 
fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + checkOverflow( _FAValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void MTC1() { + _FsValUl_ = cpuRegs.GPR.r[_Rt_].UL[0]; +} + +void MUL_S() { + _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + checkOverflow( _FdValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void MULA_S() { + _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + checkOverflow( _FAValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void NEG_S() { + _FdValUl_ = (_FsValUl_ ^ 0x80000000); + clearFPUFlags( FPUflagO | FPUflagU ); +} + void RSQRT_S() { - if ( _FtValf_ >= 0.0f ) { - float tmp = fpusqrtf( _FtValf_ ); - if ( tmp != 0 ) { _FdValf_ = _FsValf_ / tmp; } + FPRreg temp; + if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = ( ( _FsValUl_ ^ _FtValUl_ ) & 0x80000000 ) | posFmax; + return; } + else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = fpusqrtf( fpufabsf( fpuDouble( _FtValUl_ ) ) ); + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + } + else { _FdValf_ = fpuDouble( _FsValUl_ ) / fpusqrtf( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero + + checkOverflow( _FdValUl_, 0, 1 ); + checkUnderflow( _FdValUl_, 0, 1 ); } -//3322 2222 2222 1111 1111 1100 0000 0000 -//1098 7654 3210 9876 5432 1098 7654 3210 -//0000 0000 0000 0000 0000 0000 0000 0000 +void SQRT_S() { + if ( ( _FtValUl_ & 0xFF800000 ) == 0x80000000 ) { _FdValUl_ = 0x80000000; } // If Ft = -0 + else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValf_ = fpusqrtf( fpufabsf( fpuDouble( _FtValUl_ ) ) ); + } + else { _FdValf_ = fpusqrtf( fpuDouble( _FtValUl_ ) ); } // If Ft is Positive + clearFPUFlags( FPUflagD ); +} + +void SUB_S() { + 
_FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + checkOverflow( _FdValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void SUBA_S() { + _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + checkOverflow( _FAValUl_, FPUflagO | FPUflagSO, 1 ); + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU, 1 ); +} + +void SWC1() { + u32 addr; + addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s32)(s16)(cpuRegs.code); + if (addr & 0x00000003) { SysPrintf( "FPU (SWC1 Opcode): Invalid Memory Address\n" ); return; } // Should signal an exception? + memWrite32(addr, fpuRegs.fpr[_Rt_].UL); +} \ No newline at end of file diff --git a/pcsx2/Misc.c b/pcsx2/Misc.c index bd6ffe674f..e4f6853ed7 100644 --- a/pcsx2/Misc.c +++ b/pcsx2/Misc.c @@ -1,19 +1,19 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2003 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include @@ -40,8 +40,6 @@ #include "Cache.h" -#include "Paths.h" - u32 dwSaveVersion = 0x7a300010; u32 dwCurSaveStateVer = 0; extern u32 s_iLastCOP0Cycle; @@ -581,8 +579,8 @@ int SaveState(char *file) { SysPrintf("Saving GS\n"); if( CHECK_MULTIGS ) { // have to call in thread, otherwise weird stuff will start happening - u64 uf = (uptr)f; - GSRingBufSimplePacket(GS_RINGTYPE_SAVE, (u32)(uf&0xffffffff), (u32)(uf>>32), 0); + uptr uf = (uptr)f; + GSRingBufSimplePacket(GS_RINGTYPE_SAVE, (int)(uf&0xffffffff), (int)(uf>>32), 0); gsWaitGS(); } else { @@ -713,8 +711,8 @@ int LoadState(char *file) { SysPrintf("Loading GS\n"); if( CHECK_MULTIGS ) { // have to call in thread, otherwise weird stuff will start happening - u64 uf = (uptr)f; - GSRingBufSimplePacket(GS_RINGTYPE_LOAD, (u32)(uf&0xffffffff), (u32)(uf>>32), 0); + uptr uf = (uptr)f; + GSRingBufSimplePacket(GS_RINGTYPE_LOAD, (int)(uf&0xffffffff), (int)(uf>>32), 0); gsWaitGS(); } else { @@ -757,7 +755,7 @@ int SaveGSState(char *file) return 0; } -extern HWND pDsp; +extern long pDsp; int LoadGSState(char *file) { int ret; @@ -768,7 +766,7 @@ int LoadGSState(char *file) f = gzopen(file, "rb"); if (f == NULL) { - sprintf(strfile, SSTATES_DIR "/%s", file); + sprintf(strfile, "sstates/%s", file); // try prefixing with sstates f = gzopen(strfile, "rb"); if( f == NULL ) { @@ -894,7 +892,7 @@ void ProcessFKeys(int fkey, int shift) assert(fkey >= 1 && fkey <= 12 ); switch(fkey) { case 1: - sprintf(Text, SSTATES_DIR "/%8.8X.%3.3d", ElfCRC, StatesC); + sprintf(Text, "sstates/%8.8X.%3.3d", ElfCRC, StatesC); ret = SaveState(Text); break; case 2: @@ -904,12 +902,12 @@ void ProcessFKeys(int fkey, int shift) StatesC = (StatesC+1)%NUM_STATES; 
SysPrintf("*PCSX2*: Selected State %ld\n", StatesC); if( GSchangeSaveState != NULL ) { - sprintf(Text, SSTATES_DIR "/%8.8X.%3.3d", ElfCRC, StatesC); + sprintf(Text, "sstates/%8.8X.%3.3d", ElfCRC, StatesC); GSchangeSaveState(StatesC, Text); } break; case 3: - sprintf (Text, SSTATES_DIR "/%8.8X.%3.3d", ElfCRC, StatesC); + sprintf (Text, "sstates/%8.8X.%3.3d", ElfCRC, StatesC); ret = LoadState(Text); break; @@ -996,10 +994,10 @@ void ProcessFKeys(int fkey, int shift) tok = strtok(NULL, " "); if( tok != NULL ) strcat(name, tok); - sprintf(Text, SSTATES_DIR "/%s.%d.gs", name, StatesC); + sprintf(Text, "sstates/%s.%d.gs", name, StatesC); } else - sprintf(Text, SSTATES_DIR "/%8.8X.%d.gs", ElfCRC, StatesC); + sprintf(Text, "sstates/%8.8X.%d.gs", ElfCRC, StatesC); SaveGSState(Text); } diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index 620de85d71..8855ca49cf 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -13,7 +13,7 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __MISC_H__ @@ -52,7 +52,14 @@ #define CHECK_DUALCORE (Config.Options&PCSX2_DUALCORE) #define CHECK_EEREC (Config.Options&PCSX2_EEREC) #define CHECK_COP2REC (Config.Options&PCSX2_COP2REC) // goes with ee option -#define CHECK_FORCEABS (~(Config.Hacks >> 1) & 1) // always on, (Config.Options&PCSX2_FORCEABS) +//------------ SPEED HACKS!!! --------------- +#define CHECK_OVERFLOW (!(Config.Hacks & 0x2)) +#define CHECK_EXTRA_OVERFLOW (!(Config.Hacks & 0x40)) +#define CHECK_EESYNC_HACK (Config.Hacks & 0x1) +#define CHECK_IOPSYNC_HACK (Config.Hacks & 0x10) +#define CHECK_EE_IOP_EXTRA (Config.Hacks & 0x20) +#define CHECK_DENORMALS ((Config.Hacks & 0x8) ? 
0xffc0 : 0xff80) //If enabled, Denormals are Zero for the recs +#define CHECK_FASTBRANCHES (Config.Hacks & 0x80) #define CHECK_FRAMELIMIT (Config.Options&PCSX2_FRAMELIMIT_MASK) diff --git a/pcsx2/windows/CpuDlg.c b/pcsx2/windows/CpuDlg.c index 79596d86a2..8979005042 100644 --- a/pcsx2/windows/CpuDlg.c +++ b/pcsx2/windows/CpuDlg.c @@ -1,19 +1,19 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2004 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/pcsx2/windows/VCprojects/pcsx2_2005.sln b/pcsx2/windows/VCprojects/pcsx2_2005.sln index c9499fde0a..de8dbbeca4 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2005.sln +++ b/pcsx2/windows/VCprojects/pcsx2_2005.sln @@ -2,18 +2,6 @@ Microsoft Visual Studio Solution File, Format Version 9.00 # Visual Studio 2005 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pcsx2", "pcsx2_2005.vcproj", "{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spu2PeopsSound", "..\..\..\plugins\spu2\PeopsSPU2\spu2PeopsSound_2005.vcproj", "{F9E64144-301B-48BC-8D35-A2686DBB1982}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ZeroSPU2", 
"..\..\..\plugins\spu2\zerospu2\ZeroSPU2_2005.vcproj", "{7F059854-568D-4E08-9D00-1E78E203E4DC}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ZeroPAD", "..\..\..\plugins\pad\zeropad\Windows\ZeroPAD_2005.vcproj", "{CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU2null", "..\..\..\plugins\spu2\SPU2null\Src\SPU2null_2005.vcproj", "{D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zerogs", "..\..\..\plugins\gs\zerogs\dx\Win32\zerogs_2005.vcproj", "{5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CDVDiso", "..\..\..\plugins\cdvd\CDVDiso\src\Win32\CDVDiso_2005.vcproj", "{BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug TLB|Win32 = Debug TLB|Win32 @@ -33,66 +21,6 @@ Global {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release TLB|Win32.Build.0 = Release TLB|Win32 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.ActiveCfg = Release|Win32 {1CEFD830-2B76-4596-A4EE-BCD7280A60BD}.Release|Win32.Build.0 = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Debug TLB|Win32.Build.0 = Debug|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Debug|Win32.ActiveCfg = Debug|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Debug|Win32.Build.0 = Debug|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release (to Public)|Win32.ActiveCfg = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release (to Public)|Win32.Build.0 = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release TLB|Win32.ActiveCfg = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release TLB|Win32.Build.0 = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release|Win32.ActiveCfg = Release|Win32 - {F9E64144-301B-48BC-8D35-A2686DBB1982}.Release|Win32.Build.0 = 
Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug TLB|Win32.Build.0 = Debug|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug|Win32.ActiveCfg = Debug|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Debug|Win32.Build.0 = Debug|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release (to Public)|Win32.ActiveCfg = Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release (to Public)|Win32.Build.0 = Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release TLB|Win32.ActiveCfg = Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release TLB|Win32.Build.0 = Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release|Win32.ActiveCfg = Release|Win32 - {7F059854-568D-4E08-9D00-1E78E203E4DC}.Release|Win32.Build.0 = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Debug TLB|Win32.Build.0 = Debug|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Debug|Win32.ActiveCfg = Debug|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Debug|Win32.Build.0 = Debug|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release (to Public)|Win32.ActiveCfg = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release (to Public)|Win32.Build.0 = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release TLB|Win32.ActiveCfg = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release TLB|Win32.Build.0 = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release|Win32.ActiveCfg = Release|Win32 - {CDD9DB83-3BD9-4ED8-BB83-399A2F65F022}.Release|Win32.Build.0 = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Debug TLB|Win32.Build.0 = Debug|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Debug|Win32.ActiveCfg = Debug|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Debug|Win32.Build.0 = Debug|Win32 - 
{D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release (to Public)|Win32.ActiveCfg = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release (to Public)|Win32.Build.0 = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release TLB|Win32.ActiveCfg = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release TLB|Win32.Build.0 = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release|Win32.ActiveCfg = Release|Win32 - {D87F63EC-5CD8-4A41-B416-16C7ABD4E5CB}.Release|Win32.Build.0 = Release|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug TLB|Win32.Build.0 = Debug|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug|Win32.ActiveCfg = Debug|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Debug|Win32.Build.0 = Debug|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release (to Public)|Win32.ActiveCfg = Release (to Public)|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release (to Public)|Win32.Build.0 = Release (to Public)|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release TLB|Win32.ActiveCfg = Release (to Public)|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release TLB|Win32.Build.0 = Release (to Public)|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release|Win32.ActiveCfg = Release|Win32 - {5C6B7D28-E73D-4F71-8FC0-17ADA640EBD8}.Release|Win32.Build.0 = Release|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Debug TLB|Win32.ActiveCfg = Debug|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Debug TLB|Win32.Build.0 = Debug|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Debug|Win32.ActiveCfg = Debug|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Debug|Win32.Build.0 = Debug|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release (to Public)|Win32.ActiveCfg = Release|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release (to Public)|Win32.Build.0 = Release|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release TLB|Win32.ActiveCfg = Release|Win32 - 
{BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release TLB|Win32.Build.0 = Release|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release|Win32.ActiveCfg = Release|Win32 - {BB27CC2C-28D1-4438-A0DF-3E120D01EDEC}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/pcsx2/windows/WinMain.c b/pcsx2/windows/WinMain.c index 4d1b57a3f2..17230adb98 100644 --- a/pcsx2/windows/WinMain.c +++ b/pcsx2/windows/WinMain.c @@ -5,15 +5,15 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define WINVER 0x0500 @@ -49,8 +49,6 @@ #include "cheats/cheats.h" -#include "../Paths.h" - #define COMPILEDATE __DATE__ static int efile; @@ -606,6 +604,10 @@ void CALLBACK KeyEvent(keyEvent* ev) case VK_F10: ProcessFKeys(10, shiftkey); break; case VK_F11: ProcessFKeys(11, shiftkey); break; case VK_F12: ProcessFKeys(12, shiftkey); break; + /*case VK_NUMPAD0: + Config.Hacks ^= 2; + if (Config.Hacks & 2) {SysPrintf( "Overflow Check OFF\n" );} else {SysPrintf( "Overflow Check ON\n" );} + break;*/ case VK_ESCAPE: #ifdef PCSX2_DEVBUILD @@ -616,16 +618,21 @@ void CALLBACK KeyEvent(keyEvent* ev) } #endif - ClosePlugins(); - if( !UseGui ) { - // not using GUI and user just quit, so exit - exit(0); - } + ClosePlugins(); + SysClose(); + //ReleasePlugins(); + 
//needReset = 1; + //efile = 0; + + //if( !UseGui ) { + //not using GUI and user just quit, so exit + exit(0); + //} CreateMainWindow(SW_SHOWNORMAL); RunGui(); - nDisableSC = 0; + nDisableSC = 0; break; default: GSkeyEvent(ev); @@ -707,18 +714,28 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { case WM_INITDIALOG: - if(Config.Hacks & 1) CheckDlgButton(hDlg, IDC_SYNCHACK, TRUE); - if(Config.Hacks & 2) CheckDlgButton(hDlg, IDC_ABSHACK, TRUE); - if(Config.Hacks & 4) CheckDlgButton(hDlg, IDC_SOUNDHACK, TRUE); - return TRUE; + if(Config.Hacks & 0x1) CheckDlgButton(hDlg, IDC_SYNCHACK, TRUE); + if(Config.Hacks & 0x2) CheckDlgButton(hDlg, IDC_OVERFLOWHACK, TRUE); + if(Config.Hacks & 0x4) CheckDlgButton(hDlg, IDC_SOUNDHACK, TRUE); + if(Config.Hacks & 0x8) CheckDlgButton(hDlg, IDC_DENORMALS, TRUE); + if(Config.Hacks & 0x10) CheckDlgButton(hDlg, IDC_SYNCHACK2, TRUE); + if(Config.Hacks & 0x20) CheckDlgButton(hDlg, IDC_SYNCHACK3, TRUE); + if(Config.Hacks & 0x40) CheckDlgButton(hDlg, IDC_OVERFLOWHACK_EXTRA, TRUE); + if(Config.Hacks & 0x80) CheckDlgButton(hDlg, IDC_FASTBRANCHES, TRUE); + return TRUE; case WM_COMMAND: if (LOWORD(wParam) == IDOK) { Config.Hacks = 0; - Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK) ? 1 : 0; - Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_ABSHACK) ? 2 : 0; - Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SOUNDHACK) ? 4 : 0; - + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK) ? 0x1 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_OVERFLOWHACK) ? 0x2 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SOUNDHACK) ? 0x4 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_DENORMALS) ? 0x8 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK2) ? 0x10 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_SYNCHACK3) ? 0x20 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_OVERFLOWHACK_EXTRA) ? 0x40 : 0; + Config.Hacks |= IsDlgButtonChecked(hDlg, IDC_FASTBRANCHES) ? 
0x80 : 0; + SaveConfig(); EndDialog(hDlg, TRUE); @@ -1350,16 +1367,16 @@ void ChangeLanguage(char *lang) { static int sinit=0; int SysInit() { - CreateDirectory(MEMCARDS_DIR, NULL); - CreateDirectory(SSTATES_DIR, NULL); + CreateDirectory("memcards", NULL); + CreateDirectory("sstates", NULL); #ifdef EMU_LOG - CreateDirectory(LOGS_DIR, NULL); + CreateDirectory("logs", NULL); #ifdef PCSX2_DEVBUILD if( g_TestRun.plogname != NULL ) emuLog = fopen(g_TestRun.plogname, "w"); if( emuLog == NULL ) - emuLog = fopen(LOGS_DIR "\\emuLog.txt","w"); + emuLog = fopen("logs\\emuLog.txt","w"); #endif if( emuLog != NULL ) diff --git a/pcsx2/windows/ini.c b/pcsx2/windows/ini.c index ff721fad66..6a22778c9f 100644 --- a/pcsx2/windows/ini.c +++ b/pcsx2/windows/ini.c @@ -1,21 +1,20 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2003 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* 15-09-2004 : file rewriten for work with inis (shadow) */ @@ -27,8 +26,6 @@ #include "win32.h" #include -#include "Paths.h" - int LoadConfig() { FILE *fp; @@ -44,11 +41,11 @@ int LoadConfig() { szTemp = strrchr(szIniFile, '\\'); if(!szTemp) return -1; - strcpy(szTemp, "\\" CONFIG_DIR "\\pcsx2.ini"); - fp=fopen(CONFIG_DIR "\\pcsx2.ini","rt");//check if pcsx2.ini really exists + strcpy(szTemp, "\\inis\\pcsx2.ini"); + fp=fopen("inis\\pcsx2.ini","rt");//check if pcsx2.ini really exists if (!fp) { - CreateDirectory(CONFIG_DIR,NULL); + CreateDirectory("inis",NULL); return -1; } fclose(fp); @@ -129,7 +126,7 @@ void SaveConfig() { szTemp = strrchr(szIniFile, '\\'); if(!szTemp) return; - strcpy(szTemp, "\\" CONFIG_DIR "\\pcsx2.ini"); + strcpy(szTemp, "\\inis\\pcsx2.ini"); //interface sprintf(szValue,"%s",Conf->Bios); WritePrivateProfileString("Interface","Bios",szValue,szIniFile); diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 9d3cab656d..c244f25d6a 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -938,19 +938,30 @@ BEGIN CONTROL 132,IDC_PS2SILVER_RECT,"Static",SS_BITMAP,0,167,70,74 END -IDD_HACKS DIALOGEX 0, 0, 186, 103 +IDD_HACKS DIALOGEX 0, 0, 343, 159 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "PCSX2 Speed Hacks" FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN - DEFPUSHBUTTON "OK",IDOK,41,82,50,14 - PUSHBUTTON "Cancel",IDCANCEL,96,82,50,14 - CONTROL "EE/IOP Sync Hack - General Speedup",IDC_SYNCHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,46,172,10 - CONTROL "Disable Forced ABS - Speeds up intense 3D",IDC_ABSHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,34,160,10 - CTEXT "These hacks will 
effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,172,17 + DEFPUSHBUTTON "OK",IDOK,119,138,50,14 + PUSHBUTTON "Cancel",IDCANCEL,174,138,50,14 + CONTROL "EE Sync Hack (x2) - Doubles the cycle rate of the EE.",IDC_SYNCHACK, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,49,329,10 + CONTROL "Disable All Overflow Checks - Doesn't check for overflow at all in the VU Recs.",IDC_OVERFLOWHACK, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,37,329,10 + CTEXT "These hacks will effect the speed of PCSX2 but possibly comprimise on compatability",IDC_HACKDESC,7,7,329,17 CONTROL "Tighter SPU2 Sync ( FFXII vids) - slower",IDC_SOUNDHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,59,154,9 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,118,329,9 + CONTROL "Denormals are Zero - Makes very small numbers be equal to zero. (Big speedup on Intel CPUs)",IDC_DENORMALS, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,104,329,9 + CONTROL "IOP Sync Hack (x2) - Doubles the cycle rate of the IOP.",IDC_SYNCHACK2, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,62,329,10 + CONTROL "EE/IOP Sync Hack (x3) - Makes EE and IOP hacks triple the cycle rate instead of doubling it.",IDC_SYNCHACK3, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,75,329,10 + CONTROL "Disable Extra Overflow Checks - Disables extra overflow checks used to help stop SPS.",IDC_OVERFLOWHACK_EXTRA, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,24,329,10 + CONTROL "EE/IOP Fast Branches - Quick branching (very small speedup)",IDC_FASTBRANCHES, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,89,329,10 END @@ -971,9 +982,9 @@ BEGIN IDD_HACKS, DIALOG BEGIN LEFTMARGIN, 7 - RIGHTMARGIN, 179 + RIGHTMARGIN, 336 TOPMARGIN, 7 - BOTTOMMARGIN, 96 + BOTTOMMARGIN, 152 END END #endif // APSTUDIO_INVOKED @@ -1549,7 +1560,7 @@ BEGIN IDD_CPUDLG, DIALOG BEGIN LEFTMARGIN, 7 - RIGHTMARGIN, 551 + RIGHTMARGIN, 558 TOPMARGIN, 7 END END diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index a80c2bbe84..7b45d766ef 100644 --- 
a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -1,21 +1,3 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team - * Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ //{{NO_DEPENDENCIES}} // Microsoft Visual C++ generated include file. 
// Used by pcsx2.rc @@ -508,8 +490,12 @@ #define IDC_CHECK2 1217 #define IDC_SYNCHACK 1217 #define IDC_SPU2HACK 1218 +#define IDC_SYNCHACK2 1218 #define IDC_VSYNCRATE 1219 +#define IDC_SYNCHACK3 1219 #define IDC_IOPGPR0 1220 +#define IDC_SYNCHACK4 1220 +#define IDC_FASTBRANCHES 1220 #define IDC_IOPGPR1 1221 #define IDC_IOPGPR2 1222 #define IDC_IOPGPR16 1223 @@ -582,6 +568,7 @@ #define IDC_ADDGS 1278 #define IDC_CONVERTEDCODE 1278 #define IDC_CUSTOM_CONSECUTIVE_FRAMES 1278 +#define IDC_OVERFLOWHACK 1278 #define IDC_HACKDESC 1279 #define IDC_CONVERT 1279 #define IDC_EDITPATCH 1279 @@ -589,6 +576,7 @@ #define IDC_READY 1280 #define IDC_ADDPATCH 1280 #define IDC_FRAMESKIP_LABEL2 1280 +#define IDC_OVERFLOWHACK_EXTRA 1280 #define IDC_GROUP 1281 #define IDC_ADDRAW 1281 #define IDC_FRAMESKIP_LABEL3 1281 @@ -617,6 +605,7 @@ #define IDC_ICON2 1300 #define IDC_CHECK1 1300 #define IDC_SOUNDHACK 1300 +#define IDC_DENORMALS 1301 #define IDC_FRAMELIMIT_OPTIONS 1303 #define IDC_LOG 1500 #define IDC_CPULOG 1500 @@ -647,8 +636,8 @@ #define IDC_IOPGPULOG 1527 #define IDC_IOPCNTLOG 1529 #define IDC_EECNTLOG 1530 -#define IDC_STDOUTPUTLOG 1531 -#define IDC_SYMLOG 1532 +#define IDC_SYMLOG 1531 +#define IDC_STDOUTPUTLOG 1532 #define IDC_SEARCH 1701 #define IDC_VALUE 1702 #define IDC_OLD 1703 @@ -752,7 +741,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 136 #define _APS_NEXT_COMMAND_VALUE 40018 -#define _APS_NEXT_CONTROL_VALUE 1301 +#define _APS_NEXT_CONTROL_VALUE 1304 #define _APS_NEXT_SYMED_VALUE 102 #endif #endif diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index bc5d578892..ae8c93cb3b 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -945,7 +945,7 @@ static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0, 0, 0 }; void recSQRT_S_xmm(int info) { if( info & PROCESS_EE_T ) { - //if( CHECK_FORCEABS ) { + //if( CHECK_OVERFLOW ) { if( EEREC_D == EEREC_T ) SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); else { SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); @@ 
-960,7 +960,7 @@ void recSQRT_S_xmm(int info) }*/ } else { - //if( CHECK_FORCEABS ) { + //if( CHECK_OVERFLOW ) { SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]); SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index d48703b17f..ecc0b2919f 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -1,19 +1,19 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2005 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ // recompiler reworked to add dynamic linking Jan06 @@ -969,8 +969,8 @@ void psxSetBranchImm( u32 imm ) *ptr = (uptr)JMP32((uptr)psxDispatcher - ( (uptr)x86Ptr + 5 )); } -#define USE_FAST_BRANCHES (Config.Hacks & 1) -#define PSXCYCLE_MULT ((Config.Hacks & 1) ? 2.125 : (17/16)) +#define USE_FAST_BRANCHES CHECK_FASTBRANCHES +#define PSXCYCLE_MULT (CHECK_IOPSYNC_HACK ? (CHECK_EE_IOP_EXTRA ? 
3.1875 : 2.125) : (17/16)) static void iPsxBranchTest(u32 newpc, u32 cpuBranch) { diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index 72cccf63f3..d8db8d3cf6 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -5,17 +5,16 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - // stop compiling if NORECBUILD build (only for Visual Studio) #if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) @@ -1045,8 +1044,6 @@ void CheckForOverflowSS_(int fdreg, int t0reg) // SSE_ANDPS_XMM_to_XMM(fdreg, t0reg); } - - void CheckForOverflow_(int fdreg, int t0reg, int keepxyzw) { // SSE_MAXPS_M128_to_XMM(fdreg, (u32)g_minvals); @@ -1063,19 +1060,250 @@ void CheckForOverflow_(int fdreg, int t0reg, int keepxyzw) SSE_ORPS_M128_to_XMM(t0reg, (uptr)&SSEmovMask[15-keepxyzw][0]); SSE_ANDPS_XMM_to_XMM(fdreg, t0reg); -// SSE_MOVAPS_M128_to_XMM(t0reg, (u32)s_expmask); -// SSE_ANDPS_XMM_to_XMM(t0reg, fdreg); -// SSE_CMPNEPS_M128_to_XMM(t0reg, (u32)s_expmask); -// //SSE_ORPS_M128_to_XMM(t0reg, (u32)g_minvals); -// SSE_ANDPS_XMM_to_XMM(fdreg, t0reg); + //SSE_MOVAPS_M128_to_XMM(t0reg, (u32)s_expmask); + //SSE_ANDPS_XMM_to_XMM(t0reg, fdreg); + //SSE_CMPNEPS_M128_to_XMM(t0reg, (u32)s_expmask); + ////SSE_ORPS_M128_to_XMM(t0reg, (u32)g_minvals); + //SSE_ANDPS_XMM_to_XMM(fdreg, t0reg); +} + +static PCSX2_ALIGNED16(u32 
tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +//static const char* logPOverflow = "VU OVERFLOW!: Changing to +Fmax!!!!!!!!!!!!\n"; +//static const char* logNOverflow = "VU OVERFLOW!: Changing to -Fmax!!!!!!!!!!!!\n"; +// Outputs to the console when overflow has occured. +void testWhenOverflow(int info, int regd, int t0reg) { + /*int x86temp = ALLOCTEMPX86(MODE_8BITREG); + SSE_XORPS_XMM_to_XMM(t0reg, t0reg); + SSE_CMPUNORDPS_XMM_to_XMM(t0reg, regd); + SSE_MOVMSKPS_XMM_to_R32(x86temp, t0reg); + TEST8RtoR(x86temp, x86temp); + { + u8* nooverflow = JE8(0); + SSE_MOVMSKPS_XMM_to_R32(x86temp, regd); + TEST8RtoR(x86temp, x86temp); + { + u8* positiv = JE8(0); + u8* printlog; + PUSH32I((u32)logNOverflow); + printlog = JMP8(0); + x86SetJ8(positiv); + PUSH32I((u32)logPOverflow); + x86SetJ8(printlog); + CALLFunc((uptr)SysPrintf); + } + x86SetJ8(nooverflow); + } + _freeX86reg(x86temp);*/ + SSE_MOVAPS_XMM_to_M128((uptr)tempRegX, regd); + tempRegX[0] &= 0xff800000; + tempRegX[1] &= 0xff800000; + tempRegX[2] &= 0xff800000; + tempRegX[3] &= 0xff800000; + if ( (tempRegX[0] == 0x7f800000) || (tempRegX[1] == 0x7f800000) || (tempRegX[2] == 0x7f800000) || (tempRegX[3] == 0x7f800000) ) + SysPrintf( "VU OVERFLOW!: Changing to +Fmax!!!!!!!!!!!!\n" ); + if ( (tempRegX[0] == 0xff800000) || (tempRegX[1] == 0xff800000) || (tempRegX[2] == 0xff800000) || (tempRegX[3] == 0xff800000) ) + SysPrintf( "VU OVERFLOW!: Changing to -Fmax!!!!!!!!!!!!\n" ); +} + +// Clamps infinities to max/min non-infinity number (doesn't use any temp regs) +void vuFloat(int regd, int XYZW) { + if( CHECK_OVERFLOW ) { + switch (XYZW) { + case 0: // Don't do anything if no vectors are being modified. 
+ break; + + case 15: //1111 + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + break; + + case 1: //1000 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 2: //0100 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + break; + + case 3://1100 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 4: //0010 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + break; + + case 5://1010 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 6: //0110 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + break; + + case 7: //1110 + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + 
SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 8: //0001 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + break; + + case 9: //1001 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 10: //0101 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + break; + + case 11: //1101 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 12: //0011 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, 
regd, 0xe1); + break; + + case 13: //1011 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + break; + + case 14: //0111 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xe1); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + break; + } + } +} + +// Clamps infinities to max/min non-infinity number (uses a temp reg) +void vuFloat2(int regd, int regTemp, int XYZW) { + if( CHECK_OVERFLOW ) { + if (XYZW != 0xf) { // here we use a temp reg because not all xyzw are being modified + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_MINPS_M128_to_XMM(regTemp, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regTemp, (uptr)g_minvals); + VU_MERGE_REGS_CUSTOM(regd, regTemp, XYZW); + } + else { // all xyzw are being modified, so no need to use temp reg + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + } + } +} + +// Clamps infinities to max/min non-infinity number +void vuFloat3(uptr x86ptr) +{ + u8* pjmp; + + if( CHECK_OVERFLOW ) { + CMP32ItoM(x86ptr, 0x7f800000 ); + pjmp = JNZ8(0); + MOV32ItoM(x86ptr, 0x7f7fffff ); + x86SetJ8(pjmp); + + CMP32ItoM(x86ptr, 0xff800000 ); + pjmp = JNZ8(0); + MOV32ItoM(x86ptr, 0xff7fffff ); + x86SetJ8(pjmp); + } } void CheckForOverflow(VURegs *VU, int info, int regd) { - 
if( CHECK_FORCEABS && EEREC_TEMP != regd) { - // changing the order produces different results (tektag) - CheckForOverflow_(regd, EEREC_TEMP, _X_Y_Z_W); + testWhenOverflow(info, regd, EEREC_TEMP); + //CheckForOverflow_(regd, EEREC_TEMP, _X_Y_Z_W); + if (EEREC_TEMP != regd) { + //testWhenOverflow(info, regd, EEREC_TEMP); + vuFloat2(regd, EEREC_TEMP, _X_Y_Z_W); } + else + vuFloat(regd, _X_Y_Z_W); } // if unordered replaces with 0x7f7fffff @@ -1094,6 +1322,12 @@ void ClampUnordered(int regd, int t0reg, int dosign) // __asm ret //} +static PCSX2_ALIGNED16(u32 VU_Underflow_Mask1[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +static PCSX2_ALIGNED16(u32 VU_Underflow_Mask2[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; +static PCSX2_ALIGNED16(u32 VU_Zero_Mask[4]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +static PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; +static PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; + // VU Flags // NOTE: flags don't compute under/over flows since it is highly unlikely // that games used them. Including them will lower performance. 
@@ -1103,14 +1337,15 @@ void recUpdateFlags(VURegs * VU, int reg, int info) u8* pjmp; u32 macaddr, stataddr, prevstataddr; int x86macflag, x86newflag, x86oldflag; - const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + if( !(info & PROCESS_VU_UPDATEFLAGS) ) return; flagmask = macarr[_X_Y_Z_W]; macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0); - stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); - prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); + stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address + prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address if( stataddr == 0 ) { stataddr = prevstataddr; @@ -1125,42 +1360,125 @@ void recUpdateFlags(VURegs * VU, int reg, int info) // can do with 8 bits since only computing zero/sign flags if( EEREC_TEMP != reg ) { - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); //Clear EEREC_TEMP - SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, reg); // set all F's if each vector is zero - - MOV32MtoR(x86oldflag, prevstataddr); // load the previous status in to x86oldflag +/* +DD:CC:BB:AA +11:10:01:00 +00:01:10:11*/ + SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw - SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // move the sign bits of the previous calculation (is reg vec zero) in to x86newflag + MOV32MtoR(x86oldflag, prevstataddr); // Load the previous status in to x86oldflag - XOR32RtoR(EAX, EAX); //Clear EAX + //-------------------------Check for Overflow flags------------------------------ - //if( !(g_VUGameFixes&VUFIX_SIGNEDZERO) ) { - SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // necessary! //EEREC_TEMP = !EEREC_TEMP & reg, - // so if the result was zero before, EEREC_TEMP will now be blank. 
- //} + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF - AND32ItoR(x86newflag, 0x0f&flagmask); //Grab "Is zero" bits from the first calculation - pjmp = JZ8(0); //Skip if none are + XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag + + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation + + XOR32RtoR(EAX, EAX); //Clear EAX + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) + pjmp = JZ8(0); // Skip if none are + OR32ItoR(EAX, 8); // Set if they are + x86SetJ8(pjmp); + + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4 + + //-------------------------Check for Underflow flags------------------------------ + + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg + + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]); + SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF + + SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]); + SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF + + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR32ItoR(EAX, 4); // Set if they are + x86SetJ8(pjmp); + + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4 + + //-------------------------Optional Code: Denormals Are Zero------------------------------ + SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, 
reg); // EEREC_TEMP = !EEREC_TEMP & reg + + // Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account + SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); + SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP); + + //-------------------------Check for Signed flags------------------------------ + + //SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[ 0 ]); + //SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // If (EEREC_TEMP == 0x80000000) set all F's for that vector + + //SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg + + // The following code makes sure the Signed Bit isn't set with Negative Zero + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPNEPS_XMM_to_XMM(EEREC_TEMP, reg); // Set all F's if each vector is not zero + SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg); + + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the EEREC_TEMP + + // Replace the 4 lines of code above with this line if you don't care that Negative Zero sets the Signed flag + //SSE_MOVMSKPS_XMM_to_R32(x86newflag, reg); // Move the sign bits of the reg + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR32ItoR(EAX, 2); // Set if they are + x86SetJ8(pjmp); + + OR32RtoR(x86macflag, x86newflag); + SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4 + + //-------------------------Check for Zero flags------------------------------ + + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, reg); // Set all F's if each vector is zero + + /* This code does the same thing as the above two lines + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Helper_Mask[ 0 ]); // EEREC_TEMP &= 0x7fffffff + SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, 
(uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == 0x00000000) set all F's for that vector + */ + + SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation + + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are OR32ItoR(EAX, 1); // Set if they are x86SetJ8(pjmp); - /*if( !(g_VUGameFixes&VUFIX_SIGNEDZERO) )*/ SSE_MOVMSKPS_XMM_to_R32(x86macflag, EEREC_TEMP); // Grab sign bits from before, remember if "reg" - //Was zero, so will the sign bits - //else SSE_MOVMSKPS_XMM_to_R32(x86macflag, reg); // unless we are using the signed zero fix, in which case, we keep it either way ;) - - - AND32ItoR(x86macflag, 0x0f&flagmask); // Seperate the vectors we are using - pjmp = JZ8(0); - OR32ItoR(EAX, 2); // Set the "Signed" flag if it is signed - x86SetJ8(pjmp); - SHL32ItoR(x86newflag, 4); // Shift the zero flags left 4 OR32RtoR(x86macflag, x86newflag); + + //-------------------------Finally: Send the Flags to the Mac Address------------------------------ + SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip back reg to wzyx + + if( macaddr != 0 ) + MOV16RtoM(macaddr, x86macflag); + else + SysPrintf( "VU ALLOCATION ERROR: Macaddr == EAX!!! Can't set Mac Flags!\n" ); } + //-------------------------Flag Setting if (reg == EEREC_TEMP)------------------------------ else { + + SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw + + MOV32MtoR(x86oldflag, prevstataddr); //move current (previous) status register to x86oldflag + SSE_MOVMSKPS_XMM_to_R32(x86macflag, EEREC_TEMP); // mask is < 0 (including 80000000) Get sign bits of all 4 vectors // put results in lower 4 bits of x86macflag - MOV32MtoR(x86oldflag, prevstataddr); //move current (previous) status register to x86oldflag + XOR32RtoR(EAX, EAX); //Clear EAX for our new flag SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_FloatMinMax[8]); //if the result zero? 
@@ -1175,7 +1493,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info) // so if the result was zero, regardless of if its signed or not, it wont set the signed flags //} - AND32ItoR(x86macflag, 0xf&flagmask); //seperate out the flags we are actually using? + AND32ItoR(x86macflag, 0x0f & _X_Y_Z_W ); //seperate out the flags we are actually using? pjmp = JZ8(0); //if none are the flags are set to 1 (aka the result is non-zero & positive, or they were zero) dont set the "signed" flag OR32ItoR(EAX, 2); //else we are signed x86SetJ8(pjmp); @@ -1184,35 +1502,43 @@ void recUpdateFlags(VURegs * VU, int reg, int info) NOT32R(x86newflag); //flip! //} - AND32ItoR(x86newflag, 0xf&flagmask); //mask out the vectors we didnt use + AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); //mask out the vectors we didnt use pjmp = JZ8(0); //If none were zero skip OR32ItoR(EAX, 1); //We had a zero, so set el status flag with "zero":p x86SetJ8(pjmp); - SHL32ItoR(x86newflag, 4); //Move our zero flags left 4 - OR32RtoR(x86macflag, x86newflag); //then stick our signed flags intront of it + SHL32ItoR(x86macflag, 4); //Move our signed flags left 4 + OR32RtoR(x86newflag, x86macflag); //then stick our zero flags after it + + //MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform + + if( macaddr != 0 ) + MOV8RtoM(macaddr, x86newflag); + else + SysPrintf( "VU ALLOCATION ERROR: Macaddr == EAX!!! Can't set Mac Flags!\n" ); + } // x86macflag - new untransformed mac flag, EAX - new status bits, x86oldflag - old status flag // x86macflag = zero_wzyx | sign_wzyx - MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform + //MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform //MOV32RtoR(x86macflag, x86newflag ); //MOV32RtoR(x86macflag, x86oldflag); //SHL32ItoR(x86macflag, 6); //OR32RtoR(x86oldflag, x86macflag); - if( macaddr != 0 ) { + //if( macaddr != 0 ) { // has to write full 32 bits! 
- MOV8RtoM(macaddr, x86newflag); + //MOV8RtoM(macaddr, x86newflag); // vampire night breaks with (g_VUGameFixes&VUFIX_EXTRAFLAGS), crazi taxi needs it - /* if( (g_VUGameFixes&VUFIX_EXTRAFLAGS) && flagmask != 0xf ) { - MOV8MtoR(x86newflag, VU_VI_ADDR(REG_MAC_FLAG, 2)); // get previous written - AND8ItoR(x86newflag, ~g_MACFlagTransform[(flagmask|(flagmask<<4))]); - OR8RtoM(macaddr, x86newflag); - } */ - } + //if( (g_VUGameFixes&VUFIX_EXTRAFLAGS) && flagmask != 0xf ) { + // MOV8MtoR(x86newflag, VU_VI_ADDR(REG_MAC_FLAG, 2)); // get previous written + // AND8ItoR(x86newflag, ~g_MACFlagTransform[(flagmask|(flagmask<<4))]); + // OR8RtoM(macaddr, x86newflag); + //} + //} //AND32ItoR(x86oldflag, 0x0c0); SHR32ItoR(x86oldflag, 6); @@ -1232,7 +1558,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info) /* VU Upper instructions */ /******************************/ -static PCSX2_ALIGNED16(int const_abs_table[16][4]) = +static PCSX2_ALIGNED16(u32 const_abs_table[16][4]) = { { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, @@ -1256,12 +1582,12 @@ void recVUMI_ABS(VURegs *VU, int info) { if ( _Ft_ == 0 ) return; - if (_X_Y_Z_W != 0xf) { + if (_X_Y_Z_W != 0xf) { // here we use a temp reg because not all xyzw are being modified SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } else { + } else { // all xyzw are being modified, so no need to use temp reg if( EEREC_T != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); SSE_ANDPS_M128_to_XMM(EEREC_T, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); } @@ -1273,7 +1599,7 @@ void recVUMI_ADD(VURegs *VU, int info) { if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - if( _Fs_ == 0 && _Ft_ == 0 ) { + if( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2 if( _X_Y_Z_W != 0xf ) { SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)s_two); 
VU_MERGE_REGS(EEREC_D, EEREC_TEMP); @@ -1283,7 +1609,11 @@ void recVUMI_ADD(VURegs *VU, int info) } } else { - if( _X_Y_Z_W == 8 ) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + } + if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); else if (EEREC_D == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S); else { @@ -1291,13 +1621,13 @@ void recVUMI_ADD(VURegs *VU, int info) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); } } - else if (_X_Y_Z_W != 0xf) { + else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); VU_MERGE_REGS(EEREC_D, EEREC_TEMP); } - else { + else { // All xyzw being modified (xyzw == 1111) if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T); else if (EEREC_D == EEREC_T) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); else { @@ -1309,12 +1639,16 @@ void recVUMI_ADD(VURegs *VU, int info) // if( _Fd_ == 0 && (_Fs_ == 0 || _Ft_ == 0) ) // info |= PROCESS_VU_UPDATEFLAGS; - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); if( _XYZW_SS ) { @@ -1365,14 +1699,18 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) } } } - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); - if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); + //if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); } void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); if( _Ft_ == 0 && xyzw < 3 ) { @@ -1428,8 +1766,8 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, 
int info) } } } - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_ADDi(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -1441,6 +1779,11 @@ void recVUMI_ADDw(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 3, info); } void recVUMI_ADDA(VURegs *VU, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } if( _X_Y_Z_W == 8 ) { if (EEREC_ACC == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); else if (EEREC_ACC == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_S); @@ -1463,12 +1806,18 @@ void recVUMI_ADDA(VURegs *VU, int info) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T); } } - recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } -void recVUMI_ADDA_iq(VURegs *VU, int addr, int info) +void recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } + if( _XYZW_SS ) { assert( EEREC_ACC != EEREC_TEMP ); if( EEREC_ACC == EEREC_S ) { @@ -1508,12 +1857,18 @@ void recVUMI_ADDA_iq(VURegs *VU, int addr, int info) } } } - recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } + if( _X_Y_Z_W == 8 ) { if( xyzw == 0 ) { SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); @@ -1546,8 +1901,8 @@ void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) } } } - recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_ADDAi(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -1559,6 +1914,10 @@ void recVUMI_ADDAw(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 3, info); } void recVUMI_SUB(VURegs *VU, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, 
_X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); if( EEREC_S == EEREC_T ) { @@ -1597,14 +1956,18 @@ void recVUMI_SUB(VURegs *VU, int info) } } } - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); // neopets works better with this? //CheckForOverflow(info, EEREC_D); } -void recVUMI_SUB_iq(VURegs *VU, int addr, int info) +void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); if( _XYZW_SS ) { @@ -1669,16 +2032,20 @@ void recVUMI_SUB_iq(VURegs *VU, int addr, int info) } } } - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); - if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); + //if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); } static PCSX2_ALIGNED16(s_unaryminus[4]) = {0x80000000, 0, 0, 0}; void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + } if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); if( _X_Y_Z_W == 8 ) { @@ -1797,8 +2164,8 @@ void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) } } } - recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_SUBi(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -1810,6 +2177,12 @@ void recVUMI_SUBw(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 3, info); } void recVUMI_SUBA(VURegs *VU, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } + if( EEREC_S == EEREC_T ) { if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_ACC, (uptr)&SSEmovMask[15-_X_Y_Z_W][0]); else SSE_XORPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC); @@ -1844,12 +2217,18 @@ void recVUMI_SUBA(VURegs *VU, int info) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T); } } - recUpdateFlags(VU, 
EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } -void recVUMI_SUBA_iq(VURegs *VU, int addr, int info) +void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } + if( _XYZW_SS ) { if( EEREC_ACC == EEREC_S ) { _vuFlipRegSS(VU, EEREC_ACC); @@ -1896,12 +2275,18 @@ void recVUMI_SUBA_iq(VURegs *VU, int addr, int info) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); } } - recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + vuFloat(EEREC_ACC, _X_Y_Z_W); + } + if( _X_Y_Z_W == 8 ) { if( xyzw == 0 ) { SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); @@ -1938,8 +2323,8 @@ void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); } } - recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_SUBAi(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -1951,6 +2336,12 @@ void recVUMI_SUBAw(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 3, info); } void recVUMI_MUL_toD(VURegs *VU, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? 
EEREC_T : EEREC_S); VU_MERGE_REGS(regd, EEREC_TEMP); @@ -1984,8 +2375,14 @@ void recVUMI_MUL_toD(VURegs *VU, int regd, int info) } } -void recVUMI_MUL_iq_toD(VURegs *VU, int addr, int regd, int info) +void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if( _XYZW_SS ) { if( regd == EEREC_TEMP ) { _vuFlipRegSS(VU, EEREC_S); @@ -2035,6 +2432,12 @@ void recVUMI_MUL_iq_toD(VURegs *VU, int addr, int regd, int info) void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + } + if( _Ft_ == 0 ) { if( xyzw < 3 ) { if (_X_Y_Z_W != 0xf) { @@ -2100,6 +2503,7 @@ void recVUMI_MUL(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MUL_toD(VU, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MUL_iq(VURegs *VU, int addr, int info) @@ -2107,9 +2511,10 @@ void recVUMI_MUL_iq(VURegs *VU, int addr, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MUL_iq_toD(VU, addr, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); // spacefisherman needs overflow checking on MULi.z - if( addr == VU_REGQ_ADDR || _Z ) - CheckForOverflow(VU, info, EEREC_D); + //if( addr == VU_REGQ_ADDR || _Z ) + // CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info) @@ -2117,6 +2522,7 @@ void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MULi(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -2130,18 +2536,21 @@ void recVUMI_MULA( VURegs *VU, int info ) { recVUMI_MUL_toD(VU, 
EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MULA_iq(VURegs *VU, int addr, int info) { recVUMI_MUL_iq_toD(VU, addr, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MULA_xyzw(VURegs *VU, int xyzw, int info) { recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MULAi(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -2153,6 +2562,12 @@ void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); } void recVUMI_MADD_toD(VURegs *VU, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if( _X_Y_Z_W == 8 ) { if( regd == EEREC_ACC ) { SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); @@ -2202,8 +2617,14 @@ void recVUMI_MADD_toD(VURegs *VU, int regd, int info) } } -void recVUMI_MADD_iq_toD(VURegs *VU, int addr, int regd, int info) +void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat3(addr); + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if( _X_Y_Z_W == 8 ) { if( regd == EEREC_ACC ) { if( _Fs_ == 0 ) { @@ -2277,6 +2698,12 @@ void recVUMI_MADD_iq_toD(VURegs *VU, int addr, int regd, int info) void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if( _Ft_ == 0 ) { if( xyzw == 3 ) { @@ -2376,6 +2803,7 @@ void recVUMI_MADD(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MADD_toD(VU, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MADD_iq(VURegs *VU, int addr, int info) @@ -2383,8 +2811,9 @@ void recVUMI_MADD_iq(VURegs *VU, int addr, int 
info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MADD_iq_toD(VU, addr, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); - if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); + //if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info) @@ -2392,9 +2821,10 @@ void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MADD_xyzw_toD(VU, xyzw, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); // super bust-a-move arrows - CheckForOverflow(VU, info, EEREC_D); + //CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MADDi(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -2408,46 +2838,59 @@ void recVUMI_MADDA( VURegs *VU, int info ) { recVUMI_MADD_toD(VU, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAi( VURegs *VU , int info) { recVUMI_MADD_iq_toD( VU, VU_VI_ADDR(REG_I, 1), EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAq( VURegs *VU , int info) { recVUMI_MADD_iq_toD( VU, VU_REGQ_ADDR, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAx( VURegs *VU , int info) { recVUMI_MADD_xyzw_toD(VU, 0, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAy( VURegs *VU , int info) { recVUMI_MADD_xyzw_toD(VU, 1, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAz( VURegs *VU , int info) { recVUMI_MADD_xyzw_toD(VU, 2, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MADDAw( VURegs *VU , int info) { recVUMI_MADD_xyzw_toD(VU, 3, EEREC_ACC, info); 
recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if (_X_Y_Z_W != 0xf) { int t1reg = _vuGetTempXMMreg(info); @@ -2497,6 +2940,12 @@ void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) { + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_ACC, _X_Y_Z_W); + vuFloat(regd, _X_Y_Z_W); + } + if (_X_Y_Z_W != 0xf) { int t1reg = _vuGetTempXMMreg(info); @@ -2561,6 +3010,7 @@ void recVUMI_MSUB(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_toD(VU, EEREC_D, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUB_iq(VURegs *VU, int addr, int info) @@ -2568,8 +3018,9 @@ void recVUMI_MSUB_iq(VURegs *VU, int addr, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_iq_toD(VU, EEREC_D, addr, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); - if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); + //if( addr == VU_REGQ_ADDR ) CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUBi(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } @@ -2579,6 +3030,7 @@ void recVUMI_MSUBx(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 0, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUBy(VURegs *VU, int info) @@ -2586,6 +3038,7 @@ void recVUMI_MSUBy(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 1, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUBz(VURegs *VU, int info) @@ -2593,6 +3046,7 @@ void recVUMI_MSUBz(VURegs *VU, int 
info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 2, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUBw(VURegs *VU, int info) @@ -2600,53 +3054,65 @@ void recVUMI_MSUBw(VURegs *VU, int info) if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 3, info); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_MSUBA( VURegs *VU, int info ) { recVUMI_MSUB_toD(VU, EEREC_ACC, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAi( VURegs *VU, int info ) { recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_VI_ADDR(REG_I, 1), info ); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAq( VURegs *VU, int info ) { recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_REGQ_ADDR, info ); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAx( VURegs *VU, int info ) { recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 0, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAy( VURegs *VU, int info ) { recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 1, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAz( VURegs *VU, int info ) { recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 2, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MSUBAw( VURegs *VU, int info ) { recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 3, info); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_MAX(VURegs *VU, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + } if( _X_Y_Z_W == 8 ) { if (EEREC_D == EEREC_S) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); @@ -2672,9 +3138,13 @@ void recVUMI_MAX(VURegs *VU, int info) } } 
-void recVUMI_MAX_iq(VURegs *VU, int addr, int info) +void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat3(addr); + } if( _XYZW_SS ) { if( EEREC_D == EEREC_TEMP ) { @@ -2729,6 +3199,10 @@ void recVUMI_MAX_iq(VURegs *VU, int addr, int info) void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + } if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { if( _Fs_ == 0 && _Ft_ == 0 ) { @@ -2804,6 +3278,10 @@ void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); } void recVUMI_MINI(VURegs *VU, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat(EEREC_T, _X_Y_Z_W); + } if( _X_Y_Z_W == 8 ) { if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); @@ -2822,12 +3300,12 @@ void recVUMI_MINI(VURegs *VU, int info) else { if( EEREC_D == EEREC_S ) { // need for GT4 vu0rec - ClampUnordered(EEREC_T, EEREC_TEMP, 0); + //ClampUnordered(EEREC_T, EEREC_TEMP, 0); SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); } else if( EEREC_D == EEREC_T ) { // need for GT4 vu0rec - ClampUnordered(EEREC_S, EEREC_TEMP, 0); + //ClampUnordered(EEREC_S, EEREC_TEMP, 0); SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); } else { @@ -2837,9 +3315,13 @@ void recVUMI_MINI(VURegs *VU, int info) } } -void recVUMI_MINI_iq(VURegs *VU, int addr, int info) +void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + vuFloat3(addr); + } if( _XYZW_SS ) { if( EEREC_D == EEREC_TEMP ) { @@ -2894,6 +3376,10 @@ void recVUMI_MINI_iq(VURegs *VU, int addr, int info) void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info) { if ( _Fd_ == 0 ) return; + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, _X_Y_Z_W); + //vuFloat(EEREC_T, xyzw); + } if( _X_Y_Z_W == 8 && (EEREC_D != 
EEREC_TEMP)) { if( xyzw == 0 ) { @@ -2935,8 +3421,13 @@ void recVUMI_MINIw(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 3, info); } void recVUMI_OPMULA( VURegs *VU, int info ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xD2); // EEREC_T = WYXZ + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, 0xE); + vuFloat(EEREC_T, 0xE); + } + + SSE_MOVAPS_XMM_to_XMM( EEREC_TEMP, EEREC_S ); + SSE_SHUFPS_XMM_to_XMM( EEREC_T, EEREC_T, 0xD2 ); // EEREC_T = WYXZ SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY SSE_MULPS_XMM_to_XMM( EEREC_TEMP, EEREC_T ); @@ -2947,12 +3438,17 @@ void recVUMI_OPMULA( VURegs *VU, int info ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); recUpdateFlags(VU, EEREC_ACC, info); + CheckForOverflow(VU, info, EEREC_ACC); } void recVUMI_OPMSUB( VURegs *VU, int info ) { - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + if (CHECK_EXTRA_OVERFLOW) { + vuFloat(EEREC_S, 0xE); + vuFloat(EEREC_T, 0xE); + } + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); SSE_SHUFPS_XMM_to_XMM( EEREC_T, EEREC_T, 0xD2 ); // EEREC_T = WYXZ SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY @@ -2968,6 +3464,7 @@ void recVUMI_OPMSUB( VURegs *VU, int info ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); recUpdateFlags(VU, EEREC_D, info); + CheckForOverflow(VU, info, EEREC_D); } void recVUMI_NOP( VURegs *VU, int info ) @@ -3026,13 +3523,18 @@ void recVUMI_ITOF0( VURegs *VU, int info ) VU_MERGE_REGS(EEREC_T, EEREC_TEMP); xmmregs[EEREC_T].mode |= MODE_WRITE; + vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities } else { - if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); + if( cpucaps.hasStreamingSIMD2Extensions ) { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); + vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities + } else { _deleteVFtoXMMreg(_Fs_, VU==&VU1, 1); SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, 
VU_VFx_ADDR( _Fs_ )); xmmregs[EEREC_T].mode |= MODE_WRITE; + vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities } } } @@ -3051,6 +3553,7 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info) SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); VU_MERGE_REGS(EEREC_T, EEREC_TEMP); xmmregs[EEREC_T].mode |= MODE_WRITE; + vuFloat2(EEREC_T, EEREC_TEMP, _X_Y_Z_W); // Clamp infinities } else { if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); else { @@ -3060,6 +3563,7 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info) } SSE_MULPS_M128_to_XMM(EEREC_T, addr); + vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities } } @@ -3254,7 +3758,7 @@ void recVUMI_DIV(VURegs *VU, int info) } } - //if( !CHECK_FORCEABS ) { + //if( !CHECK_OVERFLOW ) { SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (uptr)&g_minvals[0]); //} @@ -3413,7 +3917,7 @@ void recVUMI_RSQRT(VURegs *VU, int info) } } - //if( !CHECK_FORCEABS ) { + //if( CHECK_OVERFLOW ) { SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (uptr)&g_minvals[0]); SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); //} @@ -4420,7 +4924,7 @@ void recVUMI_FSEQ( VURegs *VU, int info ) ftreg = ALLOCVI(_Ft_, MODE_WRITE); - MOVZX32M8toR( EAX, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); + MOVZX32M16toR( EAX, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); XOR32RtoR(ftreg, ftreg); CMP16ItoR(EAX, imm); @@ -4437,7 +4941,7 @@ void recVUMI_FSOR( VURegs *VU, int info ) ftreg = ALLOCVI(_Ft_, MODE_WRITE); - MOVZX32M8toR( ftreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); + MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); OR32ItoR( ftreg, imm ); } @@ -4450,15 +4954,23 @@ void recVUMI_FSSET(VURegs *VU, int info) imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF); // keep the low 6 bits ONLY if the upper instruction is an fmac instruction (otherwise rewrite) - metal gear solid 3 -// if( (info & PROCESS_VU_SUPER) && VUREC_FMAC ) { -// MOV32MtoR(EAX, prevaddr); -// AND32ItoR(EAX, 0x3f); -// if ((imm&0xfc0) != 0) 
OR32ItoR(EAX, imm & 0xFC0); -// MOV32RtoM(writeaddr ? writeaddr : prevaddr, EAX); -// } -// else { - MOV32ItoM(writeaddr ? writeaddr : prevaddr, imm&0xfc0); -// } + //if( (info & PROCESS_VU_SUPER) && VUREC_FMAC ) { + // MOV32MtoR(EAX, prevaddr); + // AND32ItoR(EAX, 0x3f); + // if ((imm&0xfc0) != 0) OR32ItoR(EAX, imm & 0xFC0); + // MOV32RtoM(writeaddr ? writeaddr : prevaddr, EAX); + //} + //else { + // MOV32ItoM(writeaddr ? writeaddr : prevaddr, imm&0xfc0); + //} + if (writeaddr) { + AND32ItoM(writeaddr, 0x3f); + OR32ItoM(writeaddr, imm&0xfc0); + } + else { + AND32ItoM(prevaddr, 0x3f); + OR32ItoM(prevaddr, imm&0xfc0); + } } void recVUMI_FMAND( VURegs *VU, int info ) @@ -4466,17 +4978,28 @@ void recVUMI_FMAND( VURegs *VU, int info ) int fsreg, ftreg; if ( _Ft_ == 0 ) return; - fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ); - ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_8BITREG); + //fsreg = ALLOCVI(_Fs_, MODE_READ); //_checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ); + //ftreg = ALLOCVI(_Ft_, MODE_WRITE);//|MODE_8BITREG); + if( _Ft_ != _Fs_ ) { + fsreg = ALLOCVI(_Fs_, MODE_READ); + ftreg = ALLOCVI(_Ft_, MODE_WRITE); + MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); + AND16RtoR( ftreg, fsreg); + } + else { + ftreg = ALLOCVI(_Ft_, MODE_WRITE); + AND16MtoR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); + } + /* if( fsreg >= 0 ) { if( ftreg != fsreg ) MOV32RtoR(ftreg, fsreg); } - else MOV8MtoR(ftreg, VU_VI_ADDR(_Fs_, 1)); + else MOV16MtoR(ftreg, VU_VI_ADDR(_Fs_, 1)); - //AND16MtoR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); - AND8MtoR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + AND16MtoR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); MOVZX32R8toR(ftreg, ftreg); + */ } void recVUMI_FMEQ( VURegs *VU, int info ) @@ -4485,9 +5008,9 @@ void recVUMI_FMEQ( VURegs *VU, int info ) if ( _Ft_ == 0 ) return; if( _Ft_ == _Fs_ ) { - ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ|MODE_8BITREG); - // really 8 since not doing under/over flows - CMP8MtoR(ftreg, 
VU_VI_ADDR(REG_MAC_FLAG, 1)); + ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ);//|MODE_8BITREG); + + CMP16MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); SETE8R(EAX); MOVZX32R8toR(ftreg, EAX); } @@ -4498,36 +5021,40 @@ void recVUMI_FMEQ( VURegs *VU, int info ) XOR32RtoR(ftreg, ftreg); - CMP8MtoR(fsreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + CMP16MtoR(fsreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); SETE8R(ftreg); } } void recVUMI_FMOR( VURegs *VU, int info ) { - int fsreg, ftreg; + //int fsreg, ftreg; + int ftreg; if ( _Ft_ == 0 ) return; if( _Fs_ == 0 ) { - ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_8BITREG); - MOVZX32M8toR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + ftreg = ALLOCVI(_Ft_,MODE_WRITE);//|MODE_8BITREG); + MOVZX32M16toR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); } if( _Ft_ == _Fs_ ) { - ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ|MODE_8BITREG); - OR8MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ);//|MODE_8BITREG); + OR16MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); } else { - fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ); + if( info & PROCESS_VU_SUPER ) SysPrintf( "VU ERROR: can't get VI ADDR\n" ); ftreg = ALLOCVI(_Ft_, MODE_WRITE); + MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + OR16MtoR( ftreg, VU_VI_ADDR(_Fs_, 1) ); - MOVZX32M8toR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); - if( fsreg >= 0 ) { + //fsreg = ALLOCVI(_Ft_, MODE_READ); //_checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ); + //MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + /* + if( fsreg >= 0 ) OR16RtoR( ftreg, fsreg); - } - else { + else OR16MtoR( ftreg, VU_VI_ADDR(_Fs_, 1)); - } + */ } } diff --git a/pcsx2/x86/iVUmicro.h b/pcsx2/x86/iVUmicro.h index af217ca27e..28d99da06a 100644 --- a/pcsx2/x86/iVUmicro.h +++ b/pcsx2/x86/iVUmicro.h @@ -1,19 +1,19 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2003 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the 
terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __IVUMICRO_H__ @@ -37,8 +37,12 @@ #define RECOMPILE_VUMI_MAX #define RECOMPILE_VUMI_MINI #define RECOMPILE_VUMI_FTOI +#define RECOMPILE_VUMI_OPM +#define RECOMPILE_VUMI_OTHER + #define RECOMPILE_VUMI_MATH +// #define RECOMPILE_VUMI_MISC #define RECOMPILE_VUMI_E #define RECOMPILE_VUMI_X @@ -46,6 +50,7 @@ #define RECOMPILE_VUMI_FLAG #define RECOMPILE_VUMI_BRANCH #define RECOMPILE_VUMI_ARITHMETIC +// #define RECOMPILE_VUMI_LOADSTORE #ifdef __x86_64__ diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c index 47f0a18816..f24909ebc1 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.c +++ b/pcsx2/x86/ix86-32/iR5900-32.c @@ -1,19 +1,19 @@ /* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2008 Pcsx2 Team + * Copyright (C) 2002-2005 Pcsx2 Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ // recompiler reworked to add dynamic linking zerofrog(@gmail.com) Jan06 @@ -1450,8 +1450,8 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xm //////////////////////////////////////////////////// extern u8 g_MACFlagTransform[256]; // for vus -u32 g_sseMXCSR = 0x9fc0; // disable all exception, round to 0, flush to 0 -u32 g_sseVUMXCSR = 0xff80; // set to 0xffc0 to enable DAZ +u32 g_sseMXCSR = 0x9fc0; //0x9fc0 disable all exception, round to 0, flush to 0 +u32 g_sseVUMXCSR = 0; void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) { @@ -1481,7 +1481,7 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) int recInit( void ) { int i; - static const u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; recLUT = (uptr*) _aligned_malloc( 0x010000 * sizeof(uptr), 16 ); memset( recLUT, 0, 0x010000 * sizeof(uptr) ); @@ -1576,10 +1576,11 @@ int recInit( void ) SuperVUInit(-1); - for(i = 0; i < 256; ++i) { + for(i = 0; i < 256; ++i) { //x0-xF //0x-Fx g_MACFlagTransform[i] = macarr[i>>4]|(macarr[i&15]<<4); } + g_sseVUMXCSR = CHECK_DENORMALS; SetCPUState(g_sseMXCSR, g_sseVUMXCSR); return 0; @@ -2136,6 +2137,7 @@ void SetBranchReg( u32 reg ) void SetBranchImm( u32 imm ) { + u32* ptr; branch = 1; assert( imm ); @@ -2148,10 +2150,8 @@ void SetBranchImm( u32 imm ) if( bExecBIOS ) CheckForBIOSEnd(); MOV32ItoR(EDX, 0); - { - u32* ptr = (u32*)(x86Ptr-4); - *ptr = (u32)JMP32((u32)Dispatcher - ( (u32)x86Ptr + 5 )); - } + ptr = (u32*)(x86Ptr-4); + *ptr = (u32)JMP32((u32)Dispatcher - ( (u32)x86Ptr + 5 )); } void SaveBranchState() @@ -2227,7 +2227,7 @@ void iFlushCall(int 
flushtype) } } -#define USE_FAST_BRANCHES (Config.Hacks & 1) +#define USE_FAST_BRANCHES CHECK_FASTBRANCHES //void testfpu() //{ @@ -2250,7 +2250,7 @@ void iFlushCall(int flushtype) // assert( !g_globalXMMSaved ); //} -#define EECYCLE_MULT ((Config.Hacks & 1) ? 2.25 : (9/8)) +#define EECYCLE_MULT (CHECK_EESYNC_HACK ? (CHECK_EE_IOP_EXTRA ? 3.375 : 2.25) : (9/8)) static void iBranchTest(u32 newpc, u32 cpuBranch) { @@ -2835,9 +2835,9 @@ void recRecompile( u32 startpc ) else { s_pCurBlockEx = NULL; for(i = 0; i < EE_NUMBLOCKS; ++i) { - if( recBlocks[(i+s_nNextBlock)&(EE_NUMBLOCKS-1)].size == 0 ) { - s_pCurBlockEx = recBlocks+((i+s_nNextBlock)&(EE_NUMBLOCKS-1)); - s_nNextBlock = (i+s_nNextBlock+1)&(EE_NUMBLOCKS-1); + if( recBlocks[(i+s_nNextBlock)%EE_NUMBLOCKS].size == 0 ) { + s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%EE_NUMBLOCKS; + s_nNextBlock = (i+s_nNextBlock+1)%EE_NUMBLOCKS; break; } } diff --git a/pcsx2/x86/ix86/ix86.c b/pcsx2/x86/ix86/ix86.c index 0529ea6d26..b5f4a8927d 100644 --- a/pcsx2/x86/ix86/ix86.c +++ b/pcsx2/x86/ix86/ix86.c @@ -72,16 +72,16 @@ void WriteRmOffset(x86IntRegType to, int offset) if( (to&7) == ESP ) { if( offset == 0 ) { ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); + ModRM( 0, ESP, 4 ); } else if( offset < 128 && offset >= -128 ) { ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); + ModRM( 0, ESP, 4 ); write8(offset); } else { ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); + ModRM( 0, ESP, 4 ); write32(offset); } } @@ -1117,56 +1117,32 @@ void ADD64RtoR( x86IntRegType to, x86IntRegType from ) void ADD32ItoR( x86IntRegType to, u32 from ) { RexB(0, to); - if(from < 0x80) - { - write8( 0x83 ); + if ( to == EAX) { + write8( 0x05 ); + } + else { + write8( 0x81 ); ModRM( 3, 0, to ); - write8( from ); - } - else - { - if ( to == EAX) { - write8( 0x05 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - } - write32( from ); } + write32( from ); } /* add imm32 to m32 */ void ADD32ItoM( uptr to, u32 from ) { - if(from < 0x80) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 
); - write32( MEMADDR(to, 8) ); - write8( from ); - } else { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); - } + write8( 0x81 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } // add imm32 to [r32+off] void ADD32ItoRmOffset( x86IntRegType to, u32 from, int offset) { - RexB(0,to); - if(from < 0x80) - { - write8( 0x83 ); - WriteRmOffset(to,offset); - write8(from); - } else { - write8( 0x81 ); - WriteRmOffset(to,offset); - write32(from); - } + RexB(0,to); + write8( 0x81 ); + WriteRmOffset(to,offset); + write32(from); } /* add r32 to r32 */ @@ -1207,46 +1183,28 @@ void ADD16RtoR( x86IntRegType to , x86IntRegType from ) /* add imm16 to r16 */ void ADD16ItoR( x86IntRegType to, u16 from ) { - RexB(0,to); - write8( 0x66 ); + RexB(0,to); if ( to == EAX) - { + { write8( 0x05 ); - write16( from ); } - else if(from < 0x80) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8( from ); - } - else - { + else + { write8( 0x81 ); ModRM( 3, 0, to ); - write16( from ); } + write16( from ); } /* add imm16 to m16 */ void ADD16ItoM( uptr to, u16 from ) { write8( 0x66 ); - if(from < 0x80) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8( from ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); - } + write8( 0x81 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* add r16 to m16 */ @@ -2328,20 +2286,13 @@ void AND64I32toM( uptr to, u32 from ) void AND32ItoR( x86IntRegType to, u32 from ) { RexB(0,to); - if(from < 0x80) - { - AND32I8toR(to, (u8)from); - } - else - { - if ( to == EAX ) { - write8( 0x25 ); - } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); + if ( to == EAX ) { + write8( 0x25 ); + } else { + write8( 0x81 ); + ModRM( 3, 0x4, to ); } + write32( from ); } /* and sign ext imm8 to r32 */ @@ -2356,17 +2307,10 @@ void AND32I8toR( x86IntRegType to, u8 from ) /* and imm32 
to m32 */ void AND32ItoM( uptr to, u32 from ) { - if(from < 0x80) - { - AND32I8toM(to, (u8)from); - } - else - { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); - } + write8( 0x81 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } /* and sign ext imm8 to m32 */ @@ -2416,38 +2360,24 @@ void AND16RtoR( x86IntRegType to, x86IntRegType from ) /* and imm16 to r16 */ void AND16ItoR( x86IntRegType to, u16 from ) { - RexB(0,to); - write8(0x66); + RexB(0,to); if ( to == EAX ) { write8( 0x25 ); - write16( from ); - } else if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8( from ); } else { write8( 0x81 ); ModRM( 3, 0x4, to ); - write16( from ); } + write16( from ); } /* and imm16 to m16 */ void AND16ItoM( uptr to, u16 from ) { - write8(0x66); - if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8( from ); - } else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); - } + write16( 0x8166 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* and r16 to m16 */