diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 05c1f32868..8a71d245c4 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -81,8 +81,8 @@ extern SessionOverrideFlags g_Session; #define DEFAULT_eeOptions 0x01 #define DEFAULT_vuOptions 0x01 //------------ DEFAULT sseMXCSR VALUES!!! --------------- -#define DEFAULT_sseMXCSR 0x7fc0 //FPU rounding, DaZ, "chop" - Note: Dont enable FtZ by default, it breaks games! E.g. Enthusia (Refraction) -#define DEFAULT_sseVUMXCSR 0x7fc0 //VU rounding, DaZ, "chop" +#define DEFAULT_sseMXCSR 0xffc0 //FPU rounding > DaZ, FtZ, "chop" +#define DEFAULT_sseVUMXCSR 0xffc0 //VU rounding > DaZ, FtZ, "chop" #define CHECK_FRAMELIMIT (Config.Options&PCSX2_FRAMELIMIT_MASK) diff --git a/pcsx2/Elfheader.cpp b/pcsx2/Elfheader.cpp index 40420c2b2f..206ec773f1 100644 --- a/pcsx2/Elfheader.cpp +++ b/pcsx2/Elfheader.cpp @@ -571,7 +571,6 @@ int loadElfFile(const char *filename) } #include "VU.h" -extern int g_FFXHack; extern int path3hack; int g_VUGameFixes = 0; @@ -580,7 +579,6 @@ void LoadGameSpecificSettings() { // default g_VUGameFixes = 0; - g_FFXHack = 0; switch(ElfCRC) { case 0xb99379b7: // erementar gerad (discolored chars) @@ -589,33 +587,6 @@ void LoadGameSpecificSettings() case 0xa08c4057: //Sprint Cars (SLUS) case 0x8b0725d5: //Flinstones Bedrock Racing (SLES) path3hack = 1; // We can move this to patch files right now - break; - - case 0xb4414ea1: // ffx(rus) - case 0xee97db5b: // ffx(rus) - case 0xaec495cc: // ffx(rus) - case 0x6a4efe60: // ffx(j) - case 0xA39517AB: // ffx(e) - case 0xBB3D833A: // ffx(u) - case 0x941bb7d9: // ffx(g) - case 0xD9FC6310: // ffx int(j) - case 0xa39517ae: // ffx(f) - case 0xa39517a9: // ffx(i) - case 0x658597e2: // ffx int - case 0x941BB7DE: // ffx(s) - case 0x3866CA7E: // ffx(asia) - case 0x48FE0C71: // ffx2 (u) - case 0x9aac530d: // ffx2 (g) - case 0x9AAC5309: // ffx2 (e) - case 0x8A6D7F14: // ffx2 (j) - case 0x9AAC530B: // ffx2 (i) - case 0x9AAC530A: // ffx2 (f) - case 0x9aac530c: // ffx2 (f) - case 0xe1fd9a2d: // ffx2 last mission (?) - case 0x93f9b89a: // ffx2 demo (g) - case 0x304C115C: // harvest moon - awl - case 0xF0A6D880: // harvest moon - sth - g_FFXHack = 1; break; } } diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 631f4ecef7..843179f228 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -41,8 +41,6 @@ using namespace R5900; static bool m_gsOpened = false; -int g_FFXHack=0; - #ifdef PCSX2_DEVBUILD // GS Playback diff --git a/pcsx2/MTGS.cpp b/pcsx2/MTGS.cpp index 4b658f04d9..a17e561d0b 100644 --- a/pcsx2/MTGS.cpp +++ b/pcsx2/MTGS.cpp @@ -327,7 +327,7 @@ __forceinline u32 mtgsThreadObject::_gifTransferDummy( GIF_PATH pathidx, const u } else if(path.tag.nloop == 0) { - if(pathidx == 0 && g_FFXHack) + if(pathidx == 0) continue; eop = true; diff --git a/pcsx2/Patch.cpp b/pcsx2/Patch.cpp index 30414ac479..6993f83455 100644 --- a/pcsx2/Patch.cpp +++ b/pcsx2/Patch.cpp @@ -653,7 +653,8 @@ int AddPatch(int Mode, int Place, int Address, int Size, u64 data) void patchFunc_ffxhack( char * cmd, char * param ) { - g_FFXHack = 1; + //Keeping this as a dummy a while :p + //g_FFXHack = 1; } void patchFunc_xkickdelay( char * cmd, char * param ) diff --git a/pcsx2/Patch.h b/pcsx2/Patch.h index 3604944e66..d6c85209f9 100644 --- a/pcsx2/Patch.h +++ b/pcsx2/Patch.h @@ -110,7 +110,6 @@ int AddPatch(int Mode, int Place, int Address, int Size, u64 data); extern void SetFastMemory(int); // iR5900LoadStore.c extern int path3hack; -extern int g_FFXHack; //extern int g_VUGameFixes; extern int g_ZeroGSOptions; extern u32 g_sseMXCSR; diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 7d50fedb66..8ea0fef6a6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -316,7 +316,6 @@ void _vuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { /* VU Upper instructions */ /******************************/ #ifndef INT_VUDOUBLEHACK -static u32 d; float vuDouble(u32 f) { switch(f & 0x7f800000){ @@ -324,10 +323,13 @@ float vuDouble(u32 f) f &= 0x80000000; return *(float*)&f; break; - case 0x7f800000: + case 0x7f800000: + { + u32 d; d = (f & 0x80000000)|0x7f7fffff; return *(float*)&d; break; + } default: return *(float*)&f; break; @@ -2718,7 +2720,8 @@ void _vuRegsFSSET(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->VFread0 = 0; VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << REG_STATUS_FLAG; - VUregsn->VIread = 0;//1 << REG_STATUS_FLAG; this kills speed + //VUregsn->VIread = 0; // 1 << REG_STATUS_FLAG; this kills speed. Todo: Orly? (rama) + VUregsn->VIread = 1 << REG_STATUS_FLAG; } void _vuRegsFMAND(VURegs * VU, _VURegsNum *VUregsn) { diff --git a/pcsx2/x86/iVUmicroUpper.cpp b/pcsx2/x86/iVUmicroUpper.cpp index 7dfa578c62..26e3f1604d 100644 --- a/pcsx2/x86/iVUmicroUpper.cpp +++ b/pcsx2/x86/iVUmicroUpper.cpp @@ -333,12 +333,23 @@ void recUpdateFlags(VURegs * VU, int reg, int info) static PCSX2_ALIGNED16(u32 VU_addsuband[2][4]); static PCSX2_ALIGNED16(u32 VU_addsub_reg[2][4]); +static u32 tempECX; + void VU_ADD_SUB(u32 regd, u32 regt, int is_sub, int info) { u8 *localptr[4][8]; - int temp1 = _allocX86reg(ECX, X86TYPE_TEMP, 0, 0); //receives regd + + MOV32RtoM((uptr)&tempECX, ECX); + + int temp1 = ECX; //receives regd int temp2 = ALLOCTEMPX86(0); + if (temp2 == ECX) + { + temp2 = ALLOCTEMPX86(0); + _freeX86reg(ECX); + } + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); @@ -413,98 +424,26 @@ void VU_ADD_SUB(u32 regd, u32 regt, int is_sub, int info) SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - _freeX86reg(temp1); _freeX86reg(temp2); -} -void VU_ADD_SUB_SSE4(u32 regd, u32 regt, int is_sub, int info) -{ - u8 *localptr[4][8]; - int temp1 = _allocX86reg(ECX, X86TYPE_TEMP, 0, 0); //receives regd - int temp2 = ALLOCTEMPX86(0); - - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); - - SSE2_PSLLD_I8_to_XMM(regd, 1); - SSE2_PSLLD_I8_to_XMM(regt, 1); - - SSE2_PSRLD_I8_to_XMM(regd, 24); - SSE2_PSRLD_I8_to_XMM(regt, 24); - - SSE2_PSUBD_XMM_to_XMM(regd, regt); - -#define PERFORM_SSE4(i) \ - \ - SSE_PEXTRW_XMM_to_R32(temp1, regd, i*2); \ - MOVSX32R16toR(temp1, temp1); \ - CMP32ItoR(temp1, 25);\ - localptr[i][0] = JGE8(0);\ - CMP32ItoR(temp1, 0);\ - localptr[i][1] = JG8(0);\ - localptr[i][2] = JE8(0);\ - CMP32ItoR(temp1, -25);\ - localptr[i][3] = JLE8(0);\ - \ - NEG32R(temp1); \ - DEC32R(temp1);\ - MOV32ItoR(temp2, 0xffffffff); \ - SHL32CLtoR(temp2); \ - SSE4_PINSRD_R32_to_XMM(regd, temp2, i); \ - localptr[i][4] = JMP8(0);\ - \ - x86SetJ8(localptr[i][0]);\ - MOV32ItoR(temp2, 0xffffffff); \ - SSE4_PINSRD_R32_to_XMM(regd, temp2, i); \ - SHL32ItoR(temp2, 31); \ - SSE4_PINSRD_R32_to_XMM(regt, temp2, i); \ - localptr[i][5] = JMP8(0);\ - \ - x86SetJ8(localptr[i][1]);\ - DEC32R(temp1);\ - MOV32ItoR(temp2, 0xffffffff);\ - SSE4_PINSRD_R32_to_XMM(regd, temp2, i); \ - SHL32CLtoR(temp2); \ - SSE4_PINSRD_R32_to_XMM(regt, temp2, i); \ - localptr[i][6] = JMP8(0);\ - \ - x86SetJ8(localptr[i][3]);\ - MOV32ItoR(temp2, 0x80000000); \ - SSE4_PINSRD_R32_to_XMM(regd, temp2, i); \ - localptr[i][7] = JMP8(0);\ - \ - x86SetJ8(localptr[i][2]);\ - \ - x86SetJ8(localptr[i][4]);\ - x86SetJ8(localptr[i][5]);\ - x86SetJ8(localptr[i][6]);\ - x86SetJ8(localptr[i][7]); - - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - PERFORM_SSE4(0); - PERFORM_SSE4(1); - PERFORM_SSE4(2); - PERFORM_SSE4(3); -#undef PERFORM_SSE4 - - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask - SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); //regt contains mask - - if (is_sub) SSE_SUBPS_XMM_to_XMM(regd, regt); - else SSE_ADDPS_XMM_to_XMM(regd, regt); - - SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - - _freeX86reg(temp1); - _freeX86reg(temp2); + MOV32MtoR(ECX, (uptr)&tempECX); } void VU_ADD_SUB_SS(u32 regd, u32 regt, int is_sub, int is_mem, int info) { u8 *localptr[8]; u32 addrt = regt; //for case is_mem - int temp1 = _allocX86reg(ECX, X86TYPE_TEMP, 0, 0); //receives regd //_allocX86reg(ECX, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode); + + MOV32RtoM((uptr)&tempECX, ECX); + + int temp1 = ECX; //receives regd int temp2 = ALLOCTEMPX86(0); + + if (temp2 == ECX) + { + temp2 = ALLOCTEMPX86(0); + _freeX86reg(ECX); + } SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); if (!is_mem) SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); @@ -617,167 +556,44 @@ void VU_ADD_SUB_SS(u32 regd, u32 regt, int is_sub, int is_mem, int info) SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); } - _freeX86reg(temp1); _freeX86reg(temp2); -} -void VU_ADD_SUB_SS_SSE4(u32 regd, u32 regt, int is_sub, int is_mem, int info) -{ - u8 *localptr[8]; - u32 addrt = regt; //for case is_mem - int temp1 = _allocX86reg(ECX, X86TYPE_TEMP, 0, 0); //receives regd //_allocX86reg(ECX, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode); - int temp2 = ALLOCTEMPX86(0); - - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); - if (!is_mem) SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); - - SSE2_MOVD_XMM_to_R(temp1, regd); - SHR32ItoR(temp1, 23); - - if (is_mem) { - MOV32MtoR(temp2, addrt); - MOV32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - SHR32ItoR(temp2, 23); - } - else { - SSE2_MOVD_XMM_to_R(temp2, regt); - SHR32ItoR(temp2, 23); - } - - AND32ItoR(temp1, 0xff); - AND32ItoR(temp2, 0xff); - - SUB32RtoR(temp1, temp2); //temp1 = exponent difference - - CMP32ItoR(temp1, 25); - localptr[0] = JGE8(0); - CMP32ItoR(temp1, 0); - localptr[1] = JG8(0); - localptr[2] = JE8(0); - CMP32ItoR(temp1, -25); - localptr[3] = JLE8(0); - - NEG32R(temp1); - DEC32R(temp1); - MOV32ItoR(temp2, 0xffffffff); - SHL32CLtoR(temp2); - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - SSE4_PINSRD_R32_to_XMM(regd, temp2, 0); - if (!is_mem) - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - localptr[4] = JMP8(0); - - x86SetJ8(localptr[0]); - MOV32ItoR(temp2, 0x80000000); - if (is_mem) - AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - else { - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - SSE4_PINSRD_R32_to_XMM(regt, temp2, 0); - } - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - localptr[5] = JMP8(0); - - x86SetJ8(localptr[1]); - DEC32R(temp1); - MOV32ItoR(temp2, 0xffffffff); - SHL32CLtoR(temp2); - if (is_mem) - AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - else { - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - SSE4_PINSRD_R32_to_XMM(regt, temp2, 0); - } - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - localptr[6] = JMP8(0); - - x86SetJ8(localptr[3]); - MOV32ItoR(temp2, 0x80000000); - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - SSE4_PINSRD_R32_to_XMM(regd, temp2, 0); - if (!is_mem) - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - localptr[7] = JMP8(0); - - x86SetJ8(localptr[2]); - x86SetJ8(localptr[4]); - x86SetJ8(localptr[5]); - x86SetJ8(localptr[6]); - x86SetJ8(localptr[7]); - - if (is_mem) - { - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask - - if (is_sub) SSE_SUBSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); - else SSE_ADDSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); - } - else - { - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask - SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); //regt contains mask - - if (is_sub) SSE_SUBSS_XMM_to_XMM(regd, regt); - else SSE_ADDSS_XMM_to_XMM(regd, regt); - - SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - } - - _freeX86reg(temp1); - _freeX86reg(temp2); + MOV32MtoR(ECX, (uptr)&tempECX); } void SSE_ADDPS_XMM_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SSE4(regd, regt, 0, info); - else - VU_ADD_SUB(regd, regt, 0, info); + VU_ADD_SUB(regd, regt, 0, info); } else SSE_ADDPS_XMM_to_XMM(regd, regt); } void SSE_SUBPS_XMM_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SSE4(regd, regt, 1, info); - else - VU_ADD_SUB(regd, regt, 1, info); + VU_ADD_SUB(regd, regt, 1, info); } else SSE_SUBPS_XMM_to_XMM(regd, regt); } void SSE_ADDSS_XMM_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SS_SSE4(regd, regt, 0, 0, info); - else - VU_ADD_SUB_SS(regd, regt, 0, 0, info); + VU_ADD_SUB_SS(regd, regt, 0, 0, info); } else SSE_ADDSS_XMM_to_XMM(regd, regt); } void SSE_SUBSS_XMM_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SS_SSE4(regd, regt, 1, 0, info); - else - VU_ADD_SUB_SS(regd, regt, 1, 0, info); + VU_ADD_SUB_SS(regd, regt, 1, 0, info); } else SSE_SUBSS_XMM_to_XMM(regd, regt); } void SSE_ADDSS_M32_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SS_SSE4(regd, regt, 0, 1, info); - else - VU_ADD_SUB_SS(regd, regt, 0, 1, info); + VU_ADD_SUB_SS(regd, regt, 0, 1, info); } else SSE_ADDSS_M32_to_XMM(regd, regt); } void SSE_SUBSS_M32_to_XMM_custom(int info, int regd, int regt) { if (CHECK_VUADDSUBHACK) { - if ( cpucaps.hasStreamingSIMD4Extensions ) - VU_ADD_SUB_SS_SSE4(regd, regt, 1, 1, info); - else - VU_ADD_SUB_SS(regd, regt, 1, 1, info); + VU_ADD_SUB_SS(regd, regt, 1, 1, info); } else SSE_SUBSS_M32_to_XMM(regd, regt); }