From 68aaf91b5d370b09b81fec34d9204984dfc28e92 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Thu, 19 Feb 2009 15:56:07 +0000 Subject: [PATCH] Tmkk managed to fix a huge hack in the superVU delay slot handling. This new code properly handles these situations now, removing the need for the magna carta gamefix, and also fixing problems in dragon quest 8(jp). Thanks again, tmkk :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@536 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Misc.h | 1 - pcsx2/VU.h | 8 ++ pcsx2/VUops.cpp | 6 ++ pcsx2/windows/WinMain.cpp | 2 - pcsx2/windows/pcsx2.rc | 14 ++-- pcsx2/x86/iVUmicro.cpp | 163 +++++++++++++++++++++++++++++++++----- pcsx2/x86/iVUzerorec.cpp | 100 ++++++++++++----------- 7 files changed, 214 insertions(+), 80 deletions(-) diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index 34e595abde..9444196c08 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -90,7 +90,6 @@ extern SessionOverrideFlags g_Session; //------------ SPECIAL GAME FIXES!!! --------------- #define CHECK_VUADDSUBHACK (Config.GameFixes & 0x1) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate. #define CHECK_FPUCLAMPHACK (Config.GameFixes & 0x4) // Special Fix for Tekken 5, different clamping for FPU (sets NaN to zero; doesn't clamp infinities) -#define CHECK_VUBRANCHHACK (Config.GameFixes & 0x8) // Special Fix for Magna Carta (note: Breaks Crash Bandicoot) //------------ Advanced Options!!! 
--------------- #define CHECK_VU_OVERFLOW (Config.vuOptions & 0x1) #define CHECK_VU_EXTRA_OVERFLOW (Config.vuOptions & 0x2) // If enabled, Operands are clamped before being used in the VU recs diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 87f23b4046..d30904f400 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -109,6 +109,13 @@ struct fmacPipe { u32 clipflag; }; +struct ialuPipe { + int enable; + int reg; + u32 sCycle; + u32 Cycle; +}; + struct VURegs { VECTOR VF[32]; // VF and VI need to be first in this struct for proper mapping REG_VI VI[32]; // needs to be 128bit x 32 (cottonvibes) @@ -140,6 +147,7 @@ struct VURegs { fmacPipe fmac[8]; fdivPipe fdiv; efuPipe efu; + ialuPipe ialu[8]; VURegs() : Mem( NULL ) diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index ee18f57c72..cb0e3a3161 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2210,6 +2210,7 @@ void _vuRegs##OP(VURegs * VU, _VURegsNum *VUregsn) { \ VUregsn->VFread1 = 0; \ VUregsn->VIwrite = 1 << _Fd_; \ VUregsn->VIread = (1 << _Fs_) | (1 << _Ft_); \ + VUregsn->cycles = 0; \ } #define VUREGS_ITIS(OP) \ @@ -2220,6 +2221,7 @@ void _vuRegs##OP(VURegs * VU, _VURegsNum *VUregsn) { \ VUregsn->VFread1 = 0; \ VUregsn->VIwrite = 1 << _Ft_; \ VUregsn->VIread = 1 << _Fs_; \ + VUregsn->cycles = 0; \ } #define VUREGS_PFS(OP, _cycles) \ @@ -2592,6 +2594,7 @@ void _vuRegsILW(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = 1 << _Fs_; + VUregsn->cycles = 3; } void _vuRegsISW(VURegs * VU, _VURegsNum *VUregsn) { @@ -2610,6 +2613,7 @@ void _vuRegsILWR(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = (1 << _Ft_); VUregsn->VIread = (1 << _Fs_); + VUregsn->cycles = 3; } void _vuRegsISWR(VURegs * VU, _VURegsNum *VUregsn) { @@ -2908,6 +2912,7 @@ void _vuRegsXITOP(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = 0; + VUregsn->cycles = 0; } void _vuRegsXGKICK(VURegs * VU, _VURegsNum *VUregsn) { @@ 
-2926,4 +2931,5 @@ void _vuRegsXTOP(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = 0; + VUregsn->cycles = 0; } diff --git a/pcsx2/windows/WinMain.cpp b/pcsx2/windows/WinMain.cpp index 114f3bb803..c2c4ae2cb6 100644 --- a/pcsx2/windows/WinMain.cpp +++ b/pcsx2/windows/WinMain.cpp @@ -529,7 +529,6 @@ BOOL APIENTRY GameFixes(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) if(Config.GameFixes & 0x1) CheckDlgButton(hDlg, IDC_GAMEFIX1, TRUE); if(Config.GameFixes & 0x2) CheckDlgButton(hDlg, IDC_GAMEFIX2, TRUE); if(Config.GameFixes & 0x4) CheckDlgButton(hDlg, IDC_GAMEFIX3, TRUE); - if(Config.GameFixes & 0x8) CheckDlgButton(hDlg, IDC_GAMEFIX4, TRUE); return TRUE; case WM_COMMAND: @@ -539,7 +538,6 @@ BOOL APIENTRY GameFixes(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) newfixes |= IsDlgButtonChecked(hDlg, IDC_GAMEFIX1) ? 0x1 : 0; newfixes |= IsDlgButtonChecked(hDlg, IDC_GAMEFIX2) ? 0x2 : 0; newfixes |= IsDlgButtonChecked(hDlg, IDC_GAMEFIX3) ? 0x4 : 0; - newfixes |= IsDlgButtonChecked(hDlg, IDC_GAMEFIX4) ? 0x8 : 0; EndDialog(hDlg, TRUE); diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 36d8486a7a..eed6f90f53 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -8,8 +8,6 @@ // Generated from the TEXTINCLUDE 2 resource. 
// #include "afxresmw.h" - - ///////////////////////////////////////////////////////////////////////////// #undef APSTUDIO_READONLY_SYMBOLS @@ -76,19 +74,17 @@ LANGUAGE LANG_GERMAN, SUBLANG_GERMAN // Dialog // -IDD_GAMEFIXES DIALOGEX 0, 0, 278, 130 +IDD_GAMEFIXES DIALOGEX 0, 0, 278, 119 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "Game Special Fixes" FONT 8, "MS Shell Dlg", 400, 0, 0x1 BEGIN - DEFPUSHBUTTON "OK",IDOK,85,102,50,14 - PUSHBUTTON "Cancel",IDCANCEL,139,102,50,14 + DEFPUSHBUTTON "OK",IDOK,85,85,50,14 + PUSHBUTTON "Cancel",IDCANCEL,139,85,50,14 CTEXT "Some games need special settings.\nConfigure them here.",IDC_STATIC,7,7,264,17 - GROUPBOX "PCSX2 Gamefixes",IDC_STATIC,7,31,264,92 + GROUPBOX "PCSX2 Gamefixes",IDC_STATIC,7,31,264,77 CONTROL "FPU Clamp Hack - Special fix for Tekken 5.",IDC_GAMEFIX3, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,51,249,10 - CONTROL "VU Branch Hack - Special fix for Magna Carta; Breaks Crash Bandicoot!",IDC_GAMEFIX4, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,79,252,10 CONTROL "VU Add / Sub Hack - Special fix for Tri-Ace games!",IDC_GAMEFIX1, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,65,252,10 END @@ -108,7 +104,7 @@ BEGIN RIGHTMARGIN, 271 VERTGUIDE, 12 TOPMARGIN, 7 - BOTTOMMARGIN, 123 + BOTTOMMARGIN, 112 END END #endif // APSTUDIO_INVOKED diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index afa587649a..29f9b93091 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -137,41 +137,87 @@ PCSX2_ALIGNED16(u32 g_maxvals_XYZW[16][4])= //------------------------------------------------------------------ // VU Pipeline/Test Stalls/Analyzing Functions //------------------------------------------------------------------ -void _recvuFMACflush(VURegs * VU) { +void _recvuFMACflush(VURegs * VU, bool intermediate) { int i; for (i=0; i<8; i++) { if (VU->fmac[i].enable == 0) continue; - if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { -// VUM_LOG("flushing FMAC 
pipe[%d]\n", i); - VU->fmac[i].enable = 0; + if( intermediate ) { + if ((vucycle - VU->fmac[i].sCycle) > VU->fmac[i].Cycle) { +// VUM_LOG("flushing FMAC pipe[%d]\n", i); + VU->fmac[i].enable = 0; + } + } + else { + if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { +// VUM_LOG("flushing FMAC pipe[%d]\n", i); + VU->fmac[i].enable = 0; + } } } } -void _recvuFDIVflush(VURegs * VU) { +void _recvuFDIVflush(VURegs * VU, bool intermediate) { if (VU->fdiv.enable == 0) return; - if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { -// SysPrintf("flushing FDIV pipe\n"); - VU->fdiv.enable = 0; + if( intermediate ) { + if ((vucycle - VU->fdiv.sCycle) > VU->fdiv.Cycle) { +// SysPrintf("flushing FDIV pipe\n"); + VU->fdiv.enable = 0; + } + } + else { + if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { +// SysPrintf("flushing FDIV pipe\n"); + VU->fdiv.enable = 0; + } } } -void _recvuEFUflush(VURegs * VU) { +void _recvuEFUflush(VURegs * VU, bool intermediate) { if (VU->efu.enable == 0) return; - if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) { -// SysPrintf("flushing FDIV pipe\n"); - VU->efu.enable = 0; + if( intermediate ) { + if ((vucycle - VU->efu.sCycle) > VU->efu.Cycle) { +// SysPrintf("flushing FDIV pipe\n"); + VU->efu.enable = 0; + } + } + else { + if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) { +// SysPrintf("flushing FDIV pipe\n"); + VU->efu.enable = 0; + } } } -void _recvuTestPipes(VURegs * VU) { - _recvuFMACflush(VU); - _recvuFDIVflush(VU); - _recvuEFUflush(VU); +void _recvuIALUflush(VURegs * VU, bool intermediate) { + int i; + + for (i=0; i<8; i++) { + if (VU->ialu[i].enable == 0) continue; + + if( intermediate ) { + if ((vucycle - VU->ialu[i].sCycle) > VU->ialu[i].Cycle) { +// VUM_LOG("flushing IALU pipe[%d]\n", i); + VU->ialu[i].enable = 0; + } + } + else { + if ((vucycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) { +// VUM_LOG("flushing IALU pipe[%d]\n", i); + VU->ialu[i].enable = 0; + } + } + } +} + +void _recvuTestPipes(VURegs * VU, bool 
intermediate) { // intermediate = true if called by upper FMAC stall detection + _recvuFMACflush(VU, intermediate); + _recvuFDIVflush(VU, intermediate); + _recvuEFUflush(VU, intermediate); + _recvuIALUflush(VU, intermediate); } void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) { @@ -213,7 +259,28 @@ void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) { VU->fmac[i].enable = 0; vucycle+= cycle; - _recvuTestPipes(VU); + _recvuTestPipes(VU, true); // for lower instructions +} + +void _recvuIALUTestStall(VURegs * VU, int reg) { + int cycle; + int i; + u32 latency; + + for (i=0; i<8; i++) { + if (VU->ialu[i].enable == 0) continue; + if (VU->ialu[i].reg == reg) break; + } + + if (i == 8) return; + + latency = VU->ialu[i].Cycle + 1; + cycle = 0; + if( vucycle - VU->ialu[i].sCycle < latency ) + cycle = latency - (vucycle - VU->ialu[i].sCycle); + + VU->ialu[i].enable = 0; + vucycle+= cycle; } void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { @@ -249,7 +316,54 @@ void _recvuEFUAdd(VURegs * VU, int cycles) { VU->efu.Cycle = cycles; } -void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { +void _recvuIALUAdd(VURegs * VU, int reg, int cycles) { + int i; + + /* find a free ialu pipe */ + for (i=0; i<8; i++) { + if (VU->ialu[i].enable == 1) continue; + break; + } + + if (i==8) SysPrintf("*PCSX2*: error , out of ialus\n"); + + VU->ialu[i].enable = 1; + VU->ialu[i].sCycle = vucycle; + VU->ialu[i].Cycle = cycles; + VU->ialu[i].reg = reg; +} + +void _recvuTestIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { + + int VIread0 = 0, VIread1 = 0; // max 2 integer registers are read simultaneously + int i; + + for(i=0;i<16;i++) { // find used integer(vi00-vi15) registers + if( (VUregsn->VIread >> i) & 1 ) { + if( VIread0 ) VIread1 = i; + else VIread0 = i; + } + } + + if( VIread0 ) _recvuIALUTestStall(VU, VIread0); + if( VIread1 ) _recvuIALUTestStall(VU, VIread1); +} + +void _recvuAddIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { + if (VUregsn->VIwrite && 
VUregsn->cycles) { + int VIWrite0 = 0; + int i; + + for(i=0;i<16;i++) { // find used(vi00-vi15) registers + if( (VUregsn->VIwrite >> i) & 1 ) { + VIWrite0 = i; + } + } + if( VIWrite0 ) _recvuIALUAdd(VU, VIWrite0, VUregsn->cycles); + } +} + +void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn, bool upper) { if( VUregsn->VFread0 && (VUregsn->VFread0 == VUregsn->VFread1) ) { _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw|VUregsn->VFr1xyzw); @@ -258,13 +372,15 @@ void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VFread0) _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); if (VUregsn->VFread1) _recvuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw); } + + if( !upper && VUregsn->VIread ) _recvuTestIALUStalls(VU, VUregsn); // for lower instructions which read integer reg } void _recvuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { if (VUregsn->VFwrite) _recvuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw); else if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) _recvuFMACAdd(VU, -REG_CLIP_FLAG, 0); // REG_CLIP_FLAG pipe - else _recvuFMACAdd(VU, 0, 0); + else _recvuFMACAdd(VU, 0, 0); // cause no data dependency with fp registers } void _recvuFlushFDIV(VURegs * VU) { @@ -315,15 +431,17 @@ void _recvuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn); break; + case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, true); break; } } void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn); break; + case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, false); break; case VUPIPE_FDIV: _recvuTestFDIVStalls(VU, VUregsn); break; case VUPIPE_EFU: _recvuTestEFUStalls(VU, VUregsn); break; + case VUPIPE_IALU: _recvuTestIALUStalls(VU, VUregsn); break; + case VUPIPE_BRANCH: _recvuTestIALUStalls(VU, VUregsn); 
break; } } @@ -338,6 +456,7 @@ void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break; case VUPIPE_FDIV: _recvuAddFDIVStalls(VU, VUregsn); break; case VUPIPE_EFU: _recvuAddEFUStalls(VU, VUregsn); break; + case VUPIPE_IALU: _recvuAddIALUStalls(VU, VUregsn); break; // note: only ILW and ILWR cause stall in IALU pipe } } @@ -486,7 +605,7 @@ void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs) } _recvuAddUpperStalls(VU, uregs); - _recvuTestPipes(VU); + _recvuTestPipes(VU, false); vucycle++; } diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 82ca6fd3dd..2c003b0169 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -177,6 +177,7 @@ public: s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2]; s8 vfflush[2]; // extra flush regs s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay + VuInstruction *pPrevInst; int SetCachedRegs(int upper, u32 vuxyz); void Recompile(list::iterator& itinst, u32 vuxyz); @@ -267,6 +268,7 @@ struct VUPIPELINES fmacPipe fmac[8]; fdivPipe fdiv; efuPipe efu; + ialuPipe ialu[8]; list< WRITEBACK > listWritebacks; }; @@ -822,6 +824,7 @@ static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) memzero_obj(pipes.fmac); memzero_obj(pipes.fdiv); memzero_obj(pipes.efu); + memzero_obj(pipes.ialu); SuperVUBuildBlocks(NULL, startpc, pipes); // fill parents @@ -930,6 +933,38 @@ void SuperVUAddWritebacks(VuBaseBlock* pblock, const list& listWriteb #endif } +#ifdef SUPERVU_VIBRANCHDELAY +static VuInstruction* getDelayInst(VuInstruction* pInst) +{ + // check for the N cycle branch delay + // example of 2 cycles delay (monster house) : + // sqi vi05 + // sqi vi05 + // ibeq vi05, vi03 + // The ibeq should read the vi05 before the first sqi + + int delay = 1; + VuInstruction* pDelayInst = NULL; + VuInstruction* pTargetInst = pInst->pPrevInst; + while( 1 ) { // fixme: is 3-cycle delay really maximum? 
+ if( pTargetInst != NULL + && pTargetInst->info.cycle+delay==pInst->info.cycle + && (pTargetInst->regs[0].pipe == VUPIPE_IALU||pTargetInst->regs[0].pipe == VUPIPE_FMAC) + && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) + && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff) + && !(pTargetInst->regs[0].VIread&((1<pPrevInst; + delay++; + } + else break; + } + if( delay > 1 ) DevCon::WriteLn("supervu: %d cycle branch delay detected: %x %x", params delay-1, pc, s_pFnHeader->startpc); + return pDelayInst; +} +#endif + static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes) { // check if block already exists @@ -1036,6 +1071,7 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const V memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac)); memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv)); memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu)); + memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu)); // memset(VU->fmac, 0, sizeof(VU->fmac)); // memset(&VU->fdiv, 0, sizeof(VU->fdiv)); // memset(&VU->efu, 0, sizeof(VU->efu)); @@ -1105,7 +1141,7 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const V // second full pass pc = startpc; branch = 0; - VuInstruction* pprevinst=NULL, *ppprevinst=NULL, *pinst = NULL; + VuInstruction* pprevinst=NULL, *pinst = NULL; while(1) { @@ -1128,59 +1164,29 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const V pblock->insts.push_back(VuInstruction()); - ppprevinst = pprevinst; - pprevinst = pinst; + pprevinst = pinst; pinst = &pblock->insts.back(); + pinst->pPrevInst = pprevinst; SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs); #ifdef SUPERVU_VIBRANCHDELAY if( pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1 ) { - if( pprevinst != NULL && pprevinst->info.cycle+1==pinst->info.cycle && - 
(pprevinst->regs[0].pipe == VUPIPE_IALU||pprevinst->regs[0].pipe == VUPIPE_FMAC) && ((pprevinst->regs[0].VIwrite & pinst->regs[0].VIread) & 0xffff) - && !(pprevinst->regs[0].VIread&((1<type |= INST_CACHE_VI; - VuInstruction* pdelayinst = pprevinst; - int lowercode = *(int*)&VU->Micro[pc-16]; + // find the correct register + u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread; + for(int i = 0; i < 16; ++i) { + if( mask & (1<vicached = i; + break; + } + } - // check for the previous instruction. If that has the same register used, then have a 2 cycle delay! - // (monsterhouse has sqi vi05, sqi vi05, ibeq vi05, vi03). The ibeq should read the vi05 before the first sqi - if( ppprevinst != NULL && ppprevinst->info.cycle+2==pinst->info.cycle && (ppprevinst->regs[0].pipe == VUPIPE_FMAC||ppprevinst->regs[0].pipe == VUPIPE_IALU) && - ((ppprevinst->regs[0].VIwrite & pinst->regs[0].VIread) & 0xffff) && - ((ppprevinst->regs[0].VIwrite & pinst->regs[0].VIread) & 0xffff) == ((ppprevinst->regs[0].VIwrite & pprevinst->regs[0].VIread) & 0xffff) && - !(ppprevinst->regs[0].VIread&((1<startpc); - - // ignore if prev instruction is ILW or ILWR (xenosaga 2) - lowercode = *(int*)&VU->Micro[pc-24]; - pdelayinst = ppprevinst; - } - - - //SysPrintf("vurec: %x\n", pc); - // ignore if prev instruction is ILW or ILWR (xenosaga 2) - if( (lowercode>>25) != 4 // ILW - && !((lowercode>>25) == 0x40 && (lowercode&0x3ff)==0x3fe) ) { // ILWR - - //SysPrintf("branchdelay: %x: %x\n", s_pFnHeader->startpc, pc-8); - - // share the same register - if (CHECK_VUBRANCHHACK) pinst->type |= INST_CACHE_VI; - else pdelayinst->type |= INST_CACHE_VI; - - // find the correct register - u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread; - for(int i = 0; i < 16; ++i) { - if( mask & (1<vicached = i; - break; - } - } - - pinst->vicached = pdelayinst->vicached; - } - } + pinst->vicached = pdelayinst->vicached; + } } #endif @@ -1330,10 +1336,12 @@ static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* 
parent, u32 startpc, const V memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac)); memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv)); memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu)); + memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu)); for(i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle; newpipes.fdiv.sCycle -= vucycle; newpipes.efu.sCycle -= vucycle; + for(i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle; if( listWritebacks.size() > 0 ) { // flush all when jumping, send down the pipe when in branching