diff --git a/bin/GameIndex.yaml b/bin/GameIndex.yaml index da07b8ddab..4257382212 100644 --- a/bin/GameIndex.yaml +++ b/bin/GameIndex.yaml @@ -10449,6 +10449,8 @@ SLES-51860: name: "Tennis Court Smash" region: "PAL-E" compat: 3 + gameFixes: + - XGKickHack # Fixes bad Geometry. SLES-51861: name: "Bowling Xciting" region: "PAL-E" @@ -20875,6 +20877,8 @@ SLPM-62634: SLPM-62635: name: "Simple 2000 Series Vol.26 - The Love Smash! 5.1" region: "NTSC-J" + gameFixes: + - XGKickHack # Fixes bad Geometry. SLPM-62636: name: "Simple 2000 Series 2-in-1 Vol.1 - The Tennis & The Snowboard [Disc1of2]" region: "NTSC-J" @@ -33881,18 +33885,7 @@ SLUS-20419: roundModes: eeRoundMode: 0 # Fixes crash when using the Subaru. gameFixes: - - XGKickHack # Fixes SPS while ingame (with following patch) - patches: - 5838E074: - content: |- - author=kozarovv - // Rearrange XGKick timing to fix SPS - patch=1,EE,0020efa0,word,8000033c - patch=1,EE,0020efd8,word,80004efc - patch=1,EE,0020f040,word,8000033c - patch=1,EE,0020f0b0,word,80004efc - patch=1,EE,0020f118,word,8000033c - patch=1,EE,0020f188,word,80004efc + - XGKickHack # Fixes SPS while ingame SLUS-20420: name: "Star Wars - Bounty Hunter" region: "NTSC-U" diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h index 615375ab13..39afa06be4 100644 --- a/pcsx2/Gif_Unit.h +++ b/pcsx2/Gif_Unit.h @@ -561,7 +561,7 @@ struct Gif_Unit } // Returns GS Packet Size in bytes - u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0, u32 size = ~0u) + u32 GetGSPacketSize(GIF_PATH pathIdx, u8* pMem, u32 offset = 0, u32 size = ~0u, bool flush = false) { u32 memMask = pathIdx ? ~0u : 0x3fffu; u32 curSize = 0; @@ -576,7 +576,7 @@ struct Gif_Unit } if (curSize >= size) return size; - if(!EmuConfig.Cpu.Recompiler.EnableVU1 && pathIdx == GIF_PATH_1) + if(((flush && gifTag.tag.EOP) || !flush) && (CHECK_XGKICKHACK || !EmuConfig.Cpu.Recompiler.EnableVU1)) { return curSize | ((u32)gifTag.tag.EOP << 31); } diff --git a/pcsx2/VU.h b/pcsx2/VU.h index cd9f083eca..aa4f0f2ff6 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -174,13 +174,13 @@ struct __aligned16 VURegs u8* Mem; u8* Micro; - bool xgkickenable; - bool xgkickendpacket; u32 xgkickaddr; u32 xgkickdiff; u32 xgkicksizeremaining; u32 xgkicklastcycle; u32 xgkickcyclecount; + u32 xgkickenable; + u32 xgkickendpacket; u8 VIBackupCycles; u32 VIOldValue; diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 2b897d52c1..818f0e729a 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2625,7 +2625,8 @@ void _vuXGKICKTransfer(u32 cycles, bool flush) VU1.xgkickcyclecount += cycles; VU1.xgkicklastcycle += cycles; - VUM_LOG("Adding %d cycles, total XGKick cycles to run now %d", cycles, VU1.xgkickcyclecount); + + VUM_LOG("Adding %d cycles, total XGKick cycles to run now %d flush %d enabled %d", cycles, VU1.xgkickcyclecount, flush, VU1.xgkickenable); while (VU1.xgkickenable && (flush || VU1.xgkickcyclecount >= 2)) { @@ -2633,7 +2634,8 @@ void _vuXGKICKTransfer(u32 cycles, bool flush) if (VU1.xgkicksizeremaining == 0) { - u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, VU1.Mem, VU1.xgkickaddr); + VUM_LOG("XGKICK reading new tag from %x", VU1.xgkickaddr); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, VU1.xgkickaddr, ~0u, flush); VU1.xgkicksizeremaining = size & 0xFFFF; VU1.xgkickendpacket = size >> 31; VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr; @@ -2663,10 +2665,17 @@ void _vuXGKICKTransfer(u32 cycles, bool flush) // Would be "nicer" to do the copy until it's all up, however this really screws up PATH3 masking stuff // So lets just do it the other way :) - /*if ((transfersize * 0x10) < VU1.xgkicksizeremaining) - gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true); + /*if (THREAD_VU1) + { + if ((transfersize * 0x10) < VU1.xgkicksizeremaining) + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true); + else + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize * 0x10, true); + } else*/ - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &VU1.Mem[VU1.xgkickaddr], transfersize * 0x10, true); + //{ + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize * 0x10, true); + //} if ((VU0.VI[REG_VPU_STAT].UL & 0x100) && flush) VU1.cycle += transfersize * 2; @@ -2692,11 +2701,12 @@ void _vuXGKICKTransfer(u32 cycles, bool flush) } } } - if ((VU0.VI[REG_VPU_STAT].UL & 0x100) && flush) + if (flush) { VUM_LOG("Disabling XGKICK"); _vuTestPipes(&VU1); } + VUM_LOG("XGKick run complete Enabled %d", VU1.xgkickenable); } static __ri void _vuXGKICK(VURegs* VU) diff --git a/pcsx2/gui/Panels/GameFixesPanel.cpp b/pcsx2/gui/Panels/GameFixesPanel.cpp index ce441e6107..d04988050a 100644 --- a/pcsx2/gui/Panels/GameFixesPanel.cpp +++ b/pcsx2/gui/Panels/GameFixesPanel.cpp @@ -49,8 +49,9 @@ Panels::GameFixesPanel::GameFixesPanel( wxWindow* parent ) wxEmptyString }, { - _("VU XGkick Hack - For Erementar Gerad."), - wxEmptyString + _("VU XGkick Sync - Use accurate timing for VU XGKicks (Slower)"), + pxEt(L"Fixes graphical errors on WRC, Erementar Gerad, Tennis Court Smash and others." + ) }, { _("EE timing hack - Multi purpose hack. Try if all else fails."), diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index c25b27019c..7d998ba97a 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -341,6 +341,7 @@ __fi void mVUanalyzeLQ(mV, int Ft, int Is, bool writeIs) __fi void mVUanalyzeSQ(mV, int Fs, int It, bool writeIt) { + mVUlow.isMemWrite = true; analyzeReg1(mVU, Fs, mVUlow.VF_read[0]); analyzeVIreg1(mVU, It, mVUlow.VI_read[0]); if (writeIt) @@ -477,9 +478,15 @@ __fi void mVUanalyzeCflag(mV, int It) __fi void mVUanalyzeXGkick(mV, int Fs, int xCycles) { + mVUlow.isKick = true; + mVUregs.xgkickcycles = 0; + mVUlow.kickcycles = 0; analyzeVIreg1(mVU, Fs, mVUlow.VI_read[0]); - analyzeXGkick1(); // Stall will cause mVUincCycles() to trigger pending xgkick - analyzeXGkick2(xCycles); + if (!CHECK_XGKICKHACK) + { + analyzeXGkick1(); // Stall will cause mVUincCycles() to trigger pending xgkick + analyzeXGkick2(xCycles); + } // Note: Technically XGKICK should stall on the next instruction, // this code stalls on the same instruction. The only case where this // will be a problem with, is if you have very-specifically placed diff --git a/pcsx2/x86/microVU_Branch.inl b/pcsx2/x86/microVU_Branch.inl index 50bedba9d2..7b5d7ccfac 100644 --- a/pcsx2/x86/microVU_Branch.inl +++ b/pcsx2/x86/microVU_Branch.inl @@ -49,6 +49,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) mVUcycles -= 100; qInst = mVU.q; pInst = mVU.p; + mVUregs.xgkickcycles = 0; if (mVUinfo.doDivFlag) { sFLAG.doFlag = true; @@ -60,6 +61,11 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit) { mVU_XGKICK_DELAY(mVU); } + if (isVU1 && CHECK_XGKICKHACK) + { + mVUlow.kickcycles = 99; + mVU_XGKICK_SYNC(mVU, true); + } if (!isVU1) xFastCall((void*)mVU0clearlpStateJIT); else @@ -163,6 +169,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) mVUcycles -= 100; qInst = mVU.q; pInst = mVU.p; + mVUregs.xgkickcycles = 0; if (mVUinfo.doDivFlag) { sFLAG.doFlag = true; @@ -173,6 +180,11 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) { mVU_XGKICK_DELAY(mVU); } + if (isVU1 && CHECK_XGKICKHACK) + { + mVUlow.kickcycles = 99; + mVU_XGKICK_SYNC(mVU, true); + } if (!isVU1) xFastCall((void*)mVU0clearlpStateJIT); else diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index d938bdba29..f64fee75f7 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -737,6 +737,22 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) mVUup.tBit = true; } mVUsetCycles(mVU); + // Update XGKick information + if (!mVUlow.isKick) + { + mVUregs.xgkickcycles += 1 + mVUstall; + if (mVUlow.isMemWrite) + { + mVUlow.kickcycles = mVUregs.xgkickcycles; + mVUregs.xgkickcycles = 0; + } + } + else + { + mVUregs.xgkickcycles = 0; + mVUlow.kickcycles = 0; + } + mVUinfo.readQ = mVU.q; mVUinfo.writeQ = !mVU.q; mVUinfo.readP = mVU.p && isVU1; @@ -753,6 +769,8 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) } branchWarning(mVU); + mVUlow.kickcycles = mVUregs.xgkickcycles; + mVUregs.xgkickcycles = 0; break; } else if (branch == 1) @@ -771,11 +789,17 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) if (mVUup.mBit && !branch && !mVUup.eBit) { mVUregs.needExactMatch |= 7; + mVUlow.kickcycles = mVUregs.xgkickcycles; + mVUregs.xgkickcycles = 0; break; } if (mVUinfo.isEOB) + { + mVUlow.kickcycles = mVUregs.xgkickcycles; + mVUregs.xgkickcycles = 0; break; + } incPC(1); } @@ -806,6 +830,12 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) { xOR(ptr32[&mVU.regs().flags], VUFLAG_MFLAGSET); } + + if (isVU1 && mVUlow.kickcycles && CHECK_XGKICKHACK) + { + mVU_XGKICK_SYNC(mVU, false); + } + mVUexecuteInstruction(mVU); if (!mVUinfo.isBdelay && !mVUlow.branch) //T/D Bit on branch is handled after the branch, branch delay slots are executed. { diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 609f4d93dd..7fbeb651ea 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -49,11 +49,12 @@ union __aligned16 microRegInfo u8 viBackUp; // VI reg number that was written to on branch-delay slot u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending) u8 r; - u8 mbitinblock; }; - u32 quick32[3]; + u32 quick32[4]; }; + u32 xgkickcycles; + u8 mbitinblock; u8 vi15v; // 'vi15' constant is valid u16 vi15; // Constant Prop Info for vi15 @@ -64,12 +65,12 @@ union __aligned16 microRegInfo }; }; - u128 full128[160 / sizeof(u128)]; - u64 full64[160 / sizeof(u64)]; - u32 full32[160 / sizeof(u32)]; + u128 full128[176 / sizeof(u128)]; + u64 full64[176 / sizeof(u64)]; + u32 full32[176 / sizeof(u32)]; }; -static_assert(sizeof(microRegInfo) == 160, "microRegInfo was not 160 bytes"); +static_assert(sizeof(microRegInfo) == 176, "microRegInfo was not 176 bytes"); struct microProgram; struct microJumpCache @@ -139,6 +140,7 @@ struct microLowerOp microVIreg VI_read[2]; // VI regs read by this instruction microConstInfo constJump; // Constant Reg Info for JR/JARL instructions u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JR, 10 = JALR) + u32 kickcycles; // Number of xgkick cycles accumulated by this instruction bool badBranch; // This instruction is a Branch who has another branch in its Delay Slot bool evilBranch; // This instruction is a Branch in a Branch Delay Slot (Instruction after badBranch) bool isNOP; // This instruction is a NOP @@ -148,6 +150,8 @@ struct microLowerOp bool memReadIs; // Read Is (VI reg) from memory (used by branches) bool memReadIt; // Read If (VI reg) from memory (used by branches) bool readFlags; // Current Instruction reads Status, Mac, or Clip flags + bool isMemWrite; // Current Instruction writes to VU memory + bool isKick; // Op is a kick so don't count kick cycles }; struct microFlagInst @@ -314,6 +318,14 @@ public: } } + bool checkCachedReg(int regId) + { + if (regId < xmmTotal) + return xmmMap[regId].VFreg >= 0; + else + return false; + } + void clearReg(const xmm& reg) { clearReg(reg.Id); } void clearReg(int regId) { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index cc8ba10f16..50fff17711 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1148,6 +1148,7 @@ mVUop(mVU_ISW) { pass1 { + mVUlow.isMemWrite = true; analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); } @@ -1173,6 +1174,7 @@ mVUop(mVU_ISWR) { pass1 { + mVUlow.isMemWrite = true; analyzeVIreg1(mVU, _Is_, mVUlow.VI_read[0]); analyzeVIreg1(mVU, _It_, mVUlow.VI_read[1]); } @@ -1544,7 +1546,7 @@ void __fastcall mVU_XGKICK_(u32 addr) { addr = (addr & 0x3ff) * 16; u32 diff = 0x4000 - addr; - u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, addr); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, addr, ~0u, true); if (size > diff) { @@ -1558,6 +1560,92 @@ void __fastcall mVU_XGKICK_(u32 addr) } } +void __fastcall _vuXGKICKTransfermVU(bool flush) +{ + while (VU1.xgkickenable && (flush || VU1.xgkickcyclecount >= 2)) + { + u32 transfersize = 0; + + if (VU1.xgkicksizeremaining == 0) + { + //VUM_LOG("XGKICK reading new tag from %x", VU1.xgkickaddr); + u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, VU1.xgkickaddr, ~0u, flush); + VU1.xgkicksizeremaining = size & 0xFFFF; + VU1.xgkickendpacket = size >> 31; + VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr; + + if (VU1.xgkicksizeremaining == 0) + { + //VUM_LOG("Invalid GS packet size returned, cancelling XGKick"); + VU1.xgkickenable = false; + break; + } + //else + //VUM_LOG("XGKICK New tag size %d bytes EOP %d", VU1.xgkicksizeremaining, VU1.xgkickendpacket); + } + + if (!flush) + { + transfersize = std::min(VU1.xgkicksizeremaining, VU1.xgkickcyclecount * 8); + transfersize = std::min(transfersize, VU1.xgkickdiff); + } + else + { + transfersize = VU1.xgkicksizeremaining; + transfersize = std::min(transfersize, VU1.xgkickdiff); + } + + //VUM_LOG("XGKICK Transferring %x bytes from %x size %x", transfersize * 0x10, VU1.xgkickaddr, VU1.xgkicksizeremaining); + + // Would be "nicer" to do the copy until it's all up, however this really screws up PATH3 masking stuff + // So lets just do it the other way :) + if (THREAD_VU1) + { + if (transfersize < VU1.xgkicksizeremaining) + gifUnit.gifPath[GIF_PATH_1].CopyGSPacketData(&VU1.Mem[VU1.xgkickaddr], transfersize, true); + else + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize, true); + } + else + { + gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[VU1.xgkickaddr], transfersize, true); + } + + if (flush) + VU1.cycle += transfersize / 8; + + VU1.xgkickcyclecount -= transfersize / 8; + + VU1.xgkickaddr = (VU1.xgkickaddr + transfersize) & 0x3FFF; + VU1.xgkicksizeremaining -= transfersize; + VU1.xgkickdiff = 0x4000 - VU1.xgkickaddr; + + if (VU1.xgkickendpacket && !VU1.xgkicksizeremaining) + // VUM_LOG("XGKICK next addr %x left size %x", VU1.xgkickaddr, VU1.xgkicksizeremaining); + //else + { + //VUM_LOG("XGKICK transfer finished"); + VU1.xgkickenable = false; + // Check if VIF is waiting for the GIF to not be busy + } + } + //VUM_LOG("XGKick run complete Enabled %d", VU1.xgkickenable); +} + +static __fi void mVU_XGKICK_SYNC(mV, bool flush) +{ + xTEST(ptr32[&VU1.xgkickenable], 0x1); + xForwardJZ32 skipxgkick; + xADD(ptr32[&VU1.xgkickcyclecount], mVUlow.kickcycles); + xCMP(ptr32[&VU1.xgkickcyclecount], 2); + xForwardJL32 needcycles; + mVUbackupRegs(mVU, true, true); + xFastCall(_vuXGKICKTransfermVU, flush); + mVUrestoreRegs(mVU, true, true); + needcycles.SetTarget(); + skipxgkick.SetTarget(); +} + static __fi void mVU_XGKICK_DELAY(mV) { mVUbackupRegs(mVU); @@ -1579,18 +1667,42 @@ mVUop(mVU_XGKICK) mVUlow.isNOP = true; return; } - mVUanalyzeXGkick(mVU, _Is_, mVU_XGKICK_CYCLES); + mVUanalyzeXGkick(mVU, _Is_, 1); } - pass2 + pass2 { + if (CHECK_XGKICKHACK) + { + mVUlow.kickcycles = 99; + mVU_XGKICK_SYNC(mVU, true); + mVUlow.kickcycles = 0; + } if (mVUinfo.doXGKICK) // check for XGkick Transfer { mVU_XGKICK_DELAY(mVU); mVUinfo.doXGKICK = false; } - - mVUallocVIa(mVU, gprT1, _Is_); - xMOV(ptr32[&mVU.VIxgkick], gprT1); + + if (!CHECK_XGKICKHACK) + { + mVUallocVIa(mVU, gprT1, _Is_); + xMOV(ptr32[&mVU.VIxgkick], gprT1); + } + else + { + xMOV(ptr32[&VU1.xgkickenable], 1); + xMOV(ptr32[&VU1.xgkickendpacket], 0); + xMOV(ptr32[&VU1.xgkicksizeremaining], 0); + xMOV(ptr32[&VU1.xgkickcyclecount], 0); + xMOV(gprT2, ptr32[&mVU.totalCycles]); + xSUB(gprT2, ptr32[&mVU.cycles]); + xADD(gprT2, ptr32[&VU1.cycle]); + xMOV(ptr32[&VU1.xgkicklastcycle], gprT2); + mVUallocVIa(mVU, gprT1, _Is_); + xAND(gprT1, 0x3FF); + xSHL(gprT1, 4); + xMOV(ptr32[&VU1.xgkickaddr], gprT1); + } mVU.profiler.EmitOp(opXGKICK); } pass3 { mVUlog("XGKICK vi%02d", _Fs_); } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 753372d580..85f2e0be9e 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -354,17 +354,6 @@ static const bool doDBitHandling = false; // This hack only updates the Status Flag on blocks that will read it. // Most blocks do not read status flags, so this is a big speedup. -//------------------------------------------------------------------ -// Unknown Data -//------------------------------------------------------------------ - -// XG Kick Transfer Delay Amount -#define mVU_XGKICK_CYCLES ((CHECK_XGKICKHACK) ? 6 : 1) -// Its unknown at recompile time how long the xgkick transfer will take -// so give it a value that makes games happy :) (SO3 is fine at 1 cycle delay) - -//------------------------------------------------------------------ - extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW = false); extern void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW); extern void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw); diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 66cb9bef29..120e8147d3 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -145,13 +145,14 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW) //------------------------------------------------------------------ // Backup Volatile Regs (EAX, ECX, EDX, MM0~7, XMM0~7, are all volatile according to 32bit Win/Linux ABI) -__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) +__fi void mVUbackupRegs(microVU& mVU, bool toMemory = false, bool onlyNeeded = false) { if (toMemory) { for (int i = 0; i < 8; i++) { - xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); + if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i) + xMOVAPS(ptr128[&mVU.xmmBackup[i][0]], xmm(i)); } } else @@ -162,13 +163,14 @@ __fi void mVUbackupRegs(microVU& mVU, bool toMemory = false) } // Restore Volatile Regs -__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false) +__fi void mVUrestoreRegs(microVU& mVU, bool fromMemory = false, bool onlyNeeded = false) { if (fromMemory) { for (int i = 0; i < 8; i++) { - xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); + if (!onlyNeeded || mVU.regAlloc->checkCachedReg(i) || xmmPQ.Id == i) + xMOVAPS(xmm(i), ptr128[&mVU.xmmBackup[i][0]]); } } else