From 009d6ba5e6ff7cdf4c366ba81a5fddc6a7aae512 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 1 Sep 2010 04:57:10 +0000 Subject: [PATCH] microVU: - Code refactoring (mostly changing macros to functions/constants...) - Made it so the disable-regAlloc option flushes every 32bit instruction, instead of every 64bit instruction (upper+lower instruction pair) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3713 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 - pcsx2/x86/microVU.cpp | 3 +- pcsx2/x86/microVU.h | 8 +- pcsx2/x86/microVU_Analyze.inl | 2 +- pcsx2/x86/microVU_Compile.inl | 142 ++++++++++------- pcsx2/x86/microVU_Flags.inl | 2 +- pcsx2/x86/microVU_IR.h | 177 +++++++++++++++++++-- pcsx2/x86/microVU_IR.inl | 165 ------------------- pcsx2/x86/microVU_Misc.h | 10 +- 9 files changed, 258 insertions(+), 255 deletions(-) delete mode 100644 pcsx2/x86/microVU_IR.inl diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 505300254f..76f9660f7e 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -647,10 +647,6 @@ RelativePath="..\..\x86\microVU_IR.h" > - - diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index a047f1e6fe..97b4fc3e68 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -24,7 +24,6 @@ #include "microVU_Misc.inl" #include "microVU_Log.inl" #include "microVU_Analyze.inl" -#include "microVU_IR.inl" #include "microVU_Alloc.inl" #include "microVU_Upper.inl" #include "microVU_Lower.inl" @@ -103,7 +102,7 @@ void microVU::init(uint vuIndex) { dispCache = NULL; cache = NULL; cacheSize = mVUcacheSize; - regAlloc = new microRegAlloc(this); + regAlloc = new microRegAlloc(index); for (u32 i = 0; i < (progSize / 2); i++) { prog.prog[i] = new deque(); diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 289afa2d44..ef09c08990 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ 
-90,7 +90,7 @@ public: for (int i = 0; i <= listI; i++) { if ((linkI->block->pState.q == pState->q) && (linkI->block->pState.p == pState->p) - && ((linkI->block->pState.vi15 == pState->vi15) || !CHECK_VU_CONSTPROP) + && ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp) && (linkI->block->pState.flags == pState->flags) && (linkI->block->pState.xgkick == pState->xgkick) && (linkI->block->pState.viBackUp == pState->viBackUp) @@ -229,12 +229,6 @@ struct microVU { return (((prog.IRinfo.curPC + 4) + (Imm11() * 2)) & progMemMask) * 4; } - __ri void loadIreg(const xmm& reg, int xyzw) - { - xMOVSSZX(reg, ptr32[&getVI(REG_I)]); - if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0); - } - void init(uint vuIndex); void reset(); void close(); diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 0bc65f3931..1e4f8c6301 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -470,7 +470,7 @@ __fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) { __ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) { mVUbranchCheck(mVU); mVUlow.branch = (isJALR) ? 10 : 9; - if (mVUconstReg[Is].isValid && CHECK_VU_CONSTPROP) { + if (mVUconstReg[Is].isValid && doConstProp) { mVUlow.constJump.isValid = 1; mVUlow.constJump.regValue = mVUconstReg[Is].regValue; //DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 8516e3628c..caa9063a88 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -15,31 +15,19 @@ #pragma once -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ - -#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } -#define optimizeReg(rState) { rState = (rState==1) ? 
0 : rState; } -#define tCycles(dest, src) { dest = aMax(dest, src); } -#define incP() { mVU->p = (mVU->p+1) & 1; } -#define incQ() { mVU->q = (mVU->q+1) & 1; } -#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } -#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } - //------------------------------------------------------------------ // Messages Called at Execution Time... //------------------------------------------------------------------ -static void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); } -static void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); } -static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } -static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); } -static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); } -static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); } +static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); } +static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. 
[%04x] [%x]", PC, mVU->prog.cur->idx); } +static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%x]", mVU->prog.cur->idx); } +static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%x]", mVU->prog.cur->idx); } +static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); } +static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); } //------------------------------------------------------------------ -// Helper Functions +// Program Range Checking and Setting up Ranges //------------------------------------------------------------------ // Used by mVUsetupRange @@ -106,13 +94,13 @@ static void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) { } } -static __fi void startLoop(mV) { - if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); } - if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); } - if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); } - memzero(mVUinfo); - memzero(mVUregsTemp); -} +//------------------------------------------------------------------ +// Execute VU Opcode/Instruction (Upper and Lower) +//------------------------------------------------------------------ + +__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); } +__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); } +__ri void flushRegs(mV) { if (!doRegAlloc) mVU->regAlloc->flushAll(); } static void doIbit(mV) { if (mVUup.iBit) { @@ -126,7 +114,7 @@ static void doIbit(mV) { } else tempI = curI; - xMOV(ptr32[&mVU->regs().VI[REG_I].UL], tempI); + xMOV(ptr32[&mVU->getVI(REG_I)], tempI); incPC(1); } } @@ -150,16 +138,27 @@ static void doSwapOp(mV) { mVU->regAlloc->clearNeeded(t3); incPC(1); - doUpperOp(); + doUpperOp(mVU); const xmm& t4 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 
0xf); xMOVAPS(t4, t2); mVU->regAlloc->clearNeeded(t4); mVU->regAlloc->clearNeeded(t2); } - else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } + else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); } } +static void mVUexecuteInstruction(mV) { + if (mVUlow.isNOP) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doIbit(mVU); } + elif(!mVUinfo.swapOps) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doLowerOp(mVU); } + else doSwapOp(mVU); + flushRegs(mVU); +} + +//------------------------------------------------------------------ +// Warnings / Errors / Illegal Instructions +//------------------------------------------------------------------ + // If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2) static __fi void mVUcheckBadOp(mV) { if (mVUinfo.isBadOp && mVUcount == 0) { @@ -172,6 +171,7 @@ static __fi void mVUcheckBadOp(mV) { static __fi void handleBadOp(mV, int count) { if (mVUinfo.isBadOp && count == 0) { xMOV(gprT2, (uptr)mVU); + xMOV(gprT3, xPC); if (!isVU1) xCALL(mVUbadOp0); else xCALL(mVUbadOp1); } @@ -211,8 +211,21 @@ static __ri void eBitWarning(mV) { incPC(-2); } +//------------------------------------------------------------------ +// Cycles / Pipeline State / Early Exit from Execution +//------------------------------------------------------------------ + +__fi void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; } +__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); } +__fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); } +__fi void incP(mV) { mVU->p ^= 1; } +__fi void incQ(mV) { mVU->q ^= 1; } + // Optimizes the End Pipeline State Removing Unnecessary Info -static __fi void mVUoptimizePipeState(mV) { +// If the cycles remaining is just '1', we don't have to transfer it to the next block +// because mVU automatically decrements this number at the start of its loop, +// so essentially '1' will be the same as '0'... 
+static void mVUoptimizePipeState(mV) { for (int i = 0; i < 32; i++) { optimizeReg(mVUregs.VF[i].x); optimizeReg(mVUregs.VF[i].y); @@ -222,12 +235,12 @@ static __fi void mVUoptimizePipeState(mV) { for (int i = 0; i < 16; i++) { optimizeReg(mVUregs.VI[i]); } - if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } } - if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } } + if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } } + if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } } mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info } -__fi void mVUincCycles(mV, int x) { +static void mVUincCycles(mV, int x) { mVUcycles += x; for (int z = 31; z > 0; z--) { calcCycles(mVUregs.VF[z].x, x); @@ -241,11 +254,11 @@ __fi void mVUincCycles(mV, int x) { if (mVUregs.q) { if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } } else { calcCycles(mVUregs.q, x); } - if (!mVUregs.q) { incQ(); } + if (!mVUregs.q) { incQ(mVU); } } if (mVUregs.p) { calcCycles(mVUregs.p, x); - if (!mVUregs.p || mVUregsTemp.p) { incP(); } + if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); } } if (mVUregs.xgkick) { calcCycles(mVUregs.xgkick, x); @@ -254,14 +267,13 @@ __fi void mVUincCycles(mV, int x) { calcCycles(mVUregs.r, x); } -#define cmpVFregs(VFreg1, VFreg2, xVar) { \ - if (VFreg1.reg == VFreg2.reg) { \ - if ((VFreg1.x && VFreg2.x) \ - || (VFreg1.y && VFreg2.y) \ - || (VFreg1.z && VFreg2.z) \ - || (VFreg1.w && VFreg2.w)) \ - { xVar = 1; } \ - } \ +// Helps check if upper/lower ops read/write to same regs... 
+void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) { + if (VFreg1.reg == VFreg2.reg) { + if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y) + || (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w)) + { xVar = 1; } + } } void mVUsetCycles(mV) { @@ -299,6 +311,15 @@ void mVUsetCycles(mV) { tCycles(mVUregs.xgkick, mVUregsTemp.xgkick); } +// Prints Start/End PC of blocks executed, for debugging... +static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) { + if (mVUdebugNow) { + xMOV(gprT2, xPC); + if (isEndPC) xCALL(mVUprintPC2); + else xCALL(mVUprintPC1); + } +} + // vu0 is allowed to exit early, so are dev builds (for inf loops) __fi bool doEarlyExit(microVU* mVU) { return IsDevBuild || !isVU1; @@ -312,15 +333,6 @@ static __fi void mVUsavePipelineState(microVU* mVU) { } } -// Prints Start/End PC of blocks executed, for debugging... -static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) { - if (mVUdebugNow) { - xMOV(gprT2, xPC); - if (isEndPC) xCALL(mVUprintPC2); - else xCALL(mVUprintPC1); - } -} - // Test cycles to see if we need to exit-early... 
static void mVUtestCycles(microVU* mVU) { iPC = mVUstartPC; @@ -332,8 +344,8 @@ static void mVUtestCycles(microVU* mVU) { // xFowardJZ32 vu0jmp; // xMOV(gprT2, (uptr)mVU); // xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation - mVUsavePipelineState(mVU); - mVUendProgram(mVU, NULL, 0); + mVUsavePipelineState(mVU); + mVUendProgram(mVU, NULL, 0); // vu0jmp.SetTarget(); } else { @@ -347,6 +359,19 @@ static void mVUtestCycles(microVU* mVU) { xSUB(ptr32[&mVU->cycles], mVUcycles); } +//------------------------------------------------------------------ +// Initializing +//------------------------------------------------------------------ + +// This gets run at the start of every loop of mVU's first pass +static __fi void startLoop(mV) { + if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); } + if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); } + if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); } + memzero(mVUinfo); + memzero(mVUregsTemp); +} + // Initialize VI Constants (vi15 propagates through blocks) static __fi void mVUinitConstValues(microVU* mVU) { for (int i = 0; i < 16; i++) { @@ -393,7 +418,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) { // First Pass iPC = startPC / 4; - mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range + mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range mVU->regAlloc->reset(); // Reset regAlloc mVUinitFirstPass(mVU, pState, thisPtr); for (int branch = 0; mVUcount < endCount; mVUcount++) { @@ -419,7 +444,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) { } // Fix up vi15 const info for propagation through blocks - mVUregs.vi15 = (mVUconstReg[15].isValid && CHECK_VU_CONSTPROP) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0; + mVUregs.vi15 = (mVUconstReg[15].isValid && doConstProp) ? 
((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0; mVUsetFlags(mVU, mFC); // Sets Up Flag instances mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking @@ -434,11 +459,8 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) { for (; x < endCount; x++) { if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; } if (mVUup.mBit) { xOR(ptr32[&mVU->regs().flags], VUFLAG_MFLAGSET); } - if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); } - else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); } - else { doSwapOp(mVU); } + mVUexecuteInstruction(mVU); if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } - if (!doRegAlloc) { mVU->regAlloc->flushAll(); } if (isEvilBlock) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; } else if (!mVUinfo.isBdelay) { incPC(1); } else { diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 5519f5af1f..9672d79135 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -286,7 +286,7 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) { __fi void mVUsetFlagInfo(mV) { branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); } branchType2 { // This case can possibly be turned off via a hack for a small speedup... - if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; } + if (!mVUlow.constJump.isValid || !doConstProp) { mVUregs.needExactMatch |= 0x7; } else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8)); } } branchType3 { diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index c56f53bc4f..e07d5253c2 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -170,39 +170,194 @@ struct microMapXMM { bool isNeeded; // Is needed for current instruction }; -#define xmmTotal 7 // Don't allocate PQ? class microRegAlloc { protected: + static const u32 xmmTotal = 7; // Don't allocate PQ? 
microMapXMM xmmMap[xmmTotal]; - int counter; - microVU* mVU; + int counter; // Current allocation count + int index; // VU0 or VU1 + + // Helper functions to get VU regs + VURegs& regs() const { return ::vuRegs[index]; } + __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; } + __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; } + + __ri void loadIreg(const xmm& reg, int xyzw) { + xMOVSSZX(reg, ptr32[&getVI(REG_I)]); + if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0); + } - int findFreeRegRec(int startIdx); - int findFreeReg(); + int findFreeRegRec(int startIdx) { + for (int i = startIdx; i < xmmTotal; i++) { + if (!xmmMap[i].isNeeded) { + int x = findFreeRegRec(i+1); + if (x == -1) return i; + return ((xmmMap[i].count < xmmMap[x].count) ? i : x); + } + } + return -1; + } + + int findFreeReg() { + for (int i = 0; i < xmmTotal; i++) { + if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) { + return i; // Reg is not needed and was a temp reg + } + } + int x = findFreeRegRec(0); + pxAssumeDev( x >= 0, "microVU register allocation failure!" 
); + return x; + } public: - microRegAlloc(microVU* _mVU); - + microRegAlloc(int _index) { + index = _index; + } + void reset() { for (int i = 0; i < xmmTotal; i++) { clearReg(i); } counter = 0; } + void flushAll(bool clearState = 1) { for (int i = 0; i < xmmTotal; i++) { writeBackReg(xmm(i)); if (clearState) clearReg(i); } } - void clearReg(int regId); + void clearReg(const xmm& reg) { clearReg(reg.Id); } + void clearReg(int regId) { + microMapXMM& clear( xmmMap[regId] ); + clear.VFreg = -1; + clear.count = 0; + clear.xyzw = 0; + clear.isNeeded = 0; + } + void clearRegVF(int VFreg) { for (int i = 0; i < xmmTotal; i++) { if (xmmMap[i].VFreg == VFreg) clearReg(i); } } - void writeBackReg(const xmm& reg, bool invalidateRegs = 1); - void clearNeeded(const xmm& reg); - const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1); + + void writeBackReg(const xmm& reg, bool invalidateRegs = 1) { + microMapXMM& write( xmmMap[reg.Id] ); + + if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0 + if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg); + else if (write.VFreg == 32) mVUsaveReg(reg, ptr[®s().ACC], write.xyzw, 1); + else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1); + if (invalidateRegs) { + for (int i = 0; i < xmmTotal; i++) { + microMapXMM& imap (xmmMap[i]); + if ((i == reg.Id) || imap.isNeeded) continue; + if (imap.VFreg == write.VFreg) { + if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg); + clearReg(i); // Invalidate any Cached Regs of same vf Reg + } + } + } + if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified + write.count = counter; + write.xyzw = 0; + write.isNeeded = 0; + return; + } + } + clearReg(reg); // Clear Reg + } + + void clearNeeded(const xmm& reg) { + + if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; + + microMapXMM& clear (xmmMap[reg.Id]); + clear.isNeeded = 0; + if (clear.xyzw) { // Reg was 
modified + if (clear.VFreg > 0) { + int mergeRegs = 0; + if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes + for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg + if (i == reg.Id) continue; + microMapXMM& imap (xmmMap[i]); + if (imap.VFreg == clear.VFreg) { + if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg); + if (mergeRegs == 1) { + mVUmergeRegs(xmm(i), reg, clear.xyzw, 1); + imap.xyzw = 0xf; + imap.count = counter; + mergeRegs = 2; + } + else clearReg(i); + } + } + if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged + else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge + } + else clearReg(reg); // If Reg was temp or vf0, then invalidate itself + } + } + + const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) { + counter++; + if (vfLoadReg >= 0) { // Search For Cached Regs + for (int i = 0; i < xmmTotal; i++) { + const xmm& xmmi(xmm::GetInstance(i)); + microMapXMM& imap (xmmMap[i]); + if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified + || (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 + int z = i; + if (vfWriteReg >= 0) { // Reg will be modified + if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg + z = findFreeReg(); + const xmm& xmmz(xmm::GetInstance(z)); + writeBackReg(xmmz); + if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi); + else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1); + else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2); + else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3); + else if (z != i) xMOVAPS (xmmz, xmmi); + imap.count = counter; // Reg i was used, so update counter + } + else { // Don't clone reg, but shuffle to adjust for SS ops + if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); } + if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1); + else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2); + else if (xyzw 
== 1) xPSHUF.D(xmmi, xmmi, 3); + } + xmmMap[z].VFreg = vfWriteReg; + xmmMap[z].xyzw = xyzw; + } + xmmMap[z].count = counter; + xmmMap[z].isNeeded = 1; + return xmm::GetInstance(z); + } + } + } + int x = findFreeReg(); + const xmm& xmmx = xmm::GetInstance(x); + writeBackReg(xmmx); + + if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading) + if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); } + else if (vfLoadReg == 33) loadIreg (xmmx, xyzw); + else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[®s().ACC], xyzw); + else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw); + xmmMap[x].VFreg = vfWriteReg; + xmmMap[x].xyzw = xyzw; + } + else { // Reg Will Not Be Modified (always load full reg for caching) + if (vfLoadReg == 33) loadIreg(xmmx, 0xf); + else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[®s().ACC]); + else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]); + xmmMap[x].VFreg = vfLoadReg; + xmmMap[x].xyzw = 0; + } + xmmMap[x].count = counter; + xmmMap[x].isNeeded = 1; + return xmmx; + } }; diff --git a/pcsx2/x86/microVU_IR.inl b/pcsx2/x86/microVU_IR.inl deleted file mode 100644 index f5613df531..0000000000 --- a/pcsx2/x86/microVU_IR.inl +++ /dev/null @@ -1,165 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . 
- */ - -#pragma once - - -int microRegAlloc::findFreeRegRec(int startIdx) { - for (int i = startIdx; i < xmmTotal; i++) { - if (!xmmMap[i].isNeeded) { - int x = findFreeRegRec(i+1); - if (x == -1) return i; - return ((xmmMap[i].count < xmmMap[x].count) ? i : x); - } - } - return -1; -} -int microRegAlloc::findFreeReg() { - for (int i = 0; i < xmmTotal; i++) { - if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) { - return i; // Reg is not needed and was a temp reg - } - } - int x = findFreeRegRec(0); - pxAssumeDev( x >= 0, "microVU register allocation failure!" ); - return x; -} - -microRegAlloc::microRegAlloc(microVU* _mVU) { - mVU = _mVU; -} - -void microRegAlloc::clearReg(int regId) { - microMapXMM& clear( xmmMap[regId] ); - clear.VFreg = -1; - clear.count = 0; - clear.xyzw = 0; - clear.isNeeded = 0; -} -void microRegAlloc::writeBackReg(const xmm& reg, bool invalidateRegs) { - microMapXMM& write( xmmMap[reg.Id] ); - - if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0 - if (write.VFreg == 33) xMOVSS(ptr32[&mVU->getVI(REG_I)], reg); - else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&mVU->regs().ACC], write.xyzw, 1); - else mVUsaveReg(reg, ptr[&mVU->getVF(write.VFreg)], write.xyzw, 1); - if (invalidateRegs) { - for (int i = 0; i < xmmTotal; i++) { - microMapXMM& imap (xmmMap[i]); - if ((i == reg.Id) || imap.isNeeded) continue; - if (imap.VFreg == write.VFreg) { - if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg); - clearReg(i); // Invalidate any Cached Regs of same vf Reg - } - } - } - if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified - write.count = counter; - write.xyzw = 0; - write.isNeeded = 0; - return; - } - } - clearReg(reg); // Clear Reg -} -void microRegAlloc::clearNeeded(const xmm& reg) -{ - if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return; - - microMapXMM& clear (xmmMap[reg.Id]); - clear.isNeeded = 0; - if (clear.xyzw) { // Reg was modified - if (clear.VFreg 
> 0) { - int mergeRegs = 0; - if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes - for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg - if (i == reg.Id) continue; - microMapXMM& imap (xmmMap[i]); - if (imap.VFreg == clear.VFreg) { - if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg); - if (mergeRegs == 1) { - mVUmergeRegs(xmm(i), reg, clear.xyzw, 1); - imap.xyzw = 0xf; - imap.count = counter; - mergeRegs = 2; - } - else clearReg(i); - } - } - if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged - else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge - } - else clearReg(reg); // If Reg was temp or vf0, then invalidate itself - } -} -const xmm& microRegAlloc::allocReg(int vfLoadReg, int vfWriteReg, int xyzw, bool cloneWrite) { - counter++; - if (vfLoadReg >= 0) { // Search For Cached Regs - for (int i = 0; i < xmmTotal; i++) { - const xmm& xmmi(xmm::GetInstance(i)); - microMapXMM& imap (xmmMap[i]); - if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified - || (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0 - int z = i; - if (vfWriteReg >= 0) { // Reg will be modified - if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg - z = findFreeReg(); - const xmm& xmmz(xmm::GetInstance(z)); - writeBackReg(xmmz); - if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi); - else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1); - else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2); - else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3); - else if (z != i) xMOVAPS (xmmz, xmmi); - imap.count = counter; // Reg i was used, so update counter - } - else { // Don't clone reg, but shuffle to adjust for SS ops - if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); } - if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1); - else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2); - else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3); - } 
- xmmMap[z].VFreg = vfWriteReg; - xmmMap[z].xyzw = xyzw; - } - xmmMap[z].count = counter; - xmmMap[z].isNeeded = 1; - return xmm::GetInstance(z); - } - } - } - int x = findFreeReg(); - const xmm& xmmx = xmm::GetInstance(x); - writeBackReg(xmmx); - - if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading) - if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); } - else if (vfLoadReg == 33) mVU->loadIreg(xmmx, xyzw); - else if (vfLoadReg == 32) mVUloadReg (xmmx, ptr[&mVU->regs().ACC], xyzw); - else if (vfLoadReg >= 0) mVUloadReg (xmmx, ptr[&mVU->getVF(vfLoadReg)], xyzw); - xmmMap[x].VFreg = vfWriteReg; - xmmMap[x].xyzw = xyzw; - } - else { // Reg Will Not Be Modified (always load full reg for caching) - if (vfLoadReg == 33) mVU->loadIreg(xmmx, 0xf); - else if (vfLoadReg == 32) xMOVAPS(xmmx, ptr128[&mVU->regs().ACC]); - else if (vfLoadReg >= 0) xMOVAPS(xmmx, ptr128[&mVU->getVF(vfLoadReg)]); - xmmMap[x].VFreg = vfLoadReg; - xmmMap[x].xyzw = 0; - } - xmmMap[x].count = counter; - xmmMap[x].isNeeded = 1; - return xmmx; -} diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 904af60859..0aa263cdde 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -248,12 +248,14 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); //------------------------------------------------------------------ // Reg Alloc -#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction +static const bool doRegAlloc = 1; // Set to 0 to flush every 32bit Instruction // This turns off reg alloc for the most part, but reg alloc will still -// be done between Upper/Lower and within instructions... +// be done within instructions... 
Also on doSwapOp() regAlloc is needed between +// Lower and Upper instructions, so in this case it flushes after the full +// 64bit instruction (lower and upper) // No Flag Optimizations -#define noFlagOpts 0 // Set to 1 to disable all flag setting optimizations +static const bool noFlagOpts = 0; // Set to 1 to disable all flag setting optimizations // Note: The flag optimizations this disables should all be harmless, so // this option is mainly just for debugging... it effectively forces mVU // to always update Mac and Status Flags (both sticky and non-sticky) whenever @@ -261,7 +263,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); // flag instances between blocks... // Constant Propagation -#define CHECK_VU_CONSTPROP 0 // Set to 1 to turn on vi15 const propagation +static const bool doConstProp = 0; // Set to 1 to turn on vi15 const propagation // Enables Constant Propagation for Jumps based on vi15 'link-register' // allowing us to know many indirect jump target addresses. // Makes GoW a lot slower due to extra recompilation time and extra code-gen!