diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
index 505300254f..76f9660f7e 100644
--- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
+++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
@@ -647,10 +647,6 @@
<File
RelativePath="..\..\x86\microVU_IR.h"
>
</File>
-<File
-RelativePath="..\..\x86\microVU_IR.inl"
->
-</File>
diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp
index a047f1e6fe..97b4fc3e68 100644
--- a/pcsx2/x86/microVU.cpp
+++ b/pcsx2/x86/microVU.cpp
@@ -24,7 +24,6 @@
#include "microVU_Misc.inl"
#include "microVU_Log.inl"
#include "microVU_Analyze.inl"
-#include "microVU_IR.inl"
#include "microVU_Alloc.inl"
#include "microVU_Upper.inl"
#include "microVU_Lower.inl"
@@ -103,7 +102,7 @@ void microVU::init(uint vuIndex) {
dispCache = NULL;
cache = NULL;
cacheSize = mVUcacheSize;
- regAlloc = new microRegAlloc(this);
+ regAlloc = new microRegAlloc(index);
for (u32 i = 0; i < (progSize / 2); i++) {
prog.prog[i] = new deque<microProgram*>();
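
The constructor now receives the VU index rather than a back-pointer to the owning microVU, which is what lets the allocator move wholesale into microVU_IR.h below and resolve register state through the global register array. A minimal sketch of the pattern (the `Stub`/`Sketch` names are illustrative stand-ins, not the real PCSX2 declarations):

```cpp
#include <cstdio>

// Illustrative stand-ins for PCSX2's VURegs/vuRegs globals.
struct VURegsStub { int VI[16]; };
static VURegsStub vuRegsStub[2]; // one register file per VU unit

class RegAllocSketch {
    int index; // VU0 or VU1 -- all state lookups go through this
public:
    explicit RegAllocSketch(int _index) : index(_index) {}
    // Instead of dereferencing a microVU* back-pointer, resolve the
    // register file from a global array on every access:
    VURegsStub& regs() const { return vuRegsStub[index]; }
};

int main() {
    RegAllocSketch vu0Alloc(0); // mirrors `regAlloc = new microRegAlloc(index)`
    vu0Alloc.regs().VI[15] = 0x1234;
    std::printf("vu0 vi15 = 0x%x\n", vu0Alloc.regs().VI[15]);
    return 0;
}
```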
diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h
index 289afa2d44..ef09c08990 100644
--- a/pcsx2/x86/microVU.h
+++ b/pcsx2/x86/microVU.h
@@ -90,7 +90,7 @@ public:
for (int i = 0; i <= listI; i++) {
if ((linkI->block->pState.q == pState->q)
&& (linkI->block->pState.p == pState->p)
- && ((linkI->block->pState.vi15 == pState->vi15) || !CHECK_VU_CONSTPROP)
+ && ((linkI->block->pState.vi15 == pState->vi15) || !doConstProp)
&& (linkI->block->pState.flags == pState->flags)
&& (linkI->block->pState.xgkick == pState->xgkick)
&& (linkI->block->pState.viBackUp == pState->viBackUp)
@@ -229,12 +229,6 @@ struct microVU {
return (((prog.IRinfo.curPC + 4) + (Imm11() * 2)) & progMemMask) * 4;
}
- __ri void loadIreg(const xmm& reg, int xyzw)
- {
- xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
- if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
- }
-
void init(uint vuIndex);
void reset();
void close();
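
The hunk above only swaps `CHECK_VU_CONSTPROP` for the new `doConstProp` constant, but the surrounding comparison is the block-linking gate: a cached block is reused only when the incoming pipeline state matches the one it was compiled against. A reduced, self-contained sketch of that gate (the struct is simplified; the real pState carries more fields):

```cpp
#include <cstdint>
#include <cstdio>

// Simplified model of the pipeline-state key compared above.
struct PipeStateSketch {
    uint8_t  q, p, flags, xgkick, viBackUp;
    uint32_t vi15; // vi15 constant-propagation info
};

static const bool doConstProp = false; // mirrors the new Misc.h constant

// A cached block can be linked only when every pipeline field agrees;
// vi15 is ignored when const-prop is disabled, so more blocks match.
bool statesMatch(const PipeStateSketch& a, const PipeStateSketch& b) {
    return a.q == b.q && a.p == b.p
        && (a.vi15 == b.vi15 || !doConstProp)
        && a.flags == b.flags
        && a.xgkick == b.xgkick
        && a.viBackUp == b.viBackUp;
}

int main() {
    PipeStateSketch a = {0, 0, 1, 0, 0, 0x8000ABCDu};
    PipeStateSketch b = {0, 0, 1, 0, 0, 0};
    std::printf("linkable = %d\n", statesMatch(a, b)); // 1: vi15 ignored
    return 0;
}
```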
diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl
index 0bc65f3931..1e4f8c6301 100644
--- a/pcsx2/x86/microVU_Analyze.inl
+++ b/pcsx2/x86/microVU_Analyze.inl
@@ -470,7 +470,7 @@ __fi void mVUanalyzeNormBranch(mV, int It, bool isBAL) {
__ri void mVUanalyzeJump(mV, int Is, int It, bool isJALR) {
mVUbranchCheck(mVU);
mVUlow.branch = (isJALR) ? 10 : 9;
- if (mVUconstReg[Is].isValid && CHECK_VU_CONSTPROP) {
+ if (mVUconstReg[Is].isValid && doConstProp) {
mVUlow.constJump.isValid = 1;
mVUlow.constJump.regValue = mVUconstReg[Is].regValue;
//DevCon.Status("microVU%d: Constant JR/JALR Address Optimization", mVU->index);
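
For context on what this enables: when vi15's value is known, JR/JALR targets can be resolved at compile time instead of emitting a dynamic dispatch. The address math below mirrors the `(regValue*8)&(microMemSize-8)` expression in the microVU_Flags.inl hunk further down; the sizes used are VU1's:

```cpp
#include <cstdio>

// Each VU instruction pair is 8 bytes, and a VI register holds the jump
// target in instruction units, so the byte address is regValue*8 wrapped
// to the unit's micro memory (0x4000 bytes on VU1).
int main() {
    unsigned microMemSize = 0x4000; // VU1
    unsigned regValue     = 0x0123; // vi value known from const-prop
    unsigned target       = (regValue * 8) & (microMemSize - 8);
    std::printf("constant JR/JALR target = 0x%04x\n", target); // 0x0918
    return 0;
}
```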
diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl
index 8516e3628c..caa9063a88 100644
--- a/pcsx2/x86/microVU_Compile.inl
+++ b/pcsx2/x86/microVU_Compile.inl
@@ -15,31 +15,19 @@
#pragma once
-//------------------------------------------------------------------
-// Helper Macros
-//------------------------------------------------------------------
-
-#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }
-#define optimizeReg(rState) { rState = (rState==1) ? 0 : rState; }
-#define tCycles(dest, src) { dest = aMax(dest, src); }
-#define incP() { mVU->p = (mVU->p+1) & 1; }
-#define incQ() { mVU->q = (mVU->q+1) & 1; }
-#define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); }
-#define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); }
-
//------------------------------------------------------------------
// Messages Called at Execution Time...
//------------------------------------------------------------------
-static void __fastcall mVUbadOp0(mV) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
-static void __fastcall mVUbadOp1(mV) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", xPC, mVU->prog.cur); }
-static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
-static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%04x] [%x]", xPC, mVU->prog.cur); }
-static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
-static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
+static void __fastcall mVUbadOp0(mV, u32 PC) { Console.Error("microVU0 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
+static void __fastcall mVUbadOp1(mV, u32 PC) { Console.Error("microVU1 Warning: Exiting... Block started with illegal opcode. [%04x] [%x]", PC, mVU->prog.cur->idx); }
+static void __fastcall mVUwarning0(mV) { Console.Error("microVU0 Warning: Exiting from Possible Infinite Loop [%x]", mVU->prog.cur->idx); }
+static void __fastcall mVUwarning1(mV) { Console.Error("microVU1 Warning: Exiting from Possible Infinite Loop [%x]", mVU->prog.cur->idx); }
+static void __fastcall mVUprintPC1(u32 PC) { Console.WriteLn("Block Start PC = 0x%04x", PC); }
+static void __fastcall mVUprintPC2(u32 PC) { Console.WriteLn("Block End PC = 0x%04x", PC); }
//------------------------------------------------------------------
-// Helper Functions
+// Program Range Checking and Setting up Ranges
//------------------------------------------------------------------
// Used by mVUsetupRange
@@ -106,13 +94,13 @@ static void mVUsetupRange(microVU* mVU, s32 pc, bool isStartPC) {
}
}
-static __fi void startLoop(mV) {
- if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
- if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
- if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
- memzero(mVUinfo);
- memzero(mVUregsTemp);
-}
+//------------------------------------------------------------------
+// Execute VU Opcode/Instruction (Upper and Lower)
+//------------------------------------------------------------------
+
+__ri void doUpperOp(mV) { mVUopU(mVU, 1); mVUdivSet(mVU); }
+__ri void doLowerOp(mV) { incPC(-1); mVUopL(mVU, 1); incPC(1); }
+__ri void flushRegs(mV) { if (!doRegAlloc) mVU->regAlloc->flushAll(); }
static void doIbit(mV) {
if (mVUup.iBit) {
@@ -126,7 +114,7 @@ static void doIbit(mV) {
}
else tempI = curI;
- xMOV(ptr32[&mVU->regs().VI[REG_I].UL], tempI);
+ xMOV(ptr32[&mVU->getVI(REG_I)], tempI);
incPC(1);
}
}
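
doIbit covers upper instructions with the I bit set, where the lower 32 bits of the pair hold a float immediate destined for REG_I instead of a lower instruction; the change above just routes the write through the `getVI` accessor. A standalone sketch of that decoding (layout simplified; the real code emits an xMOV, it doesn't interpret at compile time):

```cpp
#include <cstdio>
#include <cstring>

// When bit 31 of the upper instruction is set (the I bit), the lower
// slot of the 64-bit pair carries a raw float immediate for REG_I.
int main() {
    unsigned pair[2] = { 0x3f800000u,   // lower slot: the bits of 1.0f
                         0x80000000u }; // upper op with the I bit set
    if (pair[1] & 0x80000000u) {
        float imm;
        std::memcpy(&imm, &pair[0], sizeof(imm)); // like the xMOV to REG_I
        std::printf("REG_I = %f\n", imm);         // 1.000000
    }
    return 0;
}
```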
@@ -150,16 +138,27 @@ static void doSwapOp(mV) {
mVU->regAlloc->clearNeeded(t3);
incPC(1);
- doUpperOp();
+ doUpperOp(mVU);
const xmm& t4 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
xMOVAPS(t4, t2);
mVU->regAlloc->clearNeeded(t4);
mVU->regAlloc->clearNeeded(t2);
}
- else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
+ else { mVUopL(mVU, 1); incPC(1); flushRegs(mVU); doUpperOp(mVU); }
}
+static void mVUexecuteInstruction(mV) {
+ if (mVUlow.isNOP) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doIbit(mVU); }
+ elif(!mVUinfo.swapOps) { incPC(1); doUpperOp(mVU); flushRegs(mVU); doLowerOp(mVU); }
+ else doSwapOp(mVU);
+ flushRegs(mVU);
+}
+
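mVUexecuteInstruction centralizes the per-pair emission order that the second pass previously open-coded (compare the removed lines near the end of mVUcompile below). A schematic model with stub emitters (the names are illustrative, not the PCSX2 emitter API):

```cpp
#include <cstdio>

// Stubs standing in for mVUopU/mVUopL and regAlloc->flushAll(); the
// flush only fires when reg alloc is disabled, as in flushRegs().
static const bool doRegAllocSketch = true;

void emitUpper() { std::puts("emit upper op"); }
void emitLower() { std::puts("emit lower op"); }
void flush()     { if (!doRegAllocSketch) std::puts("flush xmm regs"); }

void executePair(bool lowerIsNOP, bool swapOps) {
    if (lowerIsNOP)    { emitUpper(); flush(); /* + I-bit handling */ }
    else if (!swapOps) { emitUpper(); flush(); emitLower(); }
    else               { emitLower(); emitUpper(); } // conflicting regs: lower first
    flush();
}

int main() { executePair(false, false); return 0; }
```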
+//------------------------------------------------------------------
+// Warnings / Errors / Illegal Instructions
+//------------------------------------------------------------------
+
// If 1st op in block is a bad opcode, then don't compile rest of block (Dawn of Mana Level 2)
static __fi void mVUcheckBadOp(mV) {
if (mVUinfo.isBadOp && mVUcount == 0) {
@@ -172,6 +171,7 @@ static __fi void mVUcheckBadOp(mV) {
static __fi void handleBadOp(mV, int count) {
if (mVUinfo.isBadOp && count == 0) {
xMOV(gprT2, (uptr)mVU);
+ xMOV(gprT3, xPC);
if (!isVU1) xCALL(mVUbadOp0);
else xCALL(mVUbadOp1);
}
@@ -211,8 +211,21 @@ static __ri void eBitWarning(mV) {
incPC(-2);
}
+//------------------------------------------------------------------
+// Cycles / Pipeline State / Early Exit from Execution
+//------------------------------------------------------------------
+
+__fi void optimizeReg(u8& rState) { rState = (rState==1) ? 0 : rState; }
+__fi void calcCycles(u8& reg, u8 x) { reg = ((reg > x) ? (reg - x) : 0); }
+__fi void tCycles(u8& dest, u8& src) { dest = aMax(dest, src); }
+__fi void incP(mV) { mVU->p ^= 1; }
+__fi void incQ(mV) { mVU->q ^= 1; }
+
// Optimizes the End Pipeline State Removing Unnecessary Info
-static __fi void mVUoptimizePipeState(mV) {
+// If the number of cycles remaining is just '1', we don't have to transfer it
+// to the next block, because mVU automatically decrements it at the start of
+// its loop, so '1' is effectively the same as '0'...
+static void mVUoptimizePipeState(mV) {
for (int i = 0; i < 32; i++) {
optimizeReg(mVUregs.VF[i].x);
optimizeReg(mVUregs.VF[i].y);
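
Since calcCycles and optimizeReg are now plain functions, a short worked example shows the whole trick: stall counters saturate toward zero, and a leftover count of exactly '1' is dropped, because the recompiler loop decrements it once on entry anyway:

```cpp
#include <cstdio>

typedef unsigned char u8;

void calcCycles(u8& reg, u8 x) { reg = (reg > x) ? (reg - x) : 0; }
void optimizeReg(u8& rState)   { rState = (rState == 1) ? 0 : rState; }

int main() {
    u8 stall = 4;
    calcCycles(stall, 3); // 4 pending stall cycles, 3 elapsed -> 1 left
    std::printf("after calcCycles:  %u\n", stall); // 1
    optimizeReg(stall);   // a leftover '1' is dropped across block boundaries
    std::printf("after optimizeReg: %u\n", stall); // 0
    return 0;
}
```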
@@ -222,12 +235,12 @@ static __fi void mVUoptimizePipeState(mV) {
for (int i = 0; i < 16; i++) {
optimizeReg(mVUregs.VI[i]);
}
- if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } }
- if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } }
+ if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(mVU); } }
+ if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(mVU); } }
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
}
-__fi void mVUincCycles(mV, int x) {
+static void mVUincCycles(mV, int x) {
mVUcycles += x;
for (int z = 31; z > 0; z--) {
calcCycles(mVUregs.VF[z].x, x);
@@ -241,11 +254,11 @@ __fi void mVUincCycles(mV, int x) {
if (mVUregs.q) {
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
else { calcCycles(mVUregs.q, x); }
- if (!mVUregs.q) { incQ(); }
+ if (!mVUregs.q) { incQ(mVU); }
}
if (mVUregs.p) {
calcCycles(mVUregs.p, x);
- if (!mVUregs.p || mVUregsTemp.p) { incP(); }
+ if (!mVUregs.p || mVUregsTemp.p) { incP(mVU); }
}
if (mVUregs.xgkick) {
calcCycles(mVUregs.xgkick, x);
@@ -254,14 +267,13 @@ __fi void mVUincCycles(mV, int x) {
calcCycles(mVUregs.r, x);
}
-#define cmpVFregs(VFreg1, VFreg2, xVar) { \
- if (VFreg1.reg == VFreg2.reg) { \
- if ((VFreg1.x && VFreg2.x) \
- || (VFreg1.y && VFreg2.y) \
- || (VFreg1.z && VFreg2.z) \
- || (VFreg1.w && VFreg2.w)) \
- { xVar = 1; } \
- } \
+// Helps check if upper/lower ops read/write to the same regs...
+void cmpVFregs(microVFreg& VFreg1, microVFreg& VFreg2, bool& xVar) {
+ if (VFreg1.reg == VFreg2.reg) {
+ if ((VFreg1.x && VFreg2.x) || (VFreg1.y && VFreg2.y)
+ || (VFreg1.z && VFreg2.z) || (VFreg1.w && VFreg2.w))
+ { xVar = 1; }
+ }
}
void mVUsetCycles(mV) {
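
The function version of cmpVFregs makes the hazard rule explicit: a conflict requires the same VF register and at least one overlapping vector field. A self-contained sketch with a simplified stand-in for microVFreg:

```cpp
#include <cstdio>

// Simplified stand-in for microVFreg: a register number plus xyzw flags.
struct VFregSketch { int reg; bool x, y, z, w; };

void cmpVFregsSketch(const VFregSketch& a, const VFregSketch& b, bool& xVar) {
    if (a.reg == b.reg) {
        if ((a.x && b.x) || (a.y && b.y) || (a.z && b.z) || (a.w && b.w))
            xVar = true;
    }
}

int main() {
    VFregSketch upperWrite = { 5, true,  true,  false, false }; // vf5.xy
    VFregSketch lowerRead  = { 5, false, false, true,  true  }; // vf5.zw
    bool hazard = false;
    cmpVFregsSketch(upperWrite, lowerRead, hazard);
    std::printf("hazard = %d\n", hazard); // 0: same reg, disjoint fields
    return 0;
}
```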
@@ -299,6 +311,15 @@ void mVUsetCycles(mV) {
tCycles(mVUregs.xgkick, mVUregsTemp.xgkick);
}
+// Prints Start/End PC of blocks executed, for debugging...
+static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
+ if (mVUdebugNow) {
+ xMOV(gprT2, xPC);
+ if (isEndPC) xCALL(mVUprintPC2);
+ else xCALL(mVUprintPC1);
+ }
+}
+
// vu0 is allowed to exit early, so are dev builds (for inf loops)
__fi bool doEarlyExit(microVU* mVU) {
return IsDevBuild || !isVU1;
@@ -312,15 +333,6 @@ static __fi void mVUsavePipelineState(microVU* mVU) {
}
}
-// Prints Start/End PC of blocks executed, for debugging...
-static void mVUdebugPrintBlocks(microVU* mVU, bool isEndPC) {
- if (mVUdebugNow) {
- xMOV(gprT2, xPC);
- if (isEndPC) xCALL(mVUprintPC2);
- else xCALL(mVUprintPC1);
- }
-}
-
// Test cycles to see if we need to exit-early...
static void mVUtestCycles(microVU* mVU) {
iPC = mVUstartPC;
@@ -332,8 +344,8 @@ static void mVUtestCycles(microVU* mVU) {
// xFowardJZ32 vu0jmp;
// xMOV(gprT2, (uptr)mVU);
// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
- mVUsavePipelineState(mVU);
- mVUendProgram(mVU, NULL, 0);
+ mVUsavePipelineState(mVU);
+ mVUendProgram(mVU, NULL, 0);
// vu0jmp.SetTarget();
}
else {
@@ -347,6 +359,19 @@ static void mVUtestCycles(microVU* mVU) {
xSUB(ptr32[&mVU->cycles], mVUcycles);
}
+//------------------------------------------------------------------
+// Initializing
+//------------------------------------------------------------------
+
+// This gets run at the start of every loop of mVU's first pass
+static __fi void startLoop(mV) {
+ if (curI & _Mbit_) { Console.WriteLn(Color_Green, "microVU%d: M-bit set!", getIndex); }
+ if (curI & _Dbit_) { DevCon.WriteLn (Color_Green, "microVU%d: D-bit set!", getIndex); }
+ if (curI & _Tbit_) { DevCon.WriteLn (Color_Green, "microVU%d: T-bit set!", getIndex); }
+ memzero(mVUinfo);
+ memzero(mVUregsTemp);
+}
+
// Initialize VI Constants (vi15 propagates through blocks)
static __fi void mVUinitConstValues(microVU* mVU) {
for (int i = 0; i < 16; i++) {
@@ -393,7 +418,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
// First Pass
iPC = startPC / 4;
- mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
+ mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
mVU->regAlloc->reset(); // Reset regAlloc
mVUinitFirstPass(mVU, pState, thisPtr);
for (int branch = 0; mVUcount < endCount; mVUcount++) {
@@ -419,7 +444,7 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
}
// Fix up vi15 const info for propagation through blocks
- mVUregs.vi15 = (mVUconstReg[15].isValid && CHECK_VU_CONSTPROP) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
+ mVUregs.vi15 = (mVUconstReg[15].isValid && doConstProp) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
@@ -434,11 +459,8 @@ void* mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
for (; x < endCount; x++) {
if (mVUinfo.isEOB) { handleBadOp(mVU, x); x = 0xffff; }
if (mVUup.mBit) { xOR(ptr32[&mVU->regs().flags], VUFLAG_MFLAGSET); }
- if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); }
- else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); }
- else { doSwapOp(mVU); }
+ mVUexecuteInstruction(mVU);
if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); }
- if (!doRegAlloc) { mVU->regAlloc->flushAll(); }
if (isEvilBlock) { mVUsetupRange(mVU, xPC, 0); normJumpCompile(mVU, mFC, 1); return thisPtr; }
else if (!mVUinfo.isBdelay) { incPC(1); }
else {
diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl
index 5519f5af1f..9672d79135 100644
--- a/pcsx2/x86/microVU_Flags.inl
+++ b/pcsx2/x86/microVU_Flags.inl
@@ -286,7 +286,7 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) {
__fi void mVUsetFlagInfo(mV) {
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); }
branchType2 { // This case can possibly be turned off via a hack for a small speedup...
- if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; }
+ if (!mVUlow.constJump.isValid || !doConstProp) { mVUregs.needExactMatch |= 0x7; }
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8)); }
}
branchType3 {
diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h
index c56f53bc4f..e07d5253c2 100644
--- a/pcsx2/x86/microVU_IR.h
+++ b/pcsx2/x86/microVU_IR.h
@@ -170,39 +170,194 @@ struct microMapXMM {
bool isNeeded; // Is needed for current instruction
};
-#define xmmTotal 7 // Don't allocate PQ?
class microRegAlloc {
protected:
+	static const int xmmTotal = 7; // Don't allocate PQ?
microMapXMM xmmMap[xmmTotal];
- int counter;
- microVU* mVU;
+ int counter; // Current allocation count
+ int index; // VU0 or VU1
+
+ // Helper functions to get VU regs
+ VURegs& regs() const { return ::vuRegs[index]; }
+ __fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
+ __fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
+
+ __ri void loadIreg(const xmm& reg, int xyzw) {
+ xMOVSSZX(reg, ptr32[&getVI(REG_I)]);
+ if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
+ }
- int findFreeRegRec(int startIdx);
- int findFreeReg();
+ int findFreeRegRec(int startIdx) {
+ for (int i = startIdx; i < xmmTotal; i++) {
+ if (!xmmMap[i].isNeeded) {
+ int x = findFreeRegRec(i+1);
+ if (x == -1) return i;
+ return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
+ }
+ }
+ return -1;
+ }
+
+ int findFreeReg() {
+ for (int i = 0; i < xmmTotal; i++) {
+ if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
+ return i; // Reg is not needed and was a temp reg
+ }
+ }
+ int x = findFreeRegRec(0);
+ pxAssumeDev( x >= 0, "microVU register allocation failure!" );
+ return x;
+ }
public:
- microRegAlloc(microVU* _mVU);
-
+ microRegAlloc(int _index) {
+ index = _index;
+ }
+
void reset() {
for (int i = 0; i < xmmTotal; i++) {
clearReg(i);
}
counter = 0;
}
+
void flushAll(bool clearState = 1) {
for (int i = 0; i < xmmTotal; i++) {
writeBackReg(xmm(i));
if (clearState) clearReg(i);
}
}
- void clearReg(int regId);
+
void clearReg(const xmm& reg) { clearReg(reg.Id); }
+ void clearReg(int regId) {
+ microMapXMM& clear( xmmMap[regId] );
+ clear.VFreg = -1;
+ clear.count = 0;
+ clear.xyzw = 0;
+ clear.isNeeded = 0;
+ }
+
void clearRegVF(int VFreg) {
for (int i = 0; i < xmmTotal; i++) {
if (xmmMap[i].VFreg == VFreg) clearReg(i);
}
}
- void writeBackReg(const xmm& reg, bool invalidateRegs = 1);
- void clearNeeded(const xmm& reg);
- const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1);
+
+ void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
+ microMapXMM& write( xmmMap[reg.Id] );
+
+ if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
+ if (write.VFreg == 33) xMOVSS(ptr32[&getVI(REG_I)], reg);
+		else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&regs().ACC], write.xyzw, 1);
+ else mVUsaveReg(reg, ptr[&getVF(write.VFreg)], write.xyzw, 1);
+ if (invalidateRegs) {
+ for (int i = 0; i < xmmTotal; i++) {
+ microMapXMM& imap (xmmMap[i]);
+ if ((i == reg.Id) || imap.isNeeded) continue;
+ if (imap.VFreg == write.VFreg) {
+ if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
+ clearReg(i); // Invalidate any Cached Regs of same vf Reg
+ }
+ }
+ }
+ if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
+ write.count = counter;
+ write.xyzw = 0;
+ write.isNeeded = 0;
+ return;
+ }
+ }
+ clearReg(reg); // Clear Reg
+ }
+
+ void clearNeeded(const xmm& reg) {
+
+ if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
+
+ microMapXMM& clear (xmmMap[reg.Id]);
+ clear.isNeeded = 0;
+ if (clear.xyzw) { // Reg was modified
+ if (clear.VFreg > 0) {
+ int mergeRegs = 0;
+ if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
+ for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
+ if (i == reg.Id) continue;
+ microMapXMM& imap (xmmMap[i]);
+ if (imap.VFreg == clear.VFreg) {
+ if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
+ if (mergeRegs == 1) {
+ mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
+ imap.xyzw = 0xf;
+ imap.count = counter;
+ mergeRegs = 2;
+ }
+ else clearReg(i);
+ }
+ }
+ if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
+ else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
+ }
+ else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
+ }
+ }
+
+ const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
+ counter++;
+ if (vfLoadReg >= 0) { // Search For Cached Regs
+ for (int i = 0; i < xmmTotal; i++) {
+ const xmm& xmmi(xmm::GetInstance(i));
+ microMapXMM& imap (xmmMap[i]);
+ if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
+ || (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
+ int z = i;
+ if (vfWriteReg >= 0) { // Reg will be modified
+ if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
+ z = findFreeReg();
+ const xmm& xmmz(xmm::GetInstance(z));
+ writeBackReg(xmmz);
+ if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
+ else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
+ else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
+ else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
+ else if (z != i) xMOVAPS (xmmz, xmmi);
+ imap.count = counter; // Reg i was used, so update counter
+ }
+ else { // Don't clone reg, but shuffle to adjust for SS ops
+ if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
+ if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
+ else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
+ else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
+ }
+ xmmMap[z].VFreg = vfWriteReg;
+ xmmMap[z].xyzw = xyzw;
+ }
+ xmmMap[z].count = counter;
+ xmmMap[z].isNeeded = 1;
+ return xmm::GetInstance(z);
+ }
+ }
+ }
+ int x = findFreeReg();
+ const xmm& xmmx = xmm::GetInstance(x);
+ writeBackReg(xmmx);
+
+ if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
+ if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
+ else if (vfLoadReg == 33) loadIreg (xmmx, xyzw);
+		else if (vfLoadReg == 32) mVUloadReg(xmmx, ptr[&regs().ACC], xyzw);
+ else if (vfLoadReg >= 0) mVUloadReg(xmmx, ptr[&getVF(vfLoadReg)], xyzw);
+ xmmMap[x].VFreg = vfWriteReg;
+ xmmMap[x].xyzw = xyzw;
+ }
+ else { // Reg Will Not Be Modified (always load full reg for caching)
+ if (vfLoadReg == 33) loadIreg(xmmx, 0xf);
+		else if (vfLoadReg == 32) xMOVAPS (xmmx, ptr128[&regs().ACC]);
+ else if (vfLoadReg >= 0) xMOVAPS (xmmx, ptr128[&getVF(vfLoadReg)]);
+ xmmMap[x].VFreg = vfLoadReg;
+ xmmMap[x].xyzw = 0;
+ }
+ xmmMap[x].count = counter;
+ xmmMap[x].isNeeded = 1;
+ return xmmx;
+ }
};
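
The allocator logic is unchanged by the move into the header, so one policy worth highlighting is findFreeReg's eviction order. A minimal standalone model of it (the recursive findFreeRegRec is flattened into a loop here):

```cpp
#include <cstdio>

// Minimal model of findFreeReg's policy: prefer an unneeded slot that
// held only a temp (VFreg < 0); otherwise evict the unneeded mapping
// with the lowest 'count' (least recently allocated).
struct MapEntry { int VFreg; int count; bool isNeeded; };

int findFreeRegSketch(const MapEntry* map, int total) {
    for (int i = 0; i < total; i++)
        if (!map[i].isNeeded && map[i].VFreg < 0) return i; // free temp slot
    int best = -1;
    for (int i = 0; i < total; i++) {
        if (map[i].isNeeded) continue;
        if (best < 0 || map[i].count < map[best].count) best = i;
    }
    return best; // caller writes this slot back, then reuses it
}

int main() {
    MapEntry map[3] = { {5, 10, false}, {7, 3, false}, {2, 8, true} };
    std::printf("evict slot %d\n", findFreeRegSketch(map, 3)); // 1
    return 0;
}
```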
diff --git a/pcsx2/x86/microVU_IR.inl b/pcsx2/x86/microVU_IR.inl
deleted file mode 100644
index f5613df531..0000000000
--- a/pcsx2/x86/microVU_IR.inl
+++ /dev/null
@@ -1,165 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2010 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-
-int microRegAlloc::findFreeRegRec(int startIdx) {
- for (int i = startIdx; i < xmmTotal; i++) {
- if (!xmmMap[i].isNeeded) {
- int x = findFreeRegRec(i+1);
- if (x == -1) return i;
- return ((xmmMap[i].count < xmmMap[x].count) ? i : x);
- }
- }
- return -1;
-}
-int microRegAlloc::findFreeReg() {
- for (int i = 0; i < xmmTotal; i++) {
- if (!xmmMap[i].isNeeded && (xmmMap[i].VFreg < 0)) {
- return i; // Reg is not needed and was a temp reg
- }
- }
- int x = findFreeRegRec(0);
- pxAssumeDev( x >= 0, "microVU register allocation failure!" );
- return x;
-}
-
-microRegAlloc::microRegAlloc(microVU* _mVU) {
- mVU = _mVU;
-}
-
-void microRegAlloc::clearReg(int regId) {
- microMapXMM& clear( xmmMap[regId] );
- clear.VFreg = -1;
- clear.count = 0;
- clear.xyzw = 0;
- clear.isNeeded = 0;
-}
-void microRegAlloc::writeBackReg(const xmm& reg, bool invalidateRegs) {
- microMapXMM& write( xmmMap[reg.Id] );
-
- if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
- if (write.VFreg == 33) xMOVSS(ptr32[&mVU->getVI(REG_I)], reg);
- else if (write.VFreg == 32) mVUsaveReg(reg, ptr[&mVU->regs().ACC], write.xyzw, 1);
- else mVUsaveReg(reg, ptr[&mVU->getVF(write.VFreg)], write.xyzw, 1);
- if (invalidateRegs) {
- for (int i = 0; i < xmmTotal; i++) {
- microMapXMM& imap (xmmMap[i]);
- if ((i == reg.Id) || imap.isNeeded) continue;
- if (imap.VFreg == write.VFreg) {
- if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
- clearReg(i); // Invalidate any Cached Regs of same vf Reg
- }
- }
- }
- if (write.xyzw == 0xf) { // Make Cached Reg if All Vectors were Modified
- write.count = counter;
- write.xyzw = 0;
- write.isNeeded = 0;
- return;
- }
- }
- clearReg(reg); // Clear Reg
-}
-void microRegAlloc::clearNeeded(const xmm& reg)
-{
- if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
-
- microMapXMM& clear (xmmMap[reg.Id]);
- clear.isNeeded = 0;
- if (clear.xyzw) { // Reg was modified
- if (clear.VFreg > 0) {
- int mergeRegs = 0;
- if (clear.xyzw < 0xf) { mergeRegs = 1; } // Try to merge partial writes
- for (int i = 0; i < xmmTotal; i++) { // Invalidate any other read-only regs of same vfReg
- if (i == reg.Id) continue;
- microMapXMM& imap (xmmMap[i]);
- if (imap.VFreg == clear.VFreg) {
- if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: clearNeeded() [%d]", imap.VFreg);
- if (mergeRegs == 1) {
- mVUmergeRegs(xmm(i), reg, clear.xyzw, 1);
- imap.xyzw = 0xf;
- imap.count = counter;
- mergeRegs = 2;
- }
- else clearReg(i);
- }
- }
- if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
- else if (mergeRegs) writeBackReg(reg); // Write Back Partial Writes if couldn't merge
- }
- else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
- }
-}
-const xmm& microRegAlloc::allocReg(int vfLoadReg, int vfWriteReg, int xyzw, bool cloneWrite) {
- counter++;
- if (vfLoadReg >= 0) { // Search For Cached Regs
- for (int i = 0; i < xmmTotal; i++) {
- const xmm& xmmi(xmm::GetInstance(i));
- microMapXMM& imap (xmmMap[i]);
- if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
- || (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
- int z = i;
- if (vfWriteReg >= 0) { // Reg will be modified
- if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
- z = findFreeReg();
- const xmm& xmmz(xmm::GetInstance(z));
- writeBackReg(xmmz);
- if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
- else if (xyzw == 4) xPSHUF.D(xmmz, xmmi, 1);
- else if (xyzw == 2) xPSHUF.D(xmmz, xmmi, 2);
- else if (xyzw == 1) xPSHUF.D(xmmz, xmmi, 3);
- else if (z != i) xMOVAPS (xmmz, xmmi);
- imap.count = counter; // Reg i was used, so update counter
- }
- else { // Don't clone reg, but shuffle to adjust for SS ops
- if ((vfLoadReg != vfWriteReg) || (xyzw != 0xf)) { writeBackReg(xmmi); }
- if (xyzw == 4) xPSHUF.D(xmmi, xmmi, 1);
- else if (xyzw == 2) xPSHUF.D(xmmi, xmmi, 2);
- else if (xyzw == 1) xPSHUF.D(xmmi, xmmi, 3);
- }
- xmmMap[z].VFreg = vfWriteReg;
- xmmMap[z].xyzw = xyzw;
- }
- xmmMap[z].count = counter;
- xmmMap[z].isNeeded = 1;
- return xmm::GetInstance(z);
- }
- }
- }
- int x = findFreeReg();
- const xmm& xmmx = xmm::GetInstance(x);
- writeBackReg(xmmx);
-
- if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
- if ((vfLoadReg == 0) && !(xyzw & 1)) { xPXOR(xmmx, xmmx); }
- else if (vfLoadReg == 33) mVU->loadIreg(xmmx, xyzw);
- else if (vfLoadReg == 32) mVUloadReg (xmmx, ptr[&mVU->regs().ACC], xyzw);
- else if (vfLoadReg >= 0) mVUloadReg (xmmx, ptr[&mVU->getVF(vfLoadReg)], xyzw);
- xmmMap[x].VFreg = vfWriteReg;
- xmmMap[x].xyzw = xyzw;
- }
- else { // Reg Will Not Be Modified (always load full reg for caching)
- if (vfLoadReg == 33) mVU->loadIreg(xmmx, 0xf);
- else if (vfLoadReg == 32) xMOVAPS(xmmx, ptr128[&mVU->regs().ACC]);
- else if (vfLoadReg >= 0) xMOVAPS(xmmx, ptr128[&mVU->getVF(vfLoadReg)]);
- xmmMap[x].VFreg = vfLoadReg;
- xmmMap[x].xyzw = 0;
- }
- xmmMap[x].count = counter;
- xmmMap[x].isNeeded = 1;
- return xmmx;
-}
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index 904af60859..0aa263cdde 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -248,12 +248,14 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
//------------------------------------------------------------------
// Reg Alloc
-#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction
+static const bool doRegAlloc = 1; // Set to 0 to flush every 32bit Instruction
// This turns off reg alloc for the most part, but reg alloc will still
-// be done between Upper/Lower and within instructions...
+// be done within instructions... Also, doSwapOp() needs reg alloc between the
+// Lower and Upper instructions, so in that case it flushes after the full
+// 64bit instruction (lower and upper)
// No Flag Optimizations
-#define noFlagOpts 0 // Set to 1 to disable all flag setting optimizations
+static const bool noFlagOpts = 0; // Set to 1 to disable all flag setting optimizations
// Note: The flag optimizations this disables should all be harmless, so
// this option is mainly just for debugging... it effectively forces mVU
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
@@ -261,7 +263,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// flag instances between blocks...
// Constant Propagation
-#define CHECK_VU_CONSTPROP 0 // Set to 1 to turn on vi15 const propagation
+static const bool doConstProp = 0; // Set to 1 to turn on vi15 const propagation
// Enables Constant Propagation for Jumps based on vi15 'link-register'
// allowing us to know many indirect jump target addresses.
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!
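
One practical note on the #define-to-constant conversion throughout this hunk: a `static const bool` stays a compile-time constant, so dead branches still fold away, while gaining a real type and scope. A trivial illustration (assumes ordinary constant folding by the compiler):

```cpp
#include <cstdio>

static const bool doConstProp = false; // typed and scoped, unlike a #define

int main() {
    // The condition is still a compile-time constant, so the dead branch
    // folds away exactly as it would with `#define CHECK_VU_CONSTPROP 0`.
    if (doConstProp) std::puts("const-prop path");
    else             std::puts("fallback path");
    return 0;
}
```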