From 81009002bb99578b4fc74885982ff7107987f531 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 18 Jul 2009 13:57:21 +0000 Subject: [PATCH] microVU: more work in progress stuff... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1542 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 2 + pcsx2/x86/microVU.h | 3 +- pcsx2/x86/microVU_Alloc.inl | 241 ---------------------------------- pcsx2/x86/microVU_Execute.inl | 2 - pcsx2/x86/microVU_IR.h | 139 ++++++++++++++++++++ pcsx2/x86/microVU_Lower.inl | 8 +- pcsx2/x86/microVU_Misc.h | 10 +- pcsx2/x86/microVU_Misc.inl | 18 ++- pcsx2/x86/microVU_Upper.inl | 41 ++++++ 9 files changed, 201 insertions(+), 263 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 6d7d490657..c2830d04e8 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -50,6 +50,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr, int vuIndex) { mVU->prog.max = mMaxProg - 1; mVU->prog.prog = (microProgram*)_aligned_malloc(sizeof(microProgram)*(mVU->prog.max+1), 64); mVU->prog.progList = new int[mMaxProg]; + mVU->regAlloc = new microRegAlloc(mVU->regs); mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init"); mVU->cache = SysMmapEx((vuIndex ? 0x5f240000 : 0x5e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? 
"Micro VU1" : "Micro VU0")); @@ -113,6 +114,7 @@ microVUt(void) mVUclose(mV) { safe_aligned_free(mVU->prog.prog); } safe_delete_array(mVU->prog.progList); + safe_delete(mVU->regAlloc); } // Clears Block Data in specified range diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 13abfd572a..87eae91357 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -150,7 +150,8 @@ struct microVU { u32 progSize; // VU Micro Memory Size (in u32's) u32 cacheSize; // VU Cache Size - microProgManager prog; // Micro Program Data + microProgManager prog; // Micro Program Data + microRegAlloc* regAlloc; // Reg Alloc Class FILE* logFile; // Log File Pointer VURegs* regs; // VU Regs Struct diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 4ba5f11e8e..f35b240d72 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -756,244 +756,3 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) { if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \ else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \ } - -//------------------------------------------------------------------ -// Reg Alloc -//------------------------------------------------------------------ - -struct microXMM { - int reg; // VF Reg Number Stored - int xyzw; // xyzw to write back - int count; // Count of when first cached - bool isNeeded; // Is needed for current instruction - bool isTemp; // Is Temp Reg -}; - -#define xmmTotal 6 // Don't allocate PQ/ACC? -class microRegAlloc { -private: - microXMM xmmReg[xmmTotal]; - VURegs* vuRegs; - int counter; - void clearReg(int reg) { - xmmReg[reg].reg = 0; - xmmReg[reg].count = 0; - xmmReg[reg].isNeeded = 0; - xmmReg[reg].isTemp = 1; - } - int findFreeRegRec(int startIdx) { - for (int i = startIdx; i < xmmTotal; i++) { - if (!xmmReg[i].isNeeded) { - if ((i+1) >= xmmTotal) return i; - int x = findFreeRegRec(i+1); - if (x == -1) return i; - return ((xmmReg[i].count < xmmReg[x].count) ? 
i : x); - } - } - return -1; - } - int findFreeReg() { - for (int i = 0; i < xmmTotal; i++) { - if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) { - return i; // Reg is not needed and was a temp reg - } - } - int x = findFreeRegRec(0); - if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; } - return x; - } - -public: - microRegAlloc(VURegs* vuRegsPtr) { - vuRegs = vuRegsPtr; - reset(); - } - void reset() { - for (int i = 0; i < xmmTotal; i++) { - clearReg(i); - } - counter = 0; - } - void writeBackReg(int reg) { - if (xmmReg[reg].reg && xmmReg[reg].xyzw) { - mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1); - for (int i = 0; i < xmmTotal; i++) { - if (i = reg) continue; - if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) { - clearReg(i); // Invalidate any Cached Regs - } - } - if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg - xmmReg[reg].count = counter; - xmmReg[reg].xyzw = 0; - xmmReg[reg].isNeeded = 0; - xmmReg[reg].isTemp = 0; - return; - } - } - clearReg(reg); // Clear Written Back Reg - } - void clearNeeded(int reg) { - xmmReg[reg].isNeeded = 0; - } - int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) { - counter++; - for (int i = 0; i < xmmTotal; i++) { - if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg == vfReg)) { - if (writeBack) { - int z = findFreeReg(); - writeBackReg(z); - if (xyzw == 8) SSE2_SHUFPD_XMM_to_XMM(z, i, 0); - else if (xyzw == 4) SSE2_SHUFPD_XMM_to_XMM(z, i, 1); - else if (xyzw == 2) SSE2_SHUFPD_XMM_to_XMM(z, i, 2); - else if (xyzw == 1) SSE2_SHUFPD_XMM_to_XMM(z, i, 3); - else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i); - xmmReg[z].reg = vfWriteBack; - xmmReg[z].count = counter; - xmmReg[z].xyzw = xyzw; - xmmReg[z].isNeeded = 1; - xmmReg[z].isTemp = 1; - return z; - } - xmmReg[i].isNeeded = 1; - return i; - } - } - int x = findFreeReg(); - writeBackReg(x); - if (vfReg >= 0) { - if (writeBack) { - mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw); 
- xmmReg[x].reg = vfWriteBack; - xmmReg[x].count = counter; - xmmReg[x].xyzw = xyzw; - xmmReg[x].isNeeded = 1; - xmmReg[x].isTemp = 1; - } - else { - SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[vfReg].UL[0], x); - xmmReg[x].reg = vfReg; - xmmReg[x].count = counter; - xmmReg[x].xyzw = 0; - xmmReg[x].isNeeded = 1; - xmmReg[x].isTemp = 0; - } - } - else { // Is Temp Reg - xmmReg[x].reg = 0; - xmmReg[x].count = counter; - xmmReg[x].xyzw = 0; - xmmReg[x].isNeeded = 1; - xmmReg[x].isTemp = 1; - } - return x; - } -}; - - -/* -struct microXMM { - int reg; // VF Reg Number Stored - int xyzw; // Current xyzw Order - int validXYZW; // Vectors that are valid - bool isNeeded; // Is needed for current instruction - bool isTemp; // Is Temp Reg - bool isWritten; // The reg has been written to -}; - -#define xmmTotal 7 // Don't allocate Last XMM Reg for PQ instances (will change this later) -class microRegAlloc { -private: - microXMM xmmReg[xmmTotal]; - VURegs* vuRegs; - int vfStats[32]; - int findFreeReg(int startIdx) { - for (int i = startIdx; i < xmmTotal; i++) { - if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) { - return i; // Reg is not needed and was a temp reg - } - } - for (int i = startIdx; i < xmmTotal; i++) { - if (!xmmReg[i].isNeeded) { - if ((i+1) >= xmmTotal) return i; - int x = findFreeReg(i+1); - if (x == -1) return i; - return ((vfStats[xmmReg[i].reg] < vfStats[xmmReg[x].reg]) ? 
i : x); - } - } - return -1; - } - -public: - microRegAlloc(VURegs* vuRegsPtr) { - vuRegs = vuRegsPtr; - reset(); - } - void reset() { - for (int i = 0; i < xmmTotal; i++) { - xmmReg[i].reg = 0; - xmmReg[i].isNeeded = 0; - xmmReg[i].isTemp = 1; - } - } - void loadStats(int* vfRegData) { - for (int i = 0; i < 32; i++) { - vfStats[i] = vfRegData[i]; - } - } - void changeXYZW(int reg, int newXYZW) { - if (xmmReg[reg].xyzw != newXYZW) { - int shuffleReg = 0; - int xyzw = xmmReg[reg].xyzw; - if (((xyzw >> 6) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (3<<0); - else if (((xyzw >> 4) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (2<<0); - else if (((xyzw >> 2) & 3) == ((newXYZW >> 0) & 3)) shuffleReg |= (1<<0); - if (((xyzw >> 6) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (3<<2); - else if (((xyzw >> 4) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (2<<2); - else if (((xyzw >> 2) & 3) == ((newXYZW >> 2) & 3)) shuffleReg |= (1<<2); - if (((xyzw >> 6) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (3<<4); - else if (((xyzw >> 4) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (2<<4); - else if (((xyzw >> 2) & 3) == ((newXYZW >> 4) & 3)) shuffleReg |= (1<<4); - if (((xyzw >> 6) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (3<<6); - else if (((xyzw >> 4) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (2<<6); - else if (((xyzw >> 2) & 3) == ((newXYZW >> 6) & 3)) shuffleReg |= (1<<6); - SSE2_PSHUFD_XMM_to_XMM(reg, reg, shuffleReg); - } - } - int writeBack(int reg) { - if (!xmmReg[reg].isTemp && xmmReg[reg].reg && xmmReg[reg].isWritten) { - changeXYZW(reg, 0xe4); - SSE_MOVAPS_XMM_to_M128(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0]); // Write Back Reg to Mem - } - } - int allocReg(int vfReg, bool cache, int xyzw, int vfWriteBack) { - for (int i = 0; i < xmmTotal; i++) { - if (!xmmReg[i].isTemp && xmmReg[i].reg == vfReg) { - if ((xmmReg[i].validXYZW != 0xf) && xmmReg[i].isWritten) { - // write back partial reg - //if (!forWriting) { get reg from mem } - } - xmmReg[i].isWritten |= 
(forWriting) ? 1 : 0; - xmmReg[i].isNeeded = 1; - return i; - } - } - int x = findFreeReg(0); - if (!xmmReg[x].isTemp && xmmReg[x].reg && xmmReg[x].isWritten) { - changeXYZW(x, 0xe4); - SSE_MOVAPS_XMM_to_M128(x, (uptr)&vuRegs->VF[xmmReg[x].reg].UL[0]); // Write Back Reg to Mem - } - if (vfReg != -1) { - SSE_MOVAPS_M128_to_XMM((uptr)&vuRegs->VF[xmmReg[x].reg].UL[0], x); // Load Reg from Mem - xmmReg[x].isNeeded = 1; - xmmReg[x].isTemp = 0; - xmmReg[x].xyzw = 0xe4; - } - else { // Is Temp Reg - xmmReg[x].isNeeded = 1; - xmmReg[x].isTemp = 1; - } - return x; - } -}; -*/ diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 14e4918e2c..add34043df 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -69,8 +69,6 @@ void mVUdispatcherA(mV) { SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1); SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]); - SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); - SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL); SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 2d1beaf49c..d60d8dabaf 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -156,3 +156,142 @@ struct microIR { u32 startPC; // Start PC for Cur Block u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags }; + +//------------------------------------------------------------------ +// Reg Alloc +//------------------------------------------------------------------ + +void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW); +void mVUloadReg(int reg, uptr offset, int xyzw); + +struct microXMM { + int reg; // VF Reg Number Stored + int xyzw; // xyzw to write back + int count; // Count of when first cached + bool isNeeded; // Is needed for current instruction + bool 
isTemp; // Is Temp Reg +}; + +#define xmmTotal 6 // Don't allocate PQ/ACC? +class microRegAlloc { +private: + microXMM xmmReg[xmmTotal]; + VURegs* vuRegs; + int counter; + void clearReg(int reg) { + xmmReg[reg].reg = 0; + xmmReg[reg].count = 0; + xmmReg[reg].isNeeded = 0; + xmmReg[reg].isTemp = 1; + } + int findFreeRegRec(int startIdx) { // Among regs >= startIdx that are not needed, returns the one with the lowest count (-1 if none) + for (int i = startIdx; i < xmmTotal; i++) { + if (!xmmReg[i].isNeeded) { + if ((i+1) >= xmmTotal) return i; + int x = findFreeRegRec(i+1); + if (x == -1) return i; + return ((xmmReg[i].count < xmmReg[x].count) ? i : x); + } + } + return -1; + } + int findFreeReg() { // Prefers an unneeded temp reg; otherwise falls back to findFreeRegRec + for (int i = 0; i < xmmTotal; i++) { + if (!xmmReg[i].isNeeded && xmmReg[i].isTemp) { + return i; // Reg is not needed and was a temp reg + } + } + int x = findFreeRegRec(0); + if (x < 0) { DevCon::Error("microVU Allocation Error!"); return 0; } + return x; + } + +public: + microRegAlloc(VURegs* vuRegsPtr) { + vuRegs = vuRegsPtr; + reset(); + } + void reset() { + for (int i = 0; i < xmmTotal; i++) { + clearReg(i); + } + counter = 0; + } + void writeBackReg(int reg) { // Stores pending data of xmm 'reg' to ACC/VF memory, then keeps it as a cached reg (xyzw == 0xf) or clears it + if (xmmReg[reg].reg && (xmmReg[reg].xyzw || (xmmReg[reg].reg >= 32))) { + if (xmmReg[reg].reg == 32) SSE_MOVAPS_XMM_to_M128((uptr)&vuRegs->ACC.UL[0], reg); + else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1); + for (int i = 0; i < xmmTotal; i++) { + if (i == reg) continue; // Skip the reg being written back (must compare, not assign to the loop index) + if (!xmmReg[i].isTemp && xmmReg[i].reg == xmmReg[reg].reg) { + clearReg(i); // Invalidate any Cached Regs + } + } + if (xmmReg[reg].xyzw == 0xf) { // Make Cached Reg + xmmReg[reg].count = counter; + xmmReg[reg].xyzw = 0; + xmmReg[reg].isNeeded = 0; + xmmReg[reg].isTemp = 0; + return; + } + } + clearReg(reg); // Clear Written Back Reg + } + void clearNeeded(int reg) { + xmmReg[reg].isNeeded = 0; + } + int allocReg(int vfReg = -1, bool writeBack = 0, int xyzw = 0, int vfWriteBack = 0) { // Returns an xmm reg index holding vfReg (an empty temp when vfReg < 0); with writeBack the result is a scratch copy tagged for vfWriteBack + counter++; + for (int i = 0; i < xmmTotal; i++) { + if ((vfReg >= 0) && (!xmmReg[i].isTemp) && (xmmReg[i].reg ==
vfReg)) { + if (writeBack) { + int z = findFreeReg(); + writeBackReg(z); + if (xyzw == 8) SSE2_PSHUFD_XMM_to_XMM(z, i, 0); + else if (xyzw == 4) SSE2_PSHUFD_XMM_to_XMM(z, i, 1); + else if (xyzw == 2) SSE2_PSHUFD_XMM_to_XMM(z, i, 2); + else if (xyzw == 1) SSE2_PSHUFD_XMM_to_XMM(z, i, 3); + else if (z != i) SSE_MOVAPS_XMM_to_XMM (z, i); + xmmReg[z].reg = vfWriteBack; + xmmReg[z].count = counter; + xmmReg[z].xyzw = xyzw; + xmmReg[z].isNeeded = 1; + xmmReg[z].isTemp = 1; + return z; + } + xmmReg[i].count = counter; + xmmReg[i].isNeeded = 1; + return i; + } + } + int x = findFreeReg(); + writeBackReg(x); + if (vfReg >= 0) { + if (writeBack) { + mVUloadReg(x, (uptr)&vuRegs->VF[vfReg].UL[0], xyzw); + xmmReg[x].reg = vfWriteBack; + xmmReg[x].count = counter; + xmmReg[x].xyzw = xyzw; + xmmReg[x].isNeeded = 1; + xmmReg[x].isTemp = 1; + } + else { + if (vfReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]); // Args are (xmm dest, mem src), as in mVUdispatcherA + else SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfReg].UL[0]); + xmmReg[x].reg = vfReg; + xmmReg[x].count = counter; + xmmReg[x].xyzw = 0; + xmmReg[x].isNeeded = 1; + xmmReg[x].isTemp = 0; + } + } + else { // Is Temp Reg + xmmReg[x].reg = 0; + xmmReg[x].count = counter; + xmmReg[x].xyzw = 0; + xmmReg[x].isNeeded = 1; + xmmReg[x].isTemp = 1; + } + return x; + } +}; diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 670fdc4da5..7bd2cdd09f 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -60,9 +60,9 @@ mVUop(mVU_DIV) { MOV32ItoM((uptr)&mVU->divFlag, divD); // Zero divide (only when not 0/0) x86SetJ8(bjmp); - SSE_XORPS_XMM_to_XMM(xmmFs, xmmFt); + SSE_XORPS_XMM_to_XMM (xmmFs, xmmFt); SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit); - SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // If division by zero, then xmmFs = +/- fmax + SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // If division by zero, then xmmFs = +/- fmax djmp = JMP8(0); x86SetJ8(cjmp); @@ -87,7 +87,7 @@ mVUop(mVU_SQRT) { MOV32ItoM((uptr)&mVU->divFlag, 0);
// Clear I/D flags testNeg(xmmFt, gprT1, ajmp); // Check for negative sqrt - if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive) + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(xmmFt, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt); if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt); @@ -119,7 +119,7 @@ mVUop(mVU_RSQRT) { x86SetJ8(cjmp); SSE_ANDPS_M128_to_XMM(xmmFs, (uptr)mVU_signbit); - SSE_ORPS_XMM_to_XMM(xmmFs, xmmMax); // xmmFs = +/-Max + SSE_ORPS_M128_to_XMM (xmmFs, (uptr)mVU_maxvals); // xmmFs = +/-Max djmp = JMP8(0); x86SetJ8(ajmp); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 1bac483032..894897633d 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -118,10 +118,10 @@ declareAllVariables #define xmmT1 0 // Temp Reg #define xmmFs 1 // Holds the Value of Fs (writes back result Fd) #define xmmFt 2 // Holds the Value of Ft -#define xmmACC 3 // Holds ACC -#define xmmMax 4 // Holds mVU_maxvals -#define xmmMin 5 // Holds mVU_minvals -#define xmmT2 6 // Temp Reg? +#define xmmT2 3 // Temp Reg? +#define xmmT3 4 // Temp Reg? +#define xmmT4 5 // Temp Reg? 
+#define xmmACC 6 // Holds ACC #define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs #define gprT1 0 // Temp Reg @@ -141,7 +141,7 @@ declareAllVariables // Recursive Inline #ifndef __LINUX__ -#define __recInline __forceinline +#define __recInline __releaseinline #else #define __recInline inline #endif diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 81534987f1..5cf883e359 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -26,12 +26,12 @@ void mVUclamp1(int reg, int regT1, int xyzw) { switch (xyzw) { case 1: case 2: case 4: case 8: - SSE_MINSS_XMM_to_XMM(reg, xmmMax); - SSE_MAXSS_XMM_to_XMM(reg, xmmMin); + SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals); break; default: - SSE_MINPS_XMM_to_XMM(reg, xmmMax); - SSE_MAXPS_XMM_to_XMM(reg, xmmMin); + SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); break; } } @@ -43,15 +43,15 @@ void mVUclamp2(int reg, int regT1, int xyzw) { case 1: case 2: case 4: case 8: SSE_MOVSS_XMM_to_XMM(regT1, reg); SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); - SSE_MINSS_XMM_to_XMM(reg, xmmMax); - SSE_MAXSS_XMM_to_XMM(reg, xmmMin); + SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXSS_M32_to_XMM(reg, (uptr)mVU_minvals); SSE_ORPS_XMM_to_XMM(reg, regT1); break; default: SSE_MOVAPS_XMM_to_XMM(regT1, reg); SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVU_signbit); - SSE_MINPS_XMM_to_XMM(reg, xmmMax); - SSE_MAXPS_XMM_to_XMM(reg, xmmMin); + SSE_MINPS_M128_to_XMM(reg, (uptr)mVU_maxvals); + SSE_MAXPS_M128_to_XMM(reg, (uptr)mVU_minvals); SSE_ORPS_XMM_to_XMM(reg, regT1); break; } @@ -294,8 +294,6 @@ microVUt(void) mVUbackupRegs(mV) { microVUt(void) mVUrestoreRegs(mV) { SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]); SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->xmmPQb[0]); - SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); - SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); MOV32ItoR(gprR, 
Roffset); // Restore gprR } diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 380a2a4727..abae420b08 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -440,6 +440,47 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, int regT2, int xyzw, bool #define mVU_FMAC28(operation, OPname) { mVU_FMAC6 (operation, OPname); pass1 { sFLAG.doFlag = 0; } } #define mVU_FMAC29(operation, OPname) { mVU_FMAC3 (operation, OPname); pass1 { sFLAG.doFlag = 0; } } +#define opCase1 if (opCase == 1) // Normal +#define opCase2 if (opCase == 2) // BC Opcodes +#define opCase3 if (opCase == 3) // I Opcodes +#define opCase4 if (opCase == 4) // Q Opcodes + +#define shuffleXYZW(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4)))) // PSHUFD immediate: swaps lane 0 with lane 3/2/1 for x == 1/2/4, identity (0xe4) otherwise + +static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = { // Packed ops, indexed by opType in mVU_FMACa + SSE_ADDPS_XMM_to_XMM, // 0 + SSE_SUBPS_XMM_to_XMM, // 1 + SSE_MULPS_XMM_to_XMM, // 2 + SSE_MAXPS_XMM_to_XMM, // 3 + SSE_MINPS_XMM_to_XMM // 4 +}; + +static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = { // Scalar ops, indexed by opType in mVU_FMACa + SSE_ADDSS_XMM_to_XMM, // 0 + SSE_SUBSS_XMM_to_XMM, // 1 + SSE_MULSS_XMM_to_XMM, // 2 + SSE_MAXSS_XMM_to_XMM, // 3 + SSE_MINSS_XMM_to_XMM // 4 +}; + +void mVU_FMACa(microVU* mVU, int opCase, int opType, bool updateFlags) { // Emits Fs = Fs <op> Ft using regAlloc; opCase picks the Ft source (1..4), opType picks the op from SSE_PS/SSE_SS. NOTE(review): updateFlags is not read in this body + int Fs, Ft; // NOTE(review): Ft is left unassigned when opCase is outside 1..4 - confirm callers never pass other values + opCase1 { Ft = mVU->regAlloc->allocReg(_Ft_); if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W)); } } + opCase2 { Ft = mVU->regAlloc->allocReg(_Ft_); mVU->regAlloc->clearNeeded(Ft); Ft = mVU->regAlloc->allocReg(); } // NOTE(review): the new temp Ft is not filled from the _Ft_ reg here - presumably unfinished, confirm + opCase3 { Ft = mVU->regAlloc->allocReg(); getIreg(Ft, 1); } + opCase4 { Ft = mVU->regAlloc->allocReg(); getQreg(Ft); } + + Fs = mVU->regAlloc->allocReg(_Fs_, 1, _X_Y_Z_W, _Fd_); // writeBack copy of Fs, tagged to be stored to _Fd_ + + if (_XYZW_SS) SSE_SS[opType](Fs, Ft); + else SSE_PS[opType](Fs, Ft); + + opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleXYZW(_X_Y_Z_W)); } } // Same lane swap applied again, which restores Ft's original lane order + + mVU->regAlloc->clearNeeded(Ft); + mVU->regAlloc->writeBackReg(Fs); +} +
//------------------------------------------------------------------ // Micro VU Micromode Upper instructions //------------------------------------------------------------------