diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h
index 951520e58c..054585f071 100644
--- a/common/include/x86emitter/x86types.h
+++ b/common/include/x86emitter/x86types.h
@@ -363,8 +363,6 @@ template< typename T > void xWrite( T val );
 		bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; }
 		bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; }
 
-		void operator=( xRegisterSSE src ) { Id = src.Id; }
-
 		xRegisterSSE& operator++()
 		{
 			++Id &= (iREGCNT_XMM-1);
@@ -376,6 +374,14 @@ template< typename T > void xWrite( T val );
 			--Id &= (iREGCNT_XMM-1);
 			return *this;
 		}
+
+		static const xRegisterSSE* const m_tbl_xmmRegs[iREGCNT_XMM];
+
+		static const xRegisterSSE& GetInstance(uint id)
+		{
+			pxAssume(id < iREGCNT_XMM);
+			return *m_tbl_xmmRegs[id];
+		}
 	};
 
 	class xRegisterCL : public xRegister8
diff --git a/common/src/x86emitter/x86emitter.cpp b/common/src/x86emitter/x86emitter.cpp
index 610a2a0347..8999d9dc14 100644
--- a/common/src/x86emitter/x86emitter.cpp
+++ b/common/src/x86emitter/x86emitter.cpp
@@ -138,6 +138,14 @@ const xRegister8
 
 const xRegisterCL cl;
 
+const xRegisterSSE *const xRegisterSSE::m_tbl_xmmRegs[iREGCNT_XMM] =
+{
+	&xmm0, &xmm1,
+	&xmm2, &xmm3,
+	&xmm4, &xmm5,
+	&xmm6, &xmm7
+};
+
 const char *const x86_regnames_gpr8[8] =
 {
 	"al", "cl", "dl", "bl",
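Note: the m_tbl_xmmRegs/GetInstance pair added above is a small flyweight table: one canonical, statically allocated object per register id, looked up by index so callers can hold a long-lived const reference instead of constructing xRegisterSSE temporaries. A minimal standalone sketch of the same pattern (Reg and the reg0..reg7 globals are illustrative stand-ins, not the emitter's real names):

    #include <cassert>

    struct Reg {
        int Id;
        explicit Reg(int id) : Id(id) {}
        static const Reg& GetInstance(unsigned id);
    };

    // Canonical instances live for the whole program, so a returned
    // const& can safely be stored or passed around by callers.
    static const Reg reg0(0), reg1(1), reg2(2), reg3(3),
                     reg4(4), reg5(5), reg6(6), reg7(7);

    static const Reg* const s_tbl[8] =
        { &reg0, &reg1, &reg2, &reg3, &reg4, &reg5, &reg6, &reg7 };

    const Reg& Reg::GetInstance(unsigned id)
    {
        assert(id < 8); // mirrors the pxAssume() bounds check
        return *s_tbl[id];
    }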
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl
index ba491a36bd..e2d38fa638 100644
--- a/pcsx2/x86/microVU_Alloc.inl
+++ b/pcsx2/x86/microVU_Alloc.inl
@@ -36,7 +36,7 @@ _f static x32 getFlagReg(int fInst)
 	}
 }
 
-_f void setBitSFLAG(x32 reg, x32 regT, int bitTest, int bitSet)
+_f void setBitSFLAG(const x32& reg, const x32& regT, int bitTest, int bitSet)
 {
 	xTEST(regT, bitTest);
 	xForwardJZ8 skip;
@@ -44,7 +44,7 @@ _f void setBitSFLAG(x32 reg, x32 regT, int bitTest, int bitSet)
 	skip.SetTarget();
 }
 
-_f void setBitFSEQ(x32 reg, int bitX)
+_f void setBitFSEQ(const x32& reg, int bitX)
 {
 	xTEST(reg, bitX);
 	xForwardJump8 skip(Jcc_Zero);
@@ -52,18 +52,18 @@ _f void setBitFSEQ(x32 reg, int bitX)
 	skip.SetTarget();
 }
 
-_f void mVUallocSFLAGa(x32 reg, int fInstance)
+_f void mVUallocSFLAGa(const x32& reg, int fInstance)
 {
 	xMOV(reg, getFlagReg(fInstance));
 }
 
-_f void mVUallocSFLAGb(x32 reg, int fInstance)
+_f void mVUallocSFLAGb(const x32& reg, int fInstance)
 {
 	xMOV(getFlagReg(fInstance), reg);
 }
 
 // Normalize Status Flag
-_f void mVUallocSFLAGc(x32 reg, x32 regT, int fInstance)
+_f void mVUallocSFLAGc(const x32& reg, const x32& regT, int fInstance)
 {
 	xXOR(reg, reg);
 	mVUallocSFLAGa(regT, fInstance);
@@ -107,25 +107,25 @@ _f void mVUallocSFLAGd(u32* memAddr, bool setAllflags) {
 	}
 }
 
-_f void mVUallocMFLAGa(mV, x32 reg, int fInstance)
+_f void mVUallocMFLAGa(mV, const x32& reg, int fInstance)
 {
 	xMOVZX(reg, ptr16[&mVU->macFlag[fInstance]]);
 }
 
-_f void mVUallocMFLAGb(mV, x32 reg, int fInstance)
+_f void mVUallocMFLAGb(mV, const x32& reg, int fInstance)
 {
 	//xAND(reg, 0xffff);
 	if (fInstance < 4) xMOV(ptr32[&mVU->macFlag[fInstance]], reg);         // microVU
 	else               xMOV(ptr32[&mVU->regs->VI[REG_MAC_FLAG].UL], reg);  // macroVU
 }
 
-_f void mVUallocCFLAGa(mV, x32 reg, int fInstance)
+_f void mVUallocCFLAGa(mV, const x32& reg, int fInstance)
 {
 	if (fInstance < 4) xMOV(reg, ptr32[&mVU->clipFlag[fInstance]]);        // microVU
 	else               xMOV(reg, ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL]); // macroVU
 }
 
-_f void mVUallocCFLAGb(mV, x32 reg, int fInstance)
+_f void mVUallocCFLAGb(mV, const x32& reg, int fInstance)
 {
 	if (fInstance < 4) xMOV(ptr32[&mVU->clipFlag[fInstance]], reg);        // microVU
 	else               xMOV(ptr32[&mVU->regs->VI[REG_CLIP_FLAG].UL], reg); // macroVU
@@ -135,7 +135,7 @@ _f void mVUallocCFLAGb(mV, x32 reg, int fInstance)
 // VI Reg Allocators
 //------------------------------------------------------------------
 
-_f void mVUallocVIa(mV, x32 GPRreg, int _reg_, bool signext = false)
+_f void mVUallocVIa(mV, const x32& GPRreg, int _reg_, bool signext = false)
 {
 	if (!_reg_)
 		xXOR(GPRreg, GPRreg);
@@ -146,7 +146,7 @@ _f void mVUallocVIa(mV, x32 GPRreg, int _reg_, bool signext = false)
 	xMOVZX(GPRreg, ptr16[&mVU->regs->VI[_reg_].UL]);
 }
 
-_f void mVUallocVIb(mV, x32 GPRreg, int _reg_)
+_f void mVUallocVIb(mV, const x32& GPRreg, int _reg_)
 {
 	if (mVUlow.backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch)
 		xMOVZX(gprT3, ptr16[&mVU->regs->VI[_reg_].UL]);
@@ -160,19 +160,19 @@ _f void mVUallocVIb(mV, x32 GPRreg, int _reg_)
 // P/Q Reg Allocators
 //------------------------------------------------------------------
 
-_f void getPreg(mV, xmm reg)
+_f void getPreg(mV, const xmm& reg)
 {
 	mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
 	/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
 }
 
-_f void getQreg(xmm reg, int qInstance)
+_f void getQreg(const xmm& reg, int qInstance)
 {
 	mVUunpack_xyzw(reg, xmmPQ, qInstance);
 	/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
 }
 
-_f void writeQreg(xmm reg, int qInstance)
+_f void writeQreg(const xmm& reg, int qInstance)
 {
 	if (qInstance) {
 		if (!x86caps.hasStreamingSIMD4Extensions) {
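Note: every allocator helper in microVU_Alloc.inl now takes its register operands by const reference. The wrappers are tiny (essentially an int Id), but they were copied on every call under the 32-bit calling conventions, and a const& parameter also binds directly to the canonical objects that xmm::GetInstance()/allocReg() return. A hedged sketch of the signature change (Reg, emitMove and emitMoveByValue are illustrative, not the real emitter API):

    struct Reg { int Id; explicit Reg(int id) : Id(id) {} };

    // Before: pass-by-value copies the wrapper on each call.
    void emitMoveByValue(Reg dst, Reg src) { /* encode using dst.Id, src.Id */ }

    // After: const& avoids the copy and can alias a long-lived canonical
    // instance; the callee only reads Id, so const access suffices.
    void emitMove(const Reg& dst, const Reg& src) { /* encode using dst.Id, src.Id */ }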
diff --git a/pcsx2/x86/microVU_Clamp.inl b/pcsx2/x86/microVU_Clamp.inl
index a72a7568c1..c2a77056cf 100644
--- a/pcsx2/x86/microVU_Clamp.inl
+++ b/pcsx2/x86/microVU_Clamp.inl
@@ -34,7 +34,7 @@ const __aligned16 u32 sse4_maxvals[2][4] = {
 // gotten a NaN value, then something went wrong; and the NaN's sign
 // is not to be trusted. Games like positive values better usually,
 // and its faster... so just always make NaNs into positive infinity.
-void mVUclamp1(xmm reg, xmm regT1, int xyzw, bool bClampE = 0) {
+void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) {
 	if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
 		switch (xyzw) {
 			case 1: case 2: case 4: case 8:
@@ -54,7 +54,7 @@ void mVUclamp1(xmm reg, xmm regT1, int xyzw, bool bClampE = 0) {
 // Note 2: Using regalloc here seems to contaminate some regs in certain games.
 // Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
 // so we just use a temporary mem location for our backup for now... (non-sse4 version only)
-void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
+void mVUclamp2(microVU* mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) {
 	if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
 		if (x86caps.hasStreamingSIMD4Extensions) {
 			int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
@@ -62,8 +62,8 @@ void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
 			xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
 			return;
 		}
-		//xmm regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
-		xmm regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
+		//const xmm& regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
+		const xmm& regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
 		if (regT1 != regT1in) xMOVAPS(ptr128[mVU->xmmCTemp], regT1);
 		switch (xyzw) {
 			case 1: case 2: case 4: case 8:
@@ -88,7 +88,7 @@ void mVUclamp2(microVU* mVU, xmm reg, xmm regT1in, int xyzw, bool bClampE = 0) {
 }
 
 // Used for operand clamping on every SSE instruction (add/sub/mul/div)
-void mVUclamp3(microVU* mVU, xmm reg, xmm regT1, int xyzw) {
+void mVUclamp3(microVU* mVU, const xmm& reg, const xmm& regT1, int xyzw) {
 	if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
 }
 
@@ -98,6 +98,6 @@ void mVUclamp3(microVU* mVU, xmm reg, xmm regT1, int xyzw) {
 // emulated opcodes (causing crashes). Since we're clamping the operands
 // with mVUclamp3, we should almost never be getting a NaN result,
 // but this clamp is just a precaution just-in-case.
-void mVUclamp4(xmm reg, xmm regT1, int xyzw) {
+void mVUclamp4(const xmm& reg, const xmm& regT1, int xyzw) {
 	if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
 }
diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl
index 82e45c336c..8ad7dcb94b 100644
--- a/pcsx2/x86/microVU_Compile.inl
+++ b/pcsx2/x86/microVU_Compile.inl
@@ -134,25 +134,32 @@ void doIbit(mV) {
 void doSwapOp(mV) {
 	if (mVUinfo.backupVF && !mVUlow.noWriteVF) {
 		DevCon.WriteLn(Color_Green, "microVU%d: Backing Up VF Reg [%04x]", getIndex, xPC);
-		xmm t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
-		xmm t2 = mVU->regAlloc->allocReg();
-		xMOVAPS(t2, t1);
-		mVU->regAlloc->clearNeeded(t1);
+
+		const xmm& t2 = mVU->regAlloc->allocReg();
+
+		{
+			const xmm& t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg);
+			xMOVAPS(t2, t1);
+			mVU->regAlloc->clearNeeded(t1);
+		}
 		mVUopL(mVU, 1);
-		t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
-		xXOR.PS(t2, t1);
-		xXOR.PS(t1, t2);
-		xXOR.PS(t2, t1);
-		mVU->regAlloc->clearNeeded(t1);
+		{
+			const xmm& t1 = mVU->regAlloc->allocReg(mVUlow.VF_write.reg, mVUlow.VF_write.reg, 0xf, 0);
+			xXOR.PS(t2, t1);
+			xXOR.PS(t1, t2);
+			xXOR.PS(t2, t1);
+			mVU->regAlloc->clearNeeded(t1);
+		}
 		incPC(1);
 		doUpperOp();
-
-		t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
-		xMOVAPS(t1, t2);
-		mVU->regAlloc->clearNeeded(t1);
+		{
+			const xmm& t1 = mVU->regAlloc->allocReg(-1, mVUlow.VF_write.reg, 0xf);
+			xMOVAPS(t1, t2);
+			mVU->regAlloc->clearNeeded(t1);
+		}
 		mVU->regAlloc->clearNeeded(t2);
 	}
 	else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
@@ -330,8 +337,8 @@ void mVUtestCycles(microVU* mVU) {
 			// xFowardJZ32 vu0jmp;
 			// xMOV(gprT2, (uptr)mVU);
 			// xCALL(mVUwarning0); // VU0 is allowed early exit for COP2 Interlock Simulation
-		mVUsavePipelineState(mVU);
-		mVUendProgram(mVU, NULL, 0);
+			mVUsavePipelineState(mVU);
+			mVUendProgram(mVU, NULL, 0);
 			// vu0jmp.SetTarget();
 		}
 		else {
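Note: the three xXOR.PS instructions emitted in doSwapOp() above are the classic XOR swap: three XORs exchange two values without a scratch register, which matters here because both xmm registers are live. The same identity on plain integers (illustrative only; the generated code applies it to 128-bit xmm registers, and it requires the two operands to be distinct locations):

    #include <cstdio>

    int main()
    {
        unsigned a = 0x1234, b = 0xABCD;
        a ^= b; // a = a^b
        b ^= a; // b = b^(a^b) = original a
        a ^= b; // a = (a^b)^a = original b
        printf("a=%#x b=%#x\n", a, b); // prints a=0xabcd b=0x1234
        return 0;
    }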
diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h
index a6e748f464..24034f7b8d 100644
--- a/pcsx2/x86/microVU_IR.h
+++ b/pcsx2/x86/microVU_IR.h
@@ -204,18 +204,19 @@ public:
 	}
 	void reset() {
 		for (int i = 0; i < xmmTotal; i++) {
-			clearReg(xmm(i));
+			clearReg(i);
 		}
 		counter = 0;
 	}
 	void flushAll(bool clearState = 1) {
 		for (int i = 0; i < xmmTotal; i++) {
 			writeBackReg(xmm(i));
-			if (clearState) clearReg(xmm(i));
+			if (clearState) clearReg(i);
 		}
 	}
-	void clearReg(xmm reg) {
-		microMapXMM& clear( xmmMap[reg.Id] );
+	void clearReg(const xmm& reg) { clearReg(reg.Id); }
+	void clearReg(int regId) {
+		microMapXMM& clear( xmmMap[regId] );
 		clear.VFreg    = -1;
 		clear.count    =  0;
 		clear.xyzw     =  0;
@@ -223,10 +224,10 @@ public:
 	}
 	void clearRegVF(int VFreg) {
 		for (int i = 0; i < xmmTotal; i++) {
-			if (xmmMap[i].VFreg == VFreg) clearReg(xmm(i));
+			if (xmmMap[i].VFreg == VFreg) clearReg(i);
 		}
 	}
-	void writeBackReg(xmm reg, bool invalidateRegs = 1) {
+	void writeBackReg(const xmm& reg, bool invalidateRegs = 1) {
 		microMapXMM& write( xmmMap[reg.Id] );
 		if ((write.VFreg > 0) && write.xyzw) { // Reg was modified and not Temp or vf0
@@ -239,7 +240,7 @@ public:
 				if ((i == reg.Id) || imap.isNeeded) continue;
 				if (imap.VFreg == write.VFreg) {
 					if (imap.xyzw && imap.xyzw < 0xf) DevCon.Error("microVU Error: writeBackReg() [%d]", imap.VFreg);
-					clearReg(xmm(i)); // Invalidate any Cached Regs of same vf Reg
+					clearReg(i); // Invalidate any Cached Regs of same vf Reg
 				}
 			}
 		}
@@ -252,7 +253,7 @@ public:
 		}
 		clearReg(reg); // Clear Reg
 	}
-	void clearNeeded(xmm reg)
+	void clearNeeded(const xmm& reg)
 	{
 		if ((reg.Id < 0) || (reg.Id >= xmmTotal)) return;
@@ -273,7 +274,7 @@ public:
 						imap.count    = counter;
 						mergeRegs     = 2;
 					}
-					else clearReg(xmm(i));
+					else clearReg(i);
 				}
 			}
 			if (mergeRegs == 2) clearReg(reg); // Clear Current Reg if Merged
@@ -282,11 +283,11 @@ public:
 			else clearReg(reg); // If Reg was temp or vf0, then invalidate itself
 		}
 	}
-	xmm allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
+	const xmm& allocReg(int vfLoadReg = -1, int vfWriteReg = -1, int xyzw = 0, bool cloneWrite = 1) {
 		counter++;
 		if (vfLoadReg >= 0) { // Search For Cached Regs
 			for (int i = 0; i < xmmTotal; i++) {
-				xmm xmmi(i);
+				const xmm& xmmi(xmm::GetInstance(i));
 				microMapXMM& imap (xmmMap[i]);
 				if ((imap.VFreg == vfLoadReg) && (!imap.xyzw // Reg Was Not Modified
 				||  (imap.VFreg && (imap.xyzw==0xf)))) { // Reg Had All Vectors Modified and != VF0
@@ -294,7 +295,7 @@ public:
 					if (vfWriteReg >= 0) { // Reg will be modified
 						if (cloneWrite) { // Clone Reg so as not to use the same Cached Reg
 							z = findFreeReg();
-							xmm xmmz(z);
+							const xmm& xmmz(xmm::GetInstance(z));
 							writeBackReg(xmmz);
 							if (z!=i && xyzw==8) xMOVAPS (xmmz, xmmi);
 							else if (xyzw == 4)  xPSHUF.D(xmmz, xmmi, 1);
@@ -314,12 +315,12 @@ public:
 					}
 					xmmMap[z].count    = counter;
 					xmmMap[z].isNeeded = 1;
-					return xmm(z);
+					return xmm::GetInstance(z);
 				}
 			}
 		}
 		int x = findFreeReg();
-		xmm xmmx(x);
+		const xmm& xmmx = xmm::GetInstance(x);
 		writeBackReg(xmmx);
 		if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
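Note: clearReg() gains an int overload above so the allocator's loops can clear a slot by index without materializing an xmm temporary just to carry an id; the const xmm& overload becomes a thin forwarder on reg.Id, and allocReg() can now hand back a const reference into the canonical GetInstance() table. A minimal sketch of the forwarding pattern (RegAlloc and Reg are illustrative names, not the real classes):

    struct Reg { int Id; explicit Reg(int id) : Id(id) {} };

    class RegAlloc {
        static const int kTotal = 8;
        int vfOwner[kTotal]; // which VF register each slot caches; -1 = none
    public:
        RegAlloc() { for (int i = 0; i < kTotal; i++) vfOwner[i] = -1; }

        // Id-based worker: the loops below never need a Reg object.
        void clearReg(int id) { vfOwner[id] = -1; }

        // Thin forwarder keeps existing call sites compiling unchanged.
        void clearReg(const Reg& reg) { clearReg(reg.Id); }

        void reset() { for (int i = 0; i < kTotal; i++) clearReg(i); }
    };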
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index 503bf2ef07..eadfa82621 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -24,7 +24,7 @@
 //------------------------------------------------------------------
 
 // Test if Vector is +/- Zero
-_f static void testZero(xmm xmmReg, xmm xmmTemp, x32 gprTemp)
+_f static void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprTemp)
 {
 	xXOR.PS(xmmTemp, xmmTemp);
 	xCMPEQ.SS(xmmTemp, xmmReg);
@@ -36,7 +36,7 @@ _f static void testZero(xmm xmmReg, xmm xmmTemp, x32 gprTemp)
 }
 
 // Test if Vector is Negative (Set Flags and Makes Positive)
-_f static void testNeg(mV, xmm xmmReg, x32 gprTemp)
+_f static void testNeg(mV, const xmm& xmmReg, const x32& gprTemp)
 {
 	xMOVMSKPS(gprTemp, xmmReg);
 	xTEST(gprTemp, 1);
@@ -52,8 +52,8 @@ mVUop(mVU_DIV) {
 		xmm Ft;
 		if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
 		else       Ft = mVU->regAlloc->allocReg(_Ft_);
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
-		xmm t1 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
 		testZero(Ft, t1, gprT1); // Test if Ft is zero
 		xForwardJZ8 cjmp; // Skip if not zero
@@ -89,7 +89,7 @@ mVUop(mVU_DIV) {
 mVUop(mVU_SQRT) {
 	pass1 { mVUanalyzeFDIV(mVU, 0, 0, _Ft_, _Ftf_, 7); }
 	pass2 {
-		xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
+		const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
 		xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
 		testNeg(mVU, Ft, gprT1); // Check for negative sqrt
@@ -106,9 +106,9 @@ mVUop(mVU_SQRT) {
 mVUop(mVU_RSQRT) {
 	pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 13); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
-		xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
-		xmm t1 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
 		xMOV(ptr32[&mVU->divFlag], 0); // Clear I/D flags
 		testNeg(mVU, Ft, gprT1); // Check for negative sqrt
@@ -156,7 +156,7 @@ mVUop(mVU_RSQRT) {
 }
 
 // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
-_f static void mVU_EATAN_(mV, xmm PQ, xmm Fs, xmm t1, xmm t2) {
+_f static void mVU_EATAN_(mV, const xmm& PQ, const xmm& Fs, const xmm& t1, const xmm& t2) {
 	xMOVSS(PQ, Fs);
 	xMUL.SS(PQ, ptr32[&mVUglob.T1[0]]);
 	xMOVAPS(t2, Fs);
@@ -174,9 +174,9 @@ _f static void mVU_EATAN_(mV, xmm PQ, xmm Fs, xmm t1, xmm t2) {
 mVUop(mVU_EATAN) {
 	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 54); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
-		xmm t1 = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS (xmmPQ, Fs);
 		xSUB.SS(Fs, ptr32[&mVUglob.one[0]]);
@@ -193,9 +193,9 @@ mVUop(mVU_EATAN) {
 mVUop(mVU_EATANxy) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
 	pass2 {
-		xmm t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
-		xmm Fs = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
+		const xmm& Fs = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 		xPSHUF.D(Fs, t1, 0x01);
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS (xmmPQ, Fs);
@@ -213,9 +213,9 @@ mVUop(mVU_EATANxy) {
 mVUop(mVU_EATANxz) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 54); }
 	pass2 {
-		xmm t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
-		xmm Fs = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& t1 = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
+		const xmm& Fs = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 		xPSHUF.D(Fs, t1, 0x02);
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS (xmmPQ, Fs);
@@ -240,9 +240,9 @@ mVUop(mVU_EATANxz) {
 mVUop(mVU_EEXP) {
 	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 44); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
-		xmm t1 = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS  (xmmPQ, Fs);
 		xMUL.SS (xmmPQ, ptr32[mVUglob.E1]);
@@ -272,7 +272,7 @@ mVUop(mVU_EEXP) {
 }
 
 // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
-_f void mVU_sumXYZ(mV, xmm PQ, xmm Fs) {
+_f void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) {
 	if( x86caps.hasStreamingSIMD4Extensions ) {
 		xDP.PS(Fs, Fs, 0x71);
 		xMOVSS(PQ, Fs);
@@ -290,7 +290,7 @@ _f void mVU_sumXYZ(mV, xmm PQ, xmm Fs) {
 mVUop(mVU_ELENG) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		mVU_sumXYZ(mVU, xmmPQ, Fs);
 		xSQRT.SS (xmmPQ, xmmPQ);
@@ -303,7 +303,7 @@ mVUop(mVU_ELENG) {
 mVUop(mVU_ERCPR) {
 	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS   (xmmPQ, Fs);
 		xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
@@ -318,7 +318,7 @@ mVUop(mVU_ERCPR) {
 mVUop(mVU_ERLENG) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 24); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		mVU_sumXYZ(mVU, xmmPQ, Fs);
 		xSQRT.SS (xmmPQ, xmmPQ);
@@ -334,7 +334,7 @@ mVUop(mVU_ERLENG) {
 mVUop(mVU_ERSADD) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		mVU_sumXYZ(mVU, xmmPQ, Fs);
 		xMOVSSZX (Fs, ptr32[&mVUglob.one[0]]);
@@ -349,7 +349,7 @@ mVUop(mVU_ERSADD) {
 mVUop(mVU_ERSQRT) {
 	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 18); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xAND.PS  (Fs, ptr128[&mVUglob.absclip[0]]);
 		xSQRT.SS (xmmPQ, Fs);
@@ -365,7 +365,7 @@ mVUop(mVU_ERSQRT) {
 mVUop(mVU_ESADD) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 11); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		mVU_sumXYZ(mVU, xmmPQ, Fs);
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
@@ -384,9 +384,9 @@ mVUop(mVU_ESADD) {
 mVUop(mVU_ESIN) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 29); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
-		xmm t1 = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xMOVSS  (xmmPQ, Fs);
 		xMOVAPS (t1, Fs);
@@ -412,7 +412,7 @@ mVUop(mVU_ESIN) {
 mVUop(mVU_ESQRT) {
 	pass1 { mVUanalyzeEFU1(mVU, _Fs_, _Fsf_, 12); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
 		xPSHUF.D(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xAND.PS (Fs, ptr128[&mVUglob.absclip[0]]);
 		xSQRT.SS(xmmPQ, Fs);
@@ -425,8 +425,8 @@ mVUop(mVU_ESQRT) {
 mVUop(mVU_ESUM) {
 	pass1 { mVUanalyzeEFU2(mVU, _Fs_, 12); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
-		xmm t1 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& t1 = mVU->regAlloc->allocReg();
 		xPSHUF.D (xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
 		xPSHUF.D (t1, Fs, 0x1b);
 		SSE_ADDPS(mVU, Fs, t1);
@@ -719,7 +719,7 @@ mVUop(mVU_ISUBIU) {
 mVUop(mVU_MFIR) {
 	pass1 { if (!_Ft_) { mVUlow.isNOP = 1; } analyzeVIreg1(_Is_, mVUlow.VI_read[0]); analyzeReg2(_Ft_, mVUlow.VF_write, 1); }
 	pass2 {
-		xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+		const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 		mVUallocVIa(mVU, eax, _Is_);
 		xMOVSX(eax, ax);
 		xMOVDZX(Ft, eax);
@@ -732,7 +732,7 @@ mVUop(mVU_MFIR) {
 mVUop(mVU_MFP) {
 	pass1 { mVUanalyzeMFP(mVU, _Ft_); }
 	pass2 {
-		xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+		const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 		getPreg(mVU, Ft);
 		mVU->regAlloc->clearNeeded(Ft);
 	}
@@ -742,7 +742,7 @@ mVUop(mVU_MFP) {
 mVUop(mVU_MOVE) {
 	pass1 { mVUanalyzeMOVE(mVU, _Fs_, _Ft_); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W);
 		mVU->regAlloc->clearNeeded(Fs);
 	}
 	pass3 { mVUlog("MOVE.%s vf%02d, vf%02d", _XYZW_String, _Ft_, _Fs_); }
@@ -751,8 +751,8 @@ mVUop(mVU_MOVE) {
 mVUop(mVU_MR32) {
 	pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_);
-		xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_);
+		const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 		if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
 		else          xPSHUF.D(Ft, Fs, 0x39);
 		mVU->regAlloc->clearNeeded(Ft);
@@ -764,7 +764,7 @@ mVUop(mVU_MR32) {
 mVUop(mVU_MTIR) {
 	pass1 { if (!_It_) { mVUlow.isNOP = 1; } analyzeReg5(_Fs_, _Fsf_, mVUlow.VF_read[0]); analyzeVIreg2(_It_, mVUlow.VI_write, 1); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
 		xMOVD(gprT1, Fs);
 		mVUallocVIb(mVU, gprT1, _It_);
 		mVU->regAlloc->clearNeeded(Fs);
@@ -868,7 +868,7 @@ mVUop(mVU_LQ) {
 		}
 		else ptr += getVUmem(_Imm11_);
 
-		xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+		const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 		mVUloadReg(Ft, ptr, _X_Y_Z_W);
 		mVU->regAlloc->clearNeeded(Ft);
 	}
@@ -887,7 +887,7 @@ mVUop(mVU_LQD) {
 			ptr += ecx;
 		}
 		if (!mVUlow.noWriteVF) {
-			xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+			const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 			mVUloadReg(Ft, ptr, _X_Y_Z_W);
 			mVU->regAlloc->clearNeeded(Ft);
 		}
@@ -908,7 +908,7 @@ mVUop(mVU_LQI) {
 			ptr += ecx;
 		}
 		if (!mVUlow.noWriteVF) {
-			xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+			const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 			mVUloadReg(Ft, ptr, _X_Y_Z_W);
 			mVU->regAlloc->clearNeeded(Ft);
 		}
@@ -932,7 +932,7 @@ mVUop(mVU_SQ) {
 		}
 		else ptr += getVUmem(_Imm11_);
 
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
 		mVU->regAlloc->clearNeeded(Fs);
 	}
@@ -950,7 +950,7 @@ mVUop(mVU_SQD) {
 			mVUaddrFix(mVU, ecx);
 			ptr += ecx;
 		}
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
 		mVU->regAlloc->clearNeeded(Fs);
 	}
@@ -969,7 +969,7 @@ mVUop(mVU_SQI) {
 			mVUaddrFix(mVU, ecx);
 			ptr += ecx;
 		}
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
 		mVUsaveReg(Fs, ptr, _X_Y_Z_W, 1);
 		mVU->regAlloc->clearNeeded(Fs);
 	}
@@ -984,7 +984,7 @@ mVUop(mVU_RINIT) {
 	pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
 	pass2 {
 		if (_Fs_ || (_Fsf_ == 3)) {
-			xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+			const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
 			xMOVD(gprT1, Fs);
 			xAND(gprT1, 0x007fffff);
 			xOR (gprT1, 0x3f800000);
@@ -996,9 +996,9 @@ mVUop(mVU_RINIT) {
 	pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); }
 }
 
-_f void mVU_RGET_(mV, x32 Rreg) {
+_f void mVU_RGET_(mV, const x32& Rreg) {
 	if (!mVUlow.noWriteVF) {
-		xmm Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
+		const xmm& Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
 		xMOVDZX(Ft, Rreg);
 		if (!_XYZW_SS) mVUunpack_xyzw(Ft, Ft, 0);
 		mVU->regAlloc->clearNeeded(Ft);
@@ -1039,7 +1039,7 @@ mVUop(mVU_RXOR) {
 	pass1 { mVUanalyzeR1(mVU, _Fs_, _Fsf_); }
 	pass2 {
 		if (_Fs_ || (_Fsf_ == 3)) {
-			xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
+			const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
			xMOVD(gprT1, Fs);
 			xAND(gprT1, 0x7fffff);
 			xXOR(ptr32[Rmem], gprT1);
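Note: one holdout in this file is mVU_DIV, which still declares "xmm Ft;" by value: Ft is filled in from one of two different allocReg() calls, and a reference must be bound at its declaration. When the choice can be expressed as a single conditional expression, a const& binds fine, which is the form the rest of the patch uses (sketch; Reg/alloc/slots are illustrative):

    struct Reg { int Id; explicit Reg(int id) : Id(id) {} };

    static Reg slots[2] = { Reg(0), Reg(1) };
    static const Reg& alloc(int which) { return slots[which]; }

    void example(bool useSecond)
    {
        // A reference can be initialized through a conditional expression...
        const Reg& r = useSecond ? alloc(1) : alloc(0);
        (void)r;
        // ...but it cannot be declared unbound and assigned later, which is
        // why mVU_DIV keeps its by-value register.
    }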
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index 2ff239fd2c..e0bfb057c7 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -93,21 +93,20 @@ typedef xRegister32 x32;
 #define offsetSS  ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8 : 12)))
 #define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2 : 3)))
 
-const xmm
-	xmmT1 = xmm(0), // Used for regAlloc
-	xmmT2 = xmm(1), // Used for regAlloc
-	xmmT3 = xmm(2), // Used for regAlloc
-	xmmT4 = xmm(3), // Used for regAlloc
-	xmmT5 = xmm(4), // Used for regAlloc
-	xmmT6 = xmm(5), // Used for regAlloc
-	xmmT7 = xmm(6), // Used for regAlloc
-	xmmPQ = xmm(7); // Holds the Value and Backup Values of P and Q regs
+#define xmmT1 xmm0 // Used for regAlloc
+#define xmmT2 xmm1 // Used for regAlloc
+#define xmmT3 xmm2 // Used for regAlloc
+#define xmmT4 xmm3 // Used for regAlloc
+#define xmmT5 xmm4 // Used for regAlloc
+#define xmmT6 xmm5 // Used for regAlloc
+#define xmmT7 xmm6 // Used for regAlloc
+#define xmmPQ xmm7 // Holds the Value and Backup Values of P and Q regs
 
-const x32
-	gprT1 = x32(0), // eax - Temp Reg
-	gprT2 = x32(1), // ecx - Temp Reg
-	gprT3 = x32(2), // edx - Temp Reg
-	gprF[4] = {x32(3), x32(5), x32(6), x32(7)}; // ebx, ebp, esi, edi - Status Flags
+#define gprT1 eax // eax - Temp Reg
+#define gprT2 ecx // ecx - Temp Reg
+#define gprT3 edx // edx - Temp Reg
+
+const x32 gprF[4] = {x32(3), x32(5), x32(6), x32(7)}; // ebx, ebp, esi, edi - Status Flags
 
 // Function Params
 #define mP microVU* mVU, int recPass
@@ -297,7 +296,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
 	}										\
 }
 
-void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW=false);
-void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW);
-void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw);
-void mVUloadIreg(xmm reg, int xyzw, VURegs* vuRegs);
\ No newline at end of file
+extern void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW=false);
+extern void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW);
+extern void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw);
+extern void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs);
\ No newline at end of file
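Note: the header's per-translation-unit const copies of the register names become #define aliases of the emitter's global register objects, so xmmT1 and xmm0 are now literally the same object rather than two equal-valued copies; reference identity then agrees with value equality wherever these names are passed by const&. A sketch of the distinction (Reg/reg0 are illustrative):

    struct Reg { int Id; explicit Reg(int id) : Id(id) {} };

    const Reg reg0(0);       // canonical global, as in the emitter
    const Reg regT1_copy(0); // old style: equal value, distinct object
    #define regT1 reg0       // new style: the very same object as reg0

    // With the macro alias, &regT1 == &reg0 holds; with the old copy,
    // &regT1_copy != &reg0 even though both have Id == 0.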
diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl
index 6f69905605..b4c503fdb6 100644
--- a/pcsx2/x86/microVU_Misc.inl
+++ b/pcsx2/x86/microVU_Misc.inl
@@ -19,7 +19,7 @@
 // Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
 //------------------------------------------------------------------
 
-void mVUunpack_xyzw(xmm dstreg, xmm srcreg, int xyzw)
+void mVUunpack_xyzw(const xmm& dstreg, const xmm& srcreg, int xyzw)
 {
 	switch ( xyzw ) {
 		case 0: xPSHUF.D(dstreg, srcreg, 0x00); break; // XXXX
@@ -29,7 +29,7 @@ void mVUunpack_xyzw(xmm dstreg, xmm srcreg, int xyzw)
 	}
 }
 
-void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw)
+void mVUloadReg(const xmm& reg, xAddressVoid ptr, int xyzw)
 {
 	switch( xyzw ) {
 		case 8:  xMOVSSZX(reg, ptr32[ptr]); break; // X
@@ -40,14 +40,14 @@ void mVUloadReg(xmm reg, xAddressVoid ptr, int xyzw)
 	}
 }
 
-void mVUloadIreg(xmm reg, int xyzw, VURegs* vuRegs)
+void mVUloadIreg(const xmm& reg, int xyzw, VURegs* vuRegs)
 {
 	xMOVSSZX(reg, ptr32[&vuRegs->VI[REG_I].UL]);
 	if (!_XYZWss(xyzw)) xSHUF.PS(reg, reg, 0);
 }
 
 // Modifies the Source Reg!
-void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW)
+void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
 {
 	/*xMOVAPS(xmmT2, ptr128[ptr]);
 	if (modXYZW && (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1)) {
@@ -143,7 +143,7 @@ void mVUsaveReg(xmm reg, xAddressVoid ptr, int xyzw, bool modXYZW)
 }
 
 // Modifies the Source Reg! (ToDo: Optimize modXYZW = 1 cases)
-void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW)
+void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
 {
 	xyzw &= 0xf;
 	if ( (dest != src) && (xyzw != 0) ) {
@@ -214,7 +214,7 @@ void mVUmergeRegs(xmm dest, xmm src, int xyzw, bool modXYZW)
 //------------------------------------------------------------------
 
 // Transforms the Address in gprReg to valid VU0/VU1 Address
-_f void mVUaddrFix(mV, x32 gprReg)
+_f void mVUaddrFix(mV, const x32& gprReg)
 {
 	if (isVU1) {
 		xAND(gprReg, 0x3ff); // wrap around
@@ -259,10 +259,10 @@ static const __aligned16 SSEMaskPair MIN_MAX =
 
 
 // Warning: Modifies t1 and t2
-void MIN_MAX_PS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in, bool min)
+void MIN_MAX_PS(microVU* mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in, bool min)
 {
-	xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
-	xmm t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
+	const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
+	const xmm& t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
 	// ZW
 	xPSHUF.D(t1, to, 0xfa);
 	xPAND   (t1, ptr128[MIN_MAX.mask1]);
@@ -289,9 +289,9 @@ void MIN_MAX_PS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in, bool min)
 }
 
 // Warning: Modifies to's upper 3 vectors, and t1
-void MIN_MAX_SS(mV, xmm to, xmm from, xmm t1in, bool min)
+void MIN_MAX_SS(mV, const xmm& to, const xmm& from, const xmm& t1in, bool min)
 {
-	xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
+	const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
 	xSHUF.PS(to, from, 0);
 	xPAND   (to, ptr128[MIN_MAX.mask1]);
 	xPOR    (to, ptr128[MIN_MAX.mask2]);
@@ -302,10 +302,10 @@ void MIN_MAX_SS(mV, xmm to, xmm from, xmm t1in, bool min)
 }
 
 // Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
-void ADD_SS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in)
+void ADD_SS(microVU* mVU, const xmm& to, const xmm& from, const xmm& t1in, const xmm& t2in)
 {
-	xmm t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
-	xmm t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
+	const xmm& t1 = t1in.IsEmpty() ? mVU->regAlloc->allocReg() : t1in;
+	const xmm& t2 = t2in.IsEmpty() ? mVU->regAlloc->allocReg() : t2in;
 	xMOVAPS(t1, to);
 	xMOVAPS(t2, from);
@@ -379,66 +379,66 @@ void ADD_SS(microVU* mVU, xmm to, xmm from, xmm t1in, xmm t2in)
 	mVUclamp4(to, t1, (isPS)?0xf:0x8);		  \
 }
 
-void SSE_MAXPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MAXPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (CHECK_VU_MINMAXHACK) { xMAX.PS(to, from); }
 	else                     { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
 }
-void SSE_MINPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MINPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (CHECK_VU_MINMAXHACK) { xMIN.PS(to, from); }
 	else                     { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
 }
-void SSE_MAXSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MAXSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (CHECK_VU_MINMAXHACK) { xMAX.SS(to, from); }
 	else                     { MIN_MAX_SS(mVU, to, from, t1, 0); }
 }
-void SSE_MINSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MINSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (CHECK_VU_MINMAXHACK) { xMIN.SS(to, from); }
 	else                     { MIN_MAX_SS(mVU, to, from, t1, 1); }
 }
-void SSE_ADD2SS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_ADD2SS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	if (!CHECK_VUADDSUBHACK) { clampOp(xADD.SS, 0); }
 	else                     { ADD_SS(mVU, to, from, t1, t2); }
 }
 
 // FIXME: why do we need two identical definitions with different names?
-void SSE_ADD2PS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_ADD2PS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xADD.PS, 1);
 }
-void SSE_ADDPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_ADDPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xADD.PS, 1);
 }
-void SSE_ADDSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_ADDSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xADD.SS, 0);
 }
-void SSE_SUBPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_SUBPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xSUB.PS, 1);
 }
-void SSE_SUBSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_SUBSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xSUB.SS, 0);
 }
-void SSE_MULPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MULPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xMUL.PS, 1);
 }
-void SSE_MULSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_MULSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xMUL.SS, 0);
 }
-void SSE_DIVPS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_DIVPS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xDIV.PS, 1);
 }
-void SSE_DIVSS(mV, xmm to, xmm from, xmm t1 = xEmptyReg, xmm t2 = xEmptyReg)
+void SSE_DIVSS(mV, const xmm& to, const xmm& from, const xmm& t1 = xEmptyReg, const xmm& t2 = xEmptyReg)
 {
 	clampOp(xDIV.SS, 0);
 }
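Note: the "t1in.IsEmpty() ? allocReg() : t1in" initializers above only work as const references because both branches of the conditional are now lvalues: allocReg() returns const xmm& into the allocator's canonical table, so the ternary yields an lvalue and the reference binds directly to a long-lived object, with no temporary whose lifetime would need extending. A sketch of that binding rule (Reg/alloc/pick are illustrative):

    struct Reg {
        int Id;
        explicit Reg(int id) : Id(id) {}
        bool IsEmpty() const { return Id < 0; }
    };

    static Reg pool(5);
    static const Reg& alloc() { return pool; } // returns a long-lived lvalue

    void pick(const Reg& in)
    {
        // lvalue ? lvalue : lvalue -> the result is itself an lvalue, so
        // 'r' aliases either 'pool' or 'in'; nothing is copied or destroyed.
        const Reg& r = in.IsEmpty() ? alloc() : in;
        (void)r;
    }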
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl
index 8f0453cfef..5f6cfd5853 100644
--- a/pcsx2/x86/microVU_Upper.inl
+++ b/pcsx2/x86/microVU_Upper.inl
@@ -24,28 +24,32 @@
 #define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { xSHL(gprReg, ADD_XYZW); } }
 
 // Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
-static void mVUupdateFlags(mV, xmm reg, xmm regT1 = xEmptyReg, xmm regT2 = xEmptyReg, bool modXYZW = 1) {
-	x32 mReg = gprT1, sReg = getFlagReg(sFLAG.write);
-	bool regT1b = false, regT2b = false;
+static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, const xmm& regT2in = xEmptyReg, bool modXYZW = 1) {
+	const x32& mReg = gprT1;
+	const x32& sReg = getFlagReg(sFLAG.write);
+	bool regT1b = regT1in.IsEmpty(), regT2b = false;
 	static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
 
 	//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
 	if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
 
-	if (regT1.IsEmpty()) {
-		regT1 = mVU->regAlloc->allocReg();
-		regT1b = true;
-	}
+	const xmm& regT1 = regT1b ? mVU->regAlloc->allocReg() : regT1in;
 
-	if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) {
-		if (regT2.IsEmpty()) {
-			regT2 = mVU->regAlloc->allocReg();
+	xmm regT2 = reg;
+	if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW)))
+	{
+		regT2 = regT2in;
+		if (regT2.IsEmpty())
+		{
+			regT2 = mVU->regAlloc->allocReg();
 			regT2b = true;
 		}
+		xPSHUF.D(regT2, reg, 0x1B); // Flip wzyx to xyzw
 	}
 	else regT2 = reg;
+
 	if (sFLAG.doFlag) {
 		mVUallocSFLAGa(sReg, sFLAG.lastWrite); // Get Prev Status Flag
 		if (sFLAG.doNonSticky) xAND(sReg, 0xfffc00ff); // Clear O,U,S,Z flags
@@ -58,12 +62,12 @@ static void mVUupdateFlags(mV, xmm reg, xmm regT1 = xEmpt
 	xCMPEQ.PS(regT1, regT2); // Set all F's if each vector is zero
 	xMOVMSKPS(gprT2, regT1); // Used for Zero Flag Calculation
 
-	xAND(mReg, AND_XYZW);	// Grab "Is Signed" bits from the previous calculation
+	xAND(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
 	xSHL(mReg, 4 + ADD_XYZW);
 
 	//-------------------------Check for Zero flags------------------------------
 
-	xAND(gprT2, AND_XYZW);	// Grab "Is Zero" bits from the previous calculation
+	xAND(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
 	if (mFLAG.doFlag) { SHIFT_XYZW(gprT2); }
 	xOR(mReg, gprT2);
@@ -85,7 +89,7 @@ static void mVUupdateFlags(mV, xmm reg, xmm regT1 = xEmpt
 // Helper Macros and Functions
 //------------------------------------------------------------------
 
-static void (*SSE_PS[]) (microVU*, xmm, xmm, xmm, xmm) = {
+static void (*const SSE_PS[]) (microVU*, const xmm&, const xmm&, const xmm&, const xmm&) = {
 	SSE_ADDPS, // 0
 	SSE_SUBPS, // 1
 	SSE_MULPS, // 2
@@ -94,7 +98,7 @@ static void (*SSE_PS[]) (microVU*, xmm, xmm, xmm, xmm) = {
 	SSE_ADD2PS // 5
 };
 
-static void (*SSE_SS[]) (microVU*, xmm, xmm, xmm, xmm) = {
+static void (*const SSE_SS[]) (microVU*, const xmm&, const xmm&, const xmm&, const xmm&) = {
 	SSE_ADDSS, // 0
 	SSE_SUBSS, // 1
 	SSE_MULSS, // 2
@@ -131,7 +135,7 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) {
 bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
 	opCase1 {
 		if ((opType == 1) && (_Ft_ == _Fs_)) {
-			xmm Fs = mVU->regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W);
+			const xmm& Fs = mVU->regAlloc->allocReg(-1, isACC ? 32 : _Fd_, _X_Y_Z_W);
 			xPXOR(Fs, Fs); // Set to Positive 0
 			mVUupdateFlags(mVU, Fs);
 			mVU->regAlloc->clearNeeded(Fs);
@@ -225,7 +229,7 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
 			if (_XYZW_SS && _X_Y_Z_W != 8) xPSHUF.D(ACC, ACC, shuffleSS(_X_Y_Z_W));
 		}
 		else {
-			xmm tempACC = mVU->regAlloc->allocReg();
+			const xmm& tempACC = mVU->regAlloc->allocReg();
 			xMOVAPS(tempACC, ACC);
 			SSE_PS[opType](mVU, tempACC, Fs, tempFt, xEmptyReg);
 			mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
@@ -304,7 +308,7 @@ mVUop(mVU_ABS) {
 	pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
 	pass2 {
 		if (!_Ft_) return;
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
 		xAND.PS(Fs, ptr128[mVUglob.absclip]);
 		mVU->regAlloc->clearNeeded(Fs);
 	}
@@ -315,8 +319,8 @@ mVUop(mVU_ABS) {
 mVUop(mVU_OPMULA) {
 	pass1 { mVUanalyzeFMAC1(mVU, 0, _Fs_, _Ft_); }
 	pass2 {
-		xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
+		const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 32, _X_Y_Z_W);
 		xPSHUF.D(Fs, Fs, 0xC9); // WXZY
 		xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
@@ -333,9 +337,9 @@ mVUop(mVU_OPMULA) {
 mVUop(mVU_OPMSUB) {
 	pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
 	pass2 {
-		xmm Ft  = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
-		xmm Fs  = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
-		xmm ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
+		const xmm& Ft  = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
+		const xmm& Fs  = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
+		const xmm& ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
 		xPSHUF.D(Fs, Fs, 0xC9); // WXZY
 		xPSHUF.D(Ft, Ft, 0xD2); // WYXZ
@@ -356,9 +360,9 @@ mVUop(mVU_OPMSUB) {
 static void mVU_FTOIx(mP, const float* addr, const char* opName) {
 	pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
 	pass2 {
 		if (!_Ft_) return;
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
-		xmm t1 = mVU->regAlloc->allocReg();
-		xmm t2 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
+		const xmm& t1 = mVU->regAlloc->allocReg();
+		const xmm& t2 = mVU->regAlloc->allocReg();
 
 		// Note: For help understanding this algorithm see recVUMI_FTOI_Saturate()
 		xMOVAPS(t1, Fs);
@@ -383,7 +387,7 @@ static void mVU_FTOIx(mP, const float* addr, const char* opName) {
 	pass1 { mVUanalyzeFMAC2(mVU, _Fs_, _Ft_); }
 	pass2 {
 		if (!_Ft_) return;
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, _Ft_, _X_Y_Z_W, !((_Fs_ == _Ft_) && (_X_Y_Z_W == 0xf)));
 		xCVTDQ2PS(Fs, Fs);
 		if (addr) { xMUL.PS(Fs, ptr128[addr]); }
@@ -398,9 +402,9 @@ static void mVU_ITOFx(mP, const float* addr, const char* opName) {
 mVUop(mVU_CLIP) {
 	pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
 	pass2 {
-		xmm Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
-		xmm Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
-		xmm t1 = mVU->regAlloc->allocReg();
+		const xmm& Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
+		const xmm& Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
+		const xmm& t1 = mVU->regAlloc->allocReg();
 		mVUunpack_xyzw(Ft, Ft, 0);
 		mVUallocCFLAGa(mVU, gprT1, cFLAG.lastWrite);
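Note: the SSE_PS[]/SSE_SS[] dispatch tables above had to change in lock-step with the helpers: a function pointer's parameter types must match the target's signature exactly, so once SSE_ADDPS and friends take const xmm&, the array element type does too (and the added *const makes the table itself immutable). A sketch of the constraint (Reg/opAdd/opSub/opTable are illustrative names):

    struct Reg { int Id; explicit Reg(int id) : Id(id) {} };

    static void opAdd(const Reg& to, const Reg& from) { /* emit add */ }
    static void opSub(const Reg& to, const Reg& from) { /* emit sub */ }

    // Element type mirrors the new const& signatures; the trailing *const
    // pins the table entries themselves as read-only data.
    static void (*const opTable[])(const Reg&, const Reg&) = { opAdd, opSub };

    void dispatch(int opType, const Reg& a, const Reg& b)
    {
        opTable[opType](a, b); // e.g. opType 0 -> opAdd
    }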
diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h
index 76967321c1..b69c6490e1 100644
--- a/pcsx2/x86/newVif.h
+++ b/pcsx2/x86/newVif.h
@@ -33,7 +33,7 @@ typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src);
 #include "newVif_BlockBuffer.h"
 #include "newVif_HashBucket.h"
 
-extern void mVUmergeRegs(xRegisterSSE dest, xRegisterSSE src, int xyzw, bool modXYZW = 0);
+extern void mVUmergeRegs(const xRegisterSSE& dest, const xRegisterSSE& src, int xyzw, bool modXYZW = 0);
 extern void _nVifUnpack (int idx, u8 *data, u32 size, bool isFill);
 extern void dVifUnpack  (int idx, u8 *data, u32 size, bool isFill);
 extern void dVifReset   (int idx);