diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 885c4e6fea..55dbc6278e 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -33,12 +33,12 @@ PCSX2_ALIGNED16(const u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; -PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = { 16.0, 16.0, 16.0, 16.0 }; -PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; -PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; -PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; -PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; -PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; +PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0}; +PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0}; +PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0}; +PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = {0.0625f, 0.0625f, 0.0625f, 0.0625f}; +PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625}; +PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125}; //------------------------------------------------------------------ @@ -270,6 +270,7 @@ __declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) { pop ebx; ldmxcsr g_sseMXCSR + emms ret } diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 9c85fd7e10..9d717f2e47 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -38,19 +38,21 @@ struct microAllocInfo { u8 p; u8 r; u16 info[pSize];// bit 0 = NOP? - // bit 1 = Read Fd from backup memory? - // bit 2 = Read Fs from backup memory? - // bit 3 = Read Ft from backup memory? - // bit 4 = ACC1 or ACC2? + // bit 1 = Used with bit 2 to make a 2-bit key for ACC write instance + // bit 2 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) + // bit 3 = Used with bit 4 to make a 2-bit key for ACC read instance + // bit 4 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) // bit 5 = Read Q1/P1 or backup? // bit 6 = Write to Q2/P2? - // bit 7 = Write Fd/Acc/Result to backup memory? + // bit 7 = Write VI(Fd) Result to backup memory? // bit 8 = Update Mac Flags? // bit 9 = Update Status Flags? // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance // bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance // bit 13 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) + // bit 14 = Read VI(Fs) from backup memory? + // bit 15 = Read VI(Ft) from backup memory? u32 curPC; }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 9e4886820e..52c3bba95d 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -48,6 +48,7 @@ //------------------------------------------------------------------ // FMAC1 - Normal FMAC Opcodes //------------------------------------------------------------------ + microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; @@ -85,6 +86,7 @@ microVUt(void) mVUallocFMAC1b(int& Fd) { //------------------------------------------------------------------ // FMAC2 - ABS/FTOI/ITOF Opcodes //------------------------------------------------------------------ + microVUt(void) mVUallocFMAC2a(int& Fs, int& Ft) { microVU* mVU = mVUx; Fs = xmmFs; @@ -164,4 +166,127 @@ microVUt(void) mVUallocFMAC3b(int& Fd) { mVUallocFMAC1b(Fd); } +//------------------------------------------------------------------ +// FMAC4 - FMAC Opcodes Storing Result to ACC +//------------------------------------------------------------------ + +#define getReg4(reg, _reg_) { \ + mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (_XYZW_SS) ? 15 : _X_Y_Z_W); \ +} + +#define getZero4(reg) { \ + if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); } \ + else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ +} + +#define getACC(reg) { \ + reg = xmmACC0 + writeACC; \ + if (_X_Y_Z_W != 15) { SSE_MOVAPS_XMM_to_XMM(reg, (xmmACC0 + prevACC)); } \ +} + +microVUt(void) mVUallocFMAC4a(int& ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + getACC(ACC); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZeroSS(Ft); } + else { getReg(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (_Ft_ == _Fs_) { Ft = Fs; } + else { + if (!_Ft_) { getZero4(Ft); } + else { getReg4(Ft, _Ft_); } + } + } +} + +microVUt(void) mVUallocFMAC4b(int& ACC, int& Fs) { + microVU* mVU = mVUx; + if (!_Fd_) return; + if (CHECK_VU_OVERFLOW) mVUclamp1(Fs, xmmT1, (_XYZW_SS && !_X) ? 15 : _X_Y_Z_W); + mVUmergeRegs(ACC, Fs, _X_Y_Z_W); +} + +//------------------------------------------------------------------ +// FMAC5 - FMAC BC(xyzw) Opcodes Storing Result to ACC +//------------------------------------------------------------------ + +microVUt(void) mVUallocFMAC5a(int& ACC, int& Fs, int& Ft) { + microVU* mVU = mVUx; + Fs = xmmFs; + Ft = xmmFt; + getACC(ACC); + if (_XYZW_SS && _X) { + if (!_Fs_) { getZeroSS(Fs); } + else { getReg(Fs, _Fs_); } + + if ( (_Ft_ == _Fs_) && _bc_x) { + Ft = Fs; + } + else { + if (!_Ft_) { getZero3SS(Ft); } + else { getReg3SS(Ft, _Ft_); } + } + } + else { + if (!_Fs_) { getZero4(Fs); } + else { getReg4(Fs, _Fs_); } + + if (!_Ft_) { getZero3(Ft); } + else { getReg3(Ft, _Ft_); } + } +} + +microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) { + mVUallocFMAC4b(ACC, Fs); +} + +//------------------------------------------------------------------ +// Flag Allocators +//------------------------------------------------------------------ + +#define getFlagReg(regX, fInst) { \ + switch (fInst) { \ + case 0: regX = gprF0; break; \ + case 1: regX = gprF1; break; \ + case 2: regX = gprF2; break; \ + case 3: regX = gprF3; break; \ + } \ +} + +microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { + getFlagReg(fInstance, fInstance); + MOVZX32R16toR(reg, fInstance); +} + +microVUt(void) mVUallocSFLAGb(int reg, int fInstance) { + getFlagReg(fInstance, fInstance); + MOV32RtoR(fInstance, reg); +} + +microVUt(void) mVUallocMFLAGa(int reg, int fInstance) { + getFlagReg(fInstance, fInstance); + MOV32RtoR(reg, fInstance); + SHR32ItoR(reg, 16); +} + +microVUt(void) mVUallocMFLAGb(int reg, int fInstance) { + getFlagReg(fInstance, fInstance); + AND32ItoR(fInstance, 0xffff); + SHL32ItoR(reg, 16); + OR32RtoR(fInstance, reg); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index b5f5870321..949f345648 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -63,20 +63,29 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define xmmT1 0 // Temp Reg #define xmmFs 1 // Holds the Value of Fs (writes back result Fd) #define xmmFt 2 // Holds the Value of Ft -#define xmmACC1 3 // Holds the Value of ACC -#define xmmACC2 4 // Holds the Backup Value of ACC -#define xmmPQ 5 // Holds the Value and Backup Values of P and Q regs -#define xmmVI 6 // Holds VI regs 8, 9, 10, 11, 12, 13, 14, and 15 -#define xmmF 7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4) +#define xmmACC0 3 // Holds ACC Instance #0 +#define xmmACC1 4 // Holds ACC Instance #1 +#define xmmACC2 5 // Holds ACC Instance #2 +#define xmmACC3 6 // Holds ACC Instance #3 +#define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs + +#define mmxT1 0 // Temp Reg +#define mmxC 1 // Clip Flag? +#define mmxVI0 2 // Holds VI 00 to 03? +#define mmxVI1 3 // Holds VI 04 to 07? +#define mmxVI2 4 // Holds VI 08 to 11? +#define mmxVI3 5 // Holds VI 12 to 15? +#define mmxM 6 // ? +#define mmxS 7 // ? #define gprT1 0 // Temp Reg #define gprT2 1 // Temp Reg -#define gprT3 2 // Temp Reg -#define gprVI7 3 // VI 7 +#define gprT3 2 // Temp Reg? +#define gprF0 3 // MAC Flag::Status Flag 0 #define gprESP 4 // Don't use? -#define gprVI5 5 // VI 6::5 -#define gprVI3 6 // VI 4::3 -#define gprVI1 7 // VI 2::1 +#define gprF1 5 // MAC Flag::Status Flag 1 +#define gprF2 6 // MAC Flag::Status Flag 2 +#define gprF3 7 // MAC Flag::Status Flag 3 // Template Stuff #define mVUx (vuIndex ? µVU1 : µVU0) @@ -87,17 +96,20 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]); #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo -#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) -#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) -#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) -#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) -#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) -#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) -#define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8)) -#define doStatus (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<9)) -#define fmInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) + 4) -#define fsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) + 0) -#define fpmInstance ((((u8)(mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) - 1) & 0x3) + 4) -#define fpsInstance ((((u8)(mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) - 1) & 0x3) + 0) +#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) +#define writeACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) +#define prevACC (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) - 1) & 0x3) +//#define setACCreg ((mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) >> 1) +//#define setACCmem (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) +//#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) +#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) +#define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8)) +#define doStatus (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<9)) +#define fmInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) >> 10) +#define fsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12) +#define fpmInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) >> 10) - 1) & 0x3) +#define fpsInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12) - 1) & 0x3) +//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) +//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) #include "microVU_Misc.inl" diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 6f5ba445a2..207c78536c 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -152,4 +152,62 @@ microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) { } } +// Modifies the Source Reg! +microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) { + xyzw &= 0xf; + if ( (dest != src) && (xyzw != 0) ) { + if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) { + xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); + SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw); + } + else { + switch (xyzw) { + case 1: SSE_MOVHLPS_XMM_to_XMM(src, dest); + SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4); + break; + case 2: SSE_MOVHLPS_XMM_to_XMM(src, dest); + SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64); + break; + case 3: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); + break; + case 4: SSE_MOVSS_XMM_to_XMM(src, dest); + SSE2_MOVSD_XMM_to_XMM(dest, src); + break; + case 5: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8); + SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8); + break; + case 6: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c); + SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78); + break; + case 7: SSE_MOVSS_XMM_to_XMM(src, dest); + SSE_MOVAPS_XMM_to_XMM(dest, src); + break; + case 8: SSE_MOVSS_XMM_to_XMM(dest, src); + break; + case 9: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9); + SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2); + break; + case 10: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d); + SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72); + break; + case 11: SSE_MOVSS_XMM_to_XMM(dest, src); + SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); + break; + case 12: SSE2_MOVSD_XMM_to_XMM(dest, src); + break; + case 13: SSE_MOVHLPS_XMM_to_XMM(dest, src); + SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64); + SSE_MOVAPS_XMM_to_XMM(dest, src); + break; + case 14: SSE_MOVHLPS_XMM_to_XMM(dest, src); + SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4); + SSE_MOVAPS_XMM_to_XMM(dest, src); + break; + default: SSE_MOVAPS_XMM_to_XMM(dest, src); + break; + } + } + } +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index cd568fe1f3..a0ea34cae0 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -24,7 +24,7 @@ //------------------------------------------------------------------ // Declarations //------------------------------------------------------------------ -#define mVUgetCode (vuIndex ? microVU1.regs->code : microVU0.regs->code) +#define mVUgetCode (vuIndex ? microVU1.code : microVU0.code) microVUf(void) mVU_UPPER_FD_00(); microVUf(void) mVU_UPPER_FD_01(); @@ -729,15 +729,15 @@ void (* mVU_UPPER_FD_11_TABLE11 [32])() = { //------------------------------------------------------------------ // Table Functions //------------------------------------------------------------------ -#define doTableStuff(tableName, args) { \ - if (recPass) { \ - if (vuIndex) tableName##11[ args ](); \ - else tableName##01[ args ](); \ - } \ - else { \ - if (vuIndex) tableName##10[ args ](); \ - else tableName##00[ args ](); \ - } \ +#define doTableStuff(tableName, args) { \ + if (recPass) { \ + if (vuIndex) tableName##11[ args ](); \ + else tableName##01[ args ](); \ + } \ + else { \ + if (vuIndex) tableName##10[ args ](); \ + else tableName##00[ args ](); \ + } \ } microVUf(void) mVU_UPPER_FD_00() { doTableStuff(mVU_UPPER_FD_00_TABLE, ((mVUgetCode >> 6) & 0x1f)); } diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index b66891ed91..bc187fb29b 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -26,6 +26,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { microVU* mVU = mVUx; + int sReg, mReg = gprT1; static u8 *pjmp, *pjmp2; static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; @@ -35,8 +36,9 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { if (!doMac) { regT1 = reg; } else SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw if (doStatus) { - SSE_PEXTRW_XMM_to_R32(gprT1, xmmF, fpsInstance); // Get Prev Status Flag - AND16ItoR(gprT1, 0xff0); // Keep Sticky and D/I flags + getFlagReg(sReg, fsInstance); // Set sReg to valid GPR by Cur Flag Instance + mVUallocSFLAGa(sReg, fpsInstance); // Get Prev Status Flag + AND16ItoR(sReg, 0xff0); // Keep Sticky and D/I flags } //-------------------------Check for Signed flags------------------------------ @@ -44,64 +46,91 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { // The following code makes sure the Signed Bit isn't set with Negative Zero SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2 SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(gprT3, regT2); // Used for Zero Flag Calculation + SSE_MOVMSKPS_XMM_to_R32(gprT2, regT2); // Used for Zero Flag Calculation SSE_ANDNPS_XMM_to_XMM(regT2, regT1); - SSE_MOVMSKPS_XMM_to_R32(gprT2, regT2); // Move the sign bits of the t1reg + SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg - AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation + AND16ItoR(mReg, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation pjmp = JZ8(0); // Skip if none are - if (doMac) SHL16ItoR(gprT2, 4); - if (doStatus) OR16ItoR(gprT1, 0x82); // SS, S flags + if (doMac) SHL16ItoR(mReg, 4); + if (doStatus) OR16ItoR(sReg, 0x82); // SS, S flags if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking x86SetJ8(pjmp); //-------------------------Check for Zero flags------------------------------ - AND16ItoR(gprT3, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation + AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation pjmp = JZ8(0); // Skip if none are - if (doMac) OR32RtoR(gprT2, gprT3); - if (doStatus) OR16ItoR(gprT1, 0x41); // ZS, Z flags + if (doMac) OR32RtoR(mReg, gprT2); + if (doStatus) OR16ItoR(sReg, 0x41); // ZS, Z flags x86SetJ8(pjmp); //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------ if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here - if (doMac) SSE_PINSRW_R32_to_XMM(xmmF, gprT2, fmInstance); // Set Mac Flag - if (doStatus) SSE_PINSRW_R32_to_XMM(xmmF, gprT1, fsInstance); // Set Status Flag + if (doMac) mVUallocMFLAGb(mReg, fmInstance); // Set Mac Flag } //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define mVU_FMAC1(operation) { \ - microVU* mVU = mVUx; \ - if (recPass == 0) {} \ - else { \ - int Fd, Fs, Ft; \ - if (isNOP) return; \ - mVUallocFMAC1a(Fd, Fs, Ft); \ - if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ - else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ - mVUallocFMAC1b(Fd); \ - } \ +#define mVU_FMAC1(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC1a(Fd, Fs, Ft); \ + if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC1b(Fd); \ + } \ } -#define mVU_FMAC3(operation) { \ - microVU* mVU = mVUx; \ - if (recPass == 0) {} \ - else { \ - int Fd, Fs, Ft; \ - if (isNOP) return; \ - mVUallocFMAC3a(Fd, Fs, Ft); \ - if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ - else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ - mVUallocFMAC3b(Fd); \ - } \ +#define mVU_FMAC3(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int Fd, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC3a(Fd, Fs, Ft); \ + if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC3b(Fd); \ + } \ +} + +#define mVU_FMAC4(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC4a(ACC, Fs, Ft); \ + if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC4b(ACC, Fs); \ + } \ +} + +#define mVU_FMAC5(operation) { \ + microVU* mVU = mVUx; \ + if (recPass == 0) {} \ + else { \ + int ACC, Fs, Ft; \ + if (isNOP) return; \ + mVUallocFMAC5a(ACC, Fs, Ft); \ + if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ + else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ + mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W); \ + mVUallocFMAC5b(ACC, Fs); \ + } \ } //------------------------------------------------------------------ @@ -122,48 +151,45 @@ microVUf(void) mVU_ABS() { microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); } microVUf(void) mVU_ADDi(){} microVUf(void) mVU_ADDq(){} -microVUq(void) mVU_ADDxyzw() { mVU_FMAC3(ADD); } -microVUf(void) mVU_ADDx() { mVU_ADDxyzw(); } -microVUf(void) mVU_ADDy() { mVU_ADDxyzw(); } -microVUf(void) mVU_ADDz() { mVU_ADDxyzw(); } -microVUf(void) mVU_ADDw() { mVU_ADDxyzw(); } -microVUf(void) mVU_ADDA(){} +microVUf(void) mVU_ADDx() { mVU_FMAC3(ADD); } +microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); } +microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); } +microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); } +microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); } microVUf(void) mVU_ADDAi(){} microVUf(void) mVU_ADDAq(){} -microVUf(void) mVU_ADDAx(){} -microVUf(void) mVU_ADDAy(){} -microVUf(void) mVU_ADDAz(){} -microVUf(void) mVU_ADDAw(){} +microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); } +microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); } +microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); } +microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); } microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); } microVUf(void) mVU_SUBi(){} microVUf(void) mVU_SUBq(){} -microVUq(void) mVU_SUBxyzw() { mVU_FMAC3(SUB); } -microVUf(void) mVU_SUBx() { mVU_SUBxyzw(); } -microVUf(void) mVU_SUBy() { mVU_SUBxyzw(); } -microVUf(void) mVU_SUBz() { mVU_SUBxyzw(); } -microVUf(void) mVU_SUBw() { mVU_SUBxyzw(); } -microVUf(void) mVU_SUBA(){} +microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); } +microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); } +microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); } +microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); } +microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); } microVUf(void) mVU_SUBAi(){} microVUf(void) mVU_SUBAq(){} -microVUf(void) mVU_SUBAx(){} -microVUf(void) mVU_SUBAy(){} -microVUf(void) mVU_SUBAz(){} -microVUf(void) mVU_SUBAw(){} +microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); } +microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); } +microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); } +microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); } microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); } microVUf(void) mVU_MULi(){} microVUf(void) mVU_MULq(){} -microVUq(void) mVU_MULxyzw() { mVU_FMAC3(MUL); } -microVUf(void) mVU_MULx() { mVU_MULxyzw(); } -microVUf(void) mVU_MULy() { mVU_MULxyzw(); } -microVUf(void) mVU_MULz() { mVU_MULxyzw(); } -microVUf(void) mVU_MULw() { mVU_MULxyzw(); } -microVUf(void) mVU_MULA(){} +microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); } +microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); } +microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); } +microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); } +microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); } microVUf(void) mVU_MULAi(){} microVUf(void) mVU_MULAq(){} -microVUf(void) mVU_MULAx(){} -microVUf(void) mVU_MULAy(){} -microVUf(void) mVU_MULAz(){} -microVUf(void) mVU_MULAw(){} +microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); } +microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); } +microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); } +microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); } microVUf(void) mVU_MADD(){} microVUf(void) mVU_MADDi(){} microVUf(void) mVU_MADDq(){} @@ -194,18 +220,16 @@ microVUf(void) mVU_MSUBAz(){} microVUf(void) mVU_MSUBAw(){} microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); } microVUf(void) mVU_MAXi(){} -microVUq(void) mVU_MAXxyzw() { mVU_FMAC3(MAX); } -microVUf(void) mVU_MAXx() { mVU_MAXxyzw(); } -microVUf(void) mVU_MAXy() { mVU_MAXxyzw(); } -microVUf(void) mVU_MAXz() { mVU_MAXxyzw(); } -microVUf(void) mVU_MAXw() { mVU_MAXxyzw(); } +microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); } +microVUf(void) mVU_MAXy() { mVU_FMAC3(MAX); } +microVUf(void) mVU_MAXz() { mVU_FMAC3(MAX); } +microVUf(void) mVU_MAXw() { mVU_FMAC3(MAX); } microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); } microVUf(void) mVU_MINIi(){} -microVUq(void) mVU_MINIxyzw(){ mVU_FMAC3(MIN); } -microVUf(void) mVU_MINIx() { mVU_MINIxyzw(); } -microVUf(void) mVU_MINIy() { mVU_MINIxyzw(); } -microVUf(void) mVU_MINIz() { mVU_MINIxyzw(); } -microVUf(void) mVU_MINIw() { mVU_MINIxyzw(); } +microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); } +microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); } +microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); } +microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); } microVUf(void) mVU_OPMULA(){} microVUf(void) mVU_OPMSUB(){} microVUf(void) mVU_NOP(){}