implemented a lot of microVU shit...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@708 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-03-07 02:38:05 +00:00
parent 56f802c80f
commit 4f63ac1101
7 changed files with 341 additions and 119 deletions

View File

@ -33,12 +33,12 @@ PCSX2_ALIGNED16(const u32 mVU_absclip[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff,
PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; PCSX2_ALIGNED16(const u32 mVU_signbit[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; PCSX2_ALIGNED16(const u32 mVU_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; PCSX2_ALIGNED16(const u32 mVU_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = { 16.0, 16.0, 16.0, 16.0 }; PCSX2_ALIGNED16(const float mVU_FTOI_4[4]) = {16.0, 16.0, 16.0, 16.0};
PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; PCSX2_ALIGNED16(const float mVU_FTOI_12[4]) = {4096.0, 4096.0, 4096.0, 4096.0};
PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; PCSX2_ALIGNED16(const float mVU_FTOI_15[4]) = {32768.0, 32768.0, 32768.0, 32768.0};
PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; PCSX2_ALIGNED16(const float mVU_ITOF_4[4]) = {0.0625f, 0.0625f, 0.0625f, 0.0625f};
PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; PCSX2_ALIGNED16(const float mVU_ITOF_12[4]) = {0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625};
PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; PCSX2_ALIGNED16(const float mVU_ITOF_15[4]) = {0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125};
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -270,6 +270,7 @@ __declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) {
pop ebx; pop ebx;
ldmxcsr g_sseMXCSR ldmxcsr g_sseMXCSR
emms
ret ret
} }

View File

@ -38,19 +38,21 @@ struct microAllocInfo {
u8 p; u8 p;
u8 r; u8 r;
u16 info[pSize];// bit 0 = NOP? u16 info[pSize];// bit 0 = NOP?
// bit 1 = Read Fd from backup memory? // bit 1 = Used with bit 2 to make a 2-bit key for ACC write instance
// bit 2 = Read Fs from backup memory? // bit 2 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
// bit 3 = Read Ft from backup memory? // bit 3 = Used with bit 4 to make a 2-bit key for ACC read instance
// bit 4 = ACC1 or ACC2? // bit 4 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
// bit 5 = Read Q1/P1 or backup? // bit 5 = Read Q1/P1 or backup?
// bit 6 = Write to Q2/P2? // bit 6 = Write to Q2/P2?
// bit 7 = Write Fd/Acc/Result to backup memory? // bit 7 = Write VI(Fd) Result to backup memory?
// bit 8 = Update Mac Flags? // bit 8 = Update Mac Flags?
// bit 9 = Update Status Flags? // bit 9 = Update Status Flags?
// bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance
// bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) // bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
// bit 12 = Used with bit 13 to make a 2-bit key for status flag instance // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance
// bit 13 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3) // bit 13 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
// bit 14 = Read VI(Fs) from backup memory?
// bit 15 = Read VI(Ft) from backup memory?
u32 curPC; u32 curPC;
}; };

View File

@ -48,6 +48,7 @@
//------------------------------------------------------------------ //------------------------------------------------------------------
// FMAC1 - Normal FMAC Opcodes // FMAC1 - Normal FMAC Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) { microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
Fs = xmmFs; Fs = xmmFs;
@ -85,6 +86,7 @@ microVUt(void) mVUallocFMAC1b(int& Fd) {
//------------------------------------------------------------------ //------------------------------------------------------------------
// FMAC2 - ABS/FTOI/ITOF Opcodes // FMAC2 - ABS/FTOI/ITOF Opcodes
//------------------------------------------------------------------ //------------------------------------------------------------------
microVUt(void) mVUallocFMAC2a(int& Fs, int& Ft) { microVUt(void) mVUallocFMAC2a(int& Fs, int& Ft) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
Fs = xmmFs; Fs = xmmFs;
@ -164,4 +166,127 @@ microVUt(void) mVUallocFMAC3b(int& Fd) {
mVUallocFMAC1b<vuIndex>(Fd); mVUallocFMAC1b<vuIndex>(Fd);
} }
//------------------------------------------------------------------
// FMAC4 - FMAC Opcodes Storing Result to ACC
//------------------------------------------------------------------
// getReg4(reg, _reg_): emits a load of VF[_reg_] into XMM register 'reg'.
// When _XYZW_SS is set the load mask is forced to 15 (all four lanes) instead of
// the instruction's own _X_Y_Z_W mask. If CHECK_VU_EXTRA_OVERFLOW is enabled the
// loaded value is additionally clamped with mVUclamp2, using xmmT1 as scratch.
// Expands to a brace block; expects 'mVU' and 'vuIndex' to be in scope.
#define getReg4(reg, _reg_) { \
mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, (_XYZW_SS) ? 15 : _X_Y_Z_W); \
}
// getZero4(reg): source-register-0 variant of getReg4.
// If _W is set, VF[0] is loaded from memory with the same mask rule as getReg4;
// otherwise 'reg' is simply zeroed with XORPS.
// NOTE(review): presumably VF[0] is zero in every lane except W, which is why the
// memory load is only needed when W is selected -- confirm against the VU register spec.
#define getZero4(reg) { \
if (_W) { mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[0].UL[0], (_XYZW_SS) ? 15 : _X_Y_Z_W); } \
else { SSE_XORPS_XMM_to_XMM(reg, reg); } \
}
// getACC(reg): assigns to 'reg' the XMM register of the ACC instance this
// instruction writes (xmmACC0 + writeACC). For a partial write (_X_Y_Z_W != 15)
// the previous ACC instance (xmmACC0 + prevACC) is first copied into it, so the
// lanes that are not written keep their earlier contents.
#define getACC(reg) { \
reg = xmmACC0 + writeACC; \
if (_X_Y_Z_W != 15) { SSE_MOVAPS_XMM_to_XMM(reg, (xmmACC0 + prevACC)); } \
}
// Operand setup for FMAC4 opcodes (result is stored to ACC).
//   ACC - receives the XMM register of the ACC instance to write (see getACC).
//   Fs  - receives the XMM register holding the first source operand.
//   Ft  - receives the XMM register holding the second source operand; this is the
//         same register as Fs when both operands name the same VF register.
// Emission order is: ACC setup, Fs load, Ft load.
microVUt(void) mVUallocFMAC4a(int& ACC, int& Fs, int& Ft) {
	microVU* mVU = mVUx;

	Fs = xmmFs;
	Ft = xmmFt;
	getACC(ACC);

	// Scalar loads are only used for a single-lane op on X; everything else
	// goes through the full-vector loaders.
	const bool scalarOnX   = (_XYZW_SS && _X);
	const bool sameSources = (_Ft_ == _Fs_);

	// First operand.
	if (scalarOnX) {
		if (_Fs_)	{ getReg(Fs, _Fs_); }
		else		{ getZeroSS(Fs); }
	}
	else {
		if (_Fs_)	{ getReg4(Fs, _Fs_); }
		else		{ getZero4(Fs); }
	}

	// Second operand: reuse the first one when it is the same VF register.
	if (sameSources) {
		Ft = Fs;
		return;
	}
	if (scalarOnX) {
		if (_Ft_)	{ getReg(Ft, _Ft_); }
		else		{ getZeroSS(Ft); }
	}
	else {
		if (_Ft_)	{ getReg4(Ft, _Ft_); }
		else		{ getZero4(Ft); }
	}
}
// Write-back half of the FMAC4 allocator: optionally clamps the computed result
// in Fs and merges the lanes selected by _X_Y_Z_W into the ACC register.
//   ACC - XMM register of the ACC instance being written (from mVUallocFMAC4a).
//   Fs  - XMM register holding the result; may be modified (clamp / merge scratch).
//
// There is deliberately no "if (!_Fd_) return;" guard here. The destination of
// these opcodes is ACC, not VF[Fd], so the Fd bit-field says nothing about
// whether the result must be kept. The upper-opcode tables are indexed by
// (code >> 6) & 0x1f, so an ACC opcode that sits at table index 0 would
// otherwise never update ACC.
// NOTE(review): assumes _Fd_ decodes those same bits 6..10 -- confirm in the header.
microVUt(void) mVUallocFMAC4b(int& ACC, int& Fs) {
	microVU* mVU = mVUx;
	// A single-lane op that is not on X was computed as a full vector in
	// mVUallocFMAC4a, so it is clamped as a full vector too.
	if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fs, xmmT1, (_XYZW_SS && !_X) ? 15 : _X_Y_Z_W);
	mVUmergeRegs<vuIndex>(ACC, Fs, _X_Y_Z_W);
}
//------------------------------------------------------------------
// FMAC5 - FMAC BC(xyzw) Opcodes Storing Result to ACC
//------------------------------------------------------------------
// Operand setup for FMAC5 opcodes (broadcast-operand ops whose result goes to ACC).
//   ACC - receives the XMM register of the ACC instance to write (see getACC).
//   Fs  - receives the XMM register holding the first source operand.
//   Ft  - receives the XMM register holding the broadcast operand, loaded through
//         the getReg3*/getZero3* helpers.
// Emission order is: ACC setup, Fs load, Ft load.
microVUt(void) mVUallocFMAC5a(int& ACC, int& Fs, int& Ft) {
	microVU* mVU = mVUx;

	Fs = xmmFs;
	Ft = xmmFt;
	getACC(ACC);

	// Vector path: everything except a single-lane op on X.
	if (!(_XYZW_SS && _X)) {
		if (_Fs_)	{ getReg4(Fs, _Fs_); }
		else		{ getZero4(Fs); }

		if (_Ft_)	{ getReg3(Ft, _Ft_); }
		else		{ getZero3(Ft); }
		return;
	}

	// Scalar path (single-lane op on X).
	if (_Fs_)	{ getReg(Fs, _Fs_); }
	else		{ getZeroSS(Fs); }

	// The already-loaded Fs can stand in for Ft only when both operands name the
	// same VF register and the broadcast component is x.
	if ((_Ft_ == _Fs_) && _bc_x) {
		Ft = Fs;
		return;
	}
	if (_Ft_)	{ getReg3SS(Ft, _Ft_); }
	else		{ getZero3SS(Ft); }
}
// Write-back half of the FMAC5 allocator. FMAC5 results are stored to ACC exactly
// like FMAC4 results, so this simply forwards to mVUallocFMAC4b.
microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) {
mVUallocFMAC4b<vuIndex>(ACC, Fs);
}
//------------------------------------------------------------------
// Flag Allocators
//------------------------------------------------------------------
// getFlagReg(regX, fInst): maps a flag instance number (0..3) to the general
// purpose register that holds it (gprF0..gprF3) and stores that register index
// in regX. There is no default case: for any other fInst value regX is left
// unchanged. The switch operand is read before any assignment, so passing the
// same variable for both arguments (as the flag allocators do) is safe.
#define getFlagReg(regX, fInst) { \
switch (fInst) { \
case 0: regX = gprF0; break; \
case 1: regX = gprF1; break; \
case 2: regX = gprF2; break; \
case 3: regX = gprF3; break; \
} \
}
// Emits code that reads the status flag of instance 'fInstance' into GPR 'reg'.
// Per the gprF0..gprF3 layout notes the status flag is the low 16 bits of the
// flag register, hence the 16-bit -> 32-bit MOVZX emitter.
microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
	// Starts out as the raw instance number so an out-of-range value passes
	// through untouched (getFlagReg has no default case).
	int flagGpr = fInstance;
	getFlagReg(flagGpr, fInstance);
	MOVZX32R16toR(reg, flagGpr);
}
// Emits code that stores GPR 'reg' into the flag register of instance
// 'fInstance' with a full 32-bit register-to-register move.
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
	// Starts out as the raw instance number so an out-of-range value passes
	// through untouched (getFlagReg has no default case).
	int flagGpr = fInstance;
	getFlagReg(flagGpr, fInstance);
	MOV32RtoR(flagGpr, reg);
}
// Emits code that reads the MAC flag of instance 'fInstance' into GPR 'reg':
// the whole flag register is copied, then shifted right by 16 so that its upper
// half ends up in the low bits of 'reg'.
microVUt(void) mVUallocMFLAGa(int reg, int fInstance) {
	// Starts out as the raw instance number so an out-of-range value passes
	// through untouched (getFlagReg has no default case).
	int flagGpr = fInstance;
	getFlagReg(flagGpr, fInstance);
	MOV32RtoR(reg, flagGpr);
	SHR32ItoR(reg, 16);
}
// Emits code that stores the MAC flag held in GPR 'reg' into the upper 16 bits
// of the flag register of instance 'fInstance', keeping that register's low
// 16 bits. Note that 'reg' itself is shifted left by 16 in the emitted code,
// i.e. it is modified.
microVUt(void) mVUallocMFLAGb(int reg, int fInstance) {
	// Starts out as the raw instance number so an out-of-range value passes
	// through untouched (getFlagReg has no default case).
	int flagGpr = fInstance;
	getFlagReg(flagGpr, fInstance);
	AND32ItoR(flagGpr, 0xffff);	// keep only the low half of the flag register
	SHL32ItoR(reg, 16);			// move the new value into the upper half
	OR32RtoR(flagGpr, reg);		// combine
}
#endif //PCSX2_MICROVU #endif //PCSX2_MICROVU

View File

@ -63,20 +63,29 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
#define xmmT1 0 // Temp Reg #define xmmT1 0 // Temp Reg
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd) #define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
#define xmmFt 2 // Holds the Value of Ft #define xmmFt 2 // Holds the Value of Ft
#define xmmACC1 3 // Holds the Value of ACC #define xmmACC0 3 // Holds ACC Instance #0
#define xmmACC2 4 // Holds the Backup Value of ACC #define xmmACC1 4 // Holds ACC Instance #1
#define xmmPQ 5 // Holds the Value and Backup Values of P and Q regs #define xmmACC2 5 // Holds ACC Instance #2
#define xmmVI 6 // Holds VI regs 8, 9, 10, 11, 12, 13, 14, and 15 #define xmmACC3 6 // Holds ACC Instance #3
#define xmmF 7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4) #define xmmPQ 7 // Holds the Value and Backup Values of P and Q regs
#define mmxT1 0 // Temp Reg
#define mmxC 1 // Clip Flag?
#define mmxVI0 2 // Holds VI 00 to 03?
#define mmxVI1 3 // Holds VI 04 to 07?
#define mmxVI2 4 // Holds VI 08 to 11?
#define mmxVI3 5 // Holds VI 12 to 15?
#define mmxM 6 // ?
#define mmxS 7 // ?
#define gprT1 0 // Temp Reg #define gprT1 0 // Temp Reg
#define gprT2 1 // Temp Reg #define gprT2 1 // Temp Reg
#define gprT3 2 // Temp Reg #define gprT3 2 // Temp Reg?
#define gprVI7 3 // VI 7 #define gprF0 3 // MAC Flag::Status Flag 0
#define gprESP 4 // Don't use? #define gprESP 4 // Don't use?
#define gprVI5 5 // VI 6::5 #define gprF1 5 // MAC Flag::Status Flag 1
#define gprVI3 6 // VI 4::3 #define gprF2 6 // MAC Flag::Status Flag 2
#define gprVI1 7 // VI 2::1 #define gprF3 7 // MAC Flag::Status Flag 3
// Template Stuff // Template Stuff
#define mVUx (vuIndex ? &microVU1 : &microVU0) #define mVUx (vuIndex ? &microVU1 : &microVU0)
@ -87,17 +96,20 @@ PCSX2_ALIGNED16_EXTERN(const float mVU_ITOF_15[4]);
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0)) #define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0))
#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) #define writeACC ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1)
#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2)) #define prevACC (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<1)) >> 1) - 1) & 0x3)
#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3)) //#define setACCreg ((mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1)) >> 1)
#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7)) //#define setACCmem (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2))
#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8)) //#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7))
#define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8)) #define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8))
#define doStatus (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<9)) #define doMac (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<8))
#define fmInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) + 4) #define doStatus (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<9))
#define fsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) + 0) #define fmInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) >> 10)
#define fpmInstance ((((u8)(mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) - 1) & 0x3) + 4) #define fsInstance ((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12)
#define fpsInstance ((((u8)(mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) - 1) & 0x3) + 0) #define fpmInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<10)) >> 10) - 1) & 0x3)
#define fpsInstance (((u8)((mVUallocInfo.info[mVUallocInfo.curPC] & (3<<12)) >> 12) - 1) & 0x3)
//#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2))
//#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3))
#include "microVU_Misc.inl" #include "microVU_Misc.inl"

View File

@ -152,4 +152,62 @@ microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) {
} }
} }
// Emits code that copies selected lanes of XMM register 'src' into XMM register
// 'dest', leaving the other lanes of 'dest' untouched.
//   dest - XMM register index that receives the merged value.
//   src  - XMM register index providing the new lanes.
//   xyzw - 4-bit lane mask (only the low 4 bits are used): bit 8 selects lane 0,
//          bit 4 lane 1, bit 2 lane 2, bit 1 lane 3 (presumably X, Y, Z, W).
// Nothing is emitted when dest == src or when the mask is empty.
// Modifies the Source Reg! (cases 1, 2, 4, 7, 13 and 14 use 'src' as scratch.)
// The per-case notes below list the resulting lanes 0..3, writing dN / sN for
// lane N of the incoming dest / src; they assume each SSE_* emitter produces
// the instruction its name spells.
microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) {
xyzw &= 0xf;
if ( (dest != src) && (xyzw != 0) ) {
// With SSE4.1 a single BLENDPS handles every mask; masks 8 and 15 are left to
// the switch below, where each needs only one MOVSS / MOVAPS.
if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
// Reverse the 4 mask bits: BLENDPS immediate bit N selects lane N, while in
// 'xyzw' lane 0 is the highest bit.
xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw);
}
else {
switch (xyzw) {
case 1: SSE_MOVHLPS_XMM_to_XMM(src, dest); // src  = {d2, d3, s2, s3}
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4); // dest = {d0, d1, d2, s3}
break;
case 2: SSE_MOVHLPS_XMM_to_XMM(src, dest); // src  = {d2, d3, s2, s3}
SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64); // dest = {d0, d1, s2, d3}
break;
case 3: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); // dest = {d0, d1, s2, s3}
break;
case 4: SSE_MOVSS_XMM_to_XMM(src, dest); // src  = {d0, s1, s2, s3}
SSE2_MOVSD_XMM_to_XMM(dest, src); // dest = {d0, s1, d2, d3}
break;
case 5: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8); // dest = {d0, d2, s1, s3}
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8); // dest = {d0, s1, d2, s3}
break;
case 6: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c); // dest = {d0, d3, s1, s2}
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78); // dest = {d0, s1, s2, d3}
break;
case 7: SSE_MOVSS_XMM_to_XMM(src, dest); // src  = {d0, s1, s2, s3}
SSE_MOVAPS_XMM_to_XMM(dest, src); // dest = {d0, s1, s2, s3}
break;
case 8: SSE_MOVSS_XMM_to_XMM(dest, src); // dest = {s0, d1, d2, d3}
break;
case 9: SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9); // dest = {d1, d2, s0, s3}
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2); // dest = {s0, d1, d2, s3}
break;
case 10: SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d); // dest = {d1, d3, s0, s2}
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72); // dest = {s0, d1, s2, d3}
break;
case 11: SSE_MOVSS_XMM_to_XMM(dest, src); // dest = {s0, d1, d2, d3}
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); // dest = {s0, d1, s2, s3}
break;
case 12: SSE2_MOVSD_XMM_to_XMM(dest, src); // dest = {s0, s1, d2, d3}
break;
case 13: SSE_MOVHLPS_XMM_to_XMM(dest, src); // dest = {s2, s3, d2, d3}
SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64); // src  = {s0, s1, d2, s3}
SSE_MOVAPS_XMM_to_XMM(dest, src); // dest = {s0, s1, d2, s3}
break;
case 14: SSE_MOVHLPS_XMM_to_XMM(dest, src); // dest = {s2, s3, d2, d3}
SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4); // src  = {s0, s1, s2, d3}
SSE_MOVAPS_XMM_to_XMM(dest, src); // dest = {s0, s1, s2, d3}
break;
// Only mask 15 reaches here (0 was filtered out above): plain full copy.
default: SSE_MOVAPS_XMM_to_XMM(dest, src);
break;
}
}
}
}
#endif //PCSX2_MICROVU #endif //PCSX2_MICROVU

View File

@ -24,7 +24,7 @@
//------------------------------------------------------------------ //------------------------------------------------------------------
// Declarations // Declarations
//------------------------------------------------------------------ //------------------------------------------------------------------
#define mVUgetCode (vuIndex ? microVU1.regs->code : microVU0.regs->code) #define mVUgetCode (vuIndex ? microVU1.code : microVU0.code)
microVUf(void) mVU_UPPER_FD_00(); microVUf(void) mVU_UPPER_FD_00();
microVUf(void) mVU_UPPER_FD_01(); microVUf(void) mVU_UPPER_FD_01();
@ -729,15 +729,15 @@ void (* mVU_UPPER_FD_11_TABLE11 [32])() = {
//------------------------------------------------------------------ //------------------------------------------------------------------
// Table Functions // Table Functions
//------------------------------------------------------------------ //------------------------------------------------------------------
#define doTableStuff(tableName, args) { \ #define doTableStuff(tableName, args) { \
if (recPass) { \ if (recPass) { \
if (vuIndex) tableName##11[ args ](); \ if (vuIndex) tableName##11[ args ](); \
else tableName##01[ args ](); \ else tableName##01[ args ](); \
} \ } \
else { \ else { \
if (vuIndex) tableName##10[ args ](); \ if (vuIndex) tableName##10[ args ](); \
else tableName##00[ args ](); \ else tableName##00[ args ](); \
} \ } \
} }
microVUf(void) mVU_UPPER_FD_00() { doTableStuff(mVU_UPPER_FD_00_TABLE, ((mVUgetCode >> 6) & 0x1f)); } microVUf(void) mVU_UPPER_FD_00() { doTableStuff(mVU_UPPER_FD_00_TABLE, ((mVUgetCode >> 6) & 0x1f)); }

View File

@ -26,6 +26,7 @@
microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) { microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
int sReg, mReg = gprT1;
static u8 *pjmp, *pjmp2; static u8 *pjmp, *pjmp2;
static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
@ -35,8 +36,9 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) {
if (!doMac) { regT1 = reg; } if (!doMac) { regT1 = reg; }
else SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw else SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw
if (doStatus) { if (doStatus) {
SSE_PEXTRW_XMM_to_R32(gprT1, xmmF, fpsInstance); // Get Prev Status Flag getFlagReg(sReg, fsInstance); // Set sReg to valid GPR by Cur Flag Instance
AND16ItoR(gprT1, 0xff0); // Keep Sticky and D/I flags mVUallocSFLAGa<vuIndex>(sReg, fpsInstance); // Get Prev Status Flag
AND16ItoR(sReg, 0xff0); // Keep Sticky and D/I flags
} }
//-------------------------Check for Signed flags------------------------------ //-------------------------Check for Signed flags------------------------------
@ -44,64 +46,91 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) {
// The following code makes sure the Signed Bit isn't set with Negative Zero // The following code makes sure the Signed Bit isn't set with Negative Zero
SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2 SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2
SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero
SSE_MOVMSKPS_XMM_to_R32(gprT3, regT2); // Used for Zero Flag Calculation SSE_MOVMSKPS_XMM_to_R32(gprT2, regT2); // Used for Zero Flag Calculation
SSE_ANDNPS_XMM_to_XMM(regT2, regT1); SSE_ANDNPS_XMM_to_XMM(regT2, regT1);
SSE_MOVMSKPS_XMM_to_R32(gprT2, regT2); // Move the sign bits of the t1reg SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg
AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation AND16ItoR(mReg, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
if (doMac) SHL16ItoR(gprT2, 4); if (doMac) SHL16ItoR(mReg, 4);
if (doStatus) OR16ItoR(gprT1, 0x82); // SS, S flags if (doStatus) OR16ItoR(sReg, 0x82); // SS, S flags
if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
x86SetJ8(pjmp); x86SetJ8(pjmp);
//-------------------------Check for Zero flags------------------------------ //-------------------------Check for Zero flags------------------------------
AND16ItoR(gprT3, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are pjmp = JZ8(0); // Skip if none are
if (doMac) OR32RtoR(gprT2, gprT3); if (doMac) OR32RtoR(mReg, gprT2);
if (doStatus) OR16ItoR(gprT1, 0x41); // ZS, Z flags if (doStatus) OR16ItoR(sReg, 0x41); // ZS, Z flags
x86SetJ8(pjmp); x86SetJ8(pjmp);
//-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------ //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------
if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
if (doMac) SSE_PINSRW_R32_to_XMM(xmmF, gprT2, fmInstance); // Set Mac Flag if (doMac) mVUallocMFLAGb<vuIndex>(mReg, fmInstance); // Set Mac Flag
if (doStatus) SSE_PINSRW_R32_to_XMM(xmmF, gprT1, fsInstance); // Set Status Flag
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
// Helper Macros // Helper Macros
//------------------------------------------------------------------ //------------------------------------------------------------------
#define mVU_FMAC1(operation) { \ #define mVU_FMAC1(operation) { \
microVU* mVU = mVUx; \ microVU* mVU = mVUx; \
if (recPass == 0) {} \ if (recPass == 0) {} \
else { \ else { \
int Fd, Fs, Ft; \ int Fd, Fs, Ft; \
if (isNOP) return; \ if (isNOP) return; \
mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \ mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \ mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \
mVUallocFMAC1b<vuIndex>(Fd); \ mVUallocFMAC1b<vuIndex>(Fd); \
} \ } \
} }
#define mVU_FMAC3(operation) { \ #define mVU_FMAC3(operation) { \
microVU* mVU = mVUx; \ microVU* mVU = mVUx; \
if (recPass == 0) {} \ if (recPass == 0) {} \
else { \ else { \
int Fd, Fs, Ft; \ int Fd, Fs, Ft; \
if (isNOP) return; \ if (isNOP) return; \
mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \ mVUallocFMAC3a<vuIndex>(Fd, Fs, Ft); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \ mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \
mVUallocFMAC3b<vuIndex>(Fd); \ mVUallocFMAC3b<vuIndex>(Fd); \
} \ } \
}
// mVU_FMAC4(operation): body of an FMAC opcode whose result is stored to ACC.
// 'operation' is pasted into the SSE emitter name (ADD, SUB, MUL, ...).
// Nothing is done on recPass 0; on the other pass the macro allocates the
// operands, emits the scalar or packed operation into Fs, updates the flags and
// merges Fs into ACC. A NOP-flagged instruction returns from the enclosing function.
//
// The scratch register handed to mVUupdateFlags is xmmFt rather than the Ft
// variable: mVUupdateFlags clears its regT2 argument, and mVUallocFMAC4a sets
// Ft = Fs when both sources are the same VF register, so passing Ft would wipe
// the freshly computed result in Fs before it is written to ACC. xmmFt is free
// at that point (the operation has already consumed Ft) and equals Ft in the
// non-aliased case, so behaviour there is unchanged.
#define mVU_FMAC4(operation) {										\
	microVU* mVU = mVUx;											\
	if (recPass == 0) {}											\
	else {															\
		int ACC, Fs, Ft;											\
		if (isNOP) return;											\
		mVUallocFMAC4a<vuIndex>(ACC, Fs, Ft);						\
		if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft);	\
		else				SSE_##operation##PS_XMM_to_XMM(Fs, Ft);	\
		mVUupdateFlags<vuIndex>(Fs, xmmT1, xmmFt, _X_Y_Z_W);		\
		mVUallocFMAC4b<vuIndex>(ACC, Fs);							\
	}																\
}
// mVU_FMAC5(operation): body of a broadcast-operand FMAC opcode whose result is
// stored to ACC. 'operation' is pasted into the SSE emitter name (ADD, SUB, MUL, ...).
// Nothing is done on recPass 0; on the other pass the macro allocates the
// operands, emits the scalar or packed operation into Fs, updates the flags and
// merges Fs into ACC. A NOP-flagged instruction returns from the enclosing function.
//
// The scratch register handed to mVUupdateFlags is xmmFt rather than the Ft
// variable: mVUupdateFlags clears its regT2 argument, and mVUallocFMAC5a can set
// Ft = Fs (same VF register, x broadcast, scalar op), so passing Ft would wipe
// the freshly computed result in Fs before it is written to ACC. xmmFt is free
// at that point (the operation has already consumed Ft) and equals Ft in the
// non-aliased case, so behaviour there is unchanged.
#define mVU_FMAC5(operation) {										\
	microVU* mVU = mVUx;											\
	if (recPass == 0) {}											\
	else {															\
		int ACC, Fs, Ft;											\
		if (isNOP) return;											\
		mVUallocFMAC5a<vuIndex>(ACC, Fs, Ft);						\
		if (_XYZW_SS && _X) SSE_##operation##SS_XMM_to_XMM(Fs, Ft);	\
		else				SSE_##operation##PS_XMM_to_XMM(Fs, Ft);	\
		mVUupdateFlags<vuIndex>(Fs, xmmT1, xmmFt, _X_Y_Z_W);		\
		mVUallocFMAC5b<vuIndex>(ACC, Fs);							\
	}																\
}
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -122,48 +151,45 @@ microVUf(void) mVU_ABS() {
microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); } microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); }
microVUf(void) mVU_ADDi(){} microVUf(void) mVU_ADDi(){}
microVUf(void) mVU_ADDq(){} microVUf(void) mVU_ADDq(){}
microVUq(void) mVU_ADDxyzw() { mVU_FMAC3(ADD); } microVUf(void) mVU_ADDx() { mVU_FMAC3(ADD); }
microVUf(void) mVU_ADDx() { mVU_ADDxyzw<vuIndex, recPass>(); } microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); }
microVUf(void) mVU_ADDy() { mVU_ADDxyzw<vuIndex, recPass>(); } microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); }
microVUf(void) mVU_ADDz() { mVU_ADDxyzw<vuIndex, recPass>(); } microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); }
microVUf(void) mVU_ADDw() { mVU_ADDxyzw<vuIndex, recPass>(); } microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); }
microVUf(void) mVU_ADDA(){}
microVUf(void) mVU_ADDAi(){} microVUf(void) mVU_ADDAi(){}
microVUf(void) mVU_ADDAq(){} microVUf(void) mVU_ADDAq(){}
microVUf(void) mVU_ADDAx(){} microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); }
microVUf(void) mVU_ADDAy(){} microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); }
microVUf(void) mVU_ADDAz(){} microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); }
microVUf(void) mVU_ADDAw(){} microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); }
microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); } microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); }
microVUf(void) mVU_SUBi(){} microVUf(void) mVU_SUBi(){}
microVUf(void) mVU_SUBq(){} microVUf(void) mVU_SUBq(){}
microVUq(void) mVU_SUBxyzw() { mVU_FMAC3(SUB); } microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); }
microVUf(void) mVU_SUBx() { mVU_SUBxyzw<vuIndex, recPass>(); } microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); }
microVUf(void) mVU_SUBy() { mVU_SUBxyzw<vuIndex, recPass>(); } microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); }
microVUf(void) mVU_SUBz() { mVU_SUBxyzw<vuIndex, recPass>(); } microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); }
microVUf(void) mVU_SUBw() { mVU_SUBxyzw<vuIndex, recPass>(); } microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); }
microVUf(void) mVU_SUBA(){}
microVUf(void) mVU_SUBAi(){} microVUf(void) mVU_SUBAi(){}
microVUf(void) mVU_SUBAq(){} microVUf(void) mVU_SUBAq(){}
microVUf(void) mVU_SUBAx(){} microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); }
microVUf(void) mVU_SUBAy(){} microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); }
microVUf(void) mVU_SUBAz(){} microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); }
microVUf(void) mVU_SUBAw(){} microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); }
microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); } microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); }
microVUf(void) mVU_MULi(){} microVUf(void) mVU_MULi(){}
microVUf(void) mVU_MULq(){} microVUf(void) mVU_MULq(){}
microVUq(void) mVU_MULxyzw() { mVU_FMAC3(MUL); } microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); }
microVUf(void) mVU_MULx() { mVU_MULxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); }
microVUf(void) mVU_MULy() { mVU_MULxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); }
microVUf(void) mVU_MULz() { mVU_MULxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); }
microVUf(void) mVU_MULw() { mVU_MULxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); }
microVUf(void) mVU_MULA(){}
microVUf(void) mVU_MULAi(){} microVUf(void) mVU_MULAi(){}
microVUf(void) mVU_MULAq(){} microVUf(void) mVU_MULAq(){}
microVUf(void) mVU_MULAx(){} microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); }
microVUf(void) mVU_MULAy(){} microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); }
microVUf(void) mVU_MULAz(){} microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); }
microVUf(void) mVU_MULAw(){} microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); }
microVUf(void) mVU_MADD(){} microVUf(void) mVU_MADD(){}
microVUf(void) mVU_MADDi(){} microVUf(void) mVU_MADDi(){}
microVUf(void) mVU_MADDq(){} microVUf(void) mVU_MADDq(){}
@ -194,18 +220,16 @@ microVUf(void) mVU_MSUBAz(){}
microVUf(void) mVU_MSUBAw(){} microVUf(void) mVU_MSUBAw(){}
microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); } microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); }
microVUf(void) mVU_MAXi(){} microVUf(void) mVU_MAXi(){}
microVUq(void) mVU_MAXxyzw() { mVU_FMAC3(MAX); } microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); }
microVUf(void) mVU_MAXx() { mVU_MAXxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MAXy() { mVU_FMAC3(MAX); }
microVUf(void) mVU_MAXy() { mVU_MAXxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MAXz() { mVU_FMAC3(MAX); }
microVUf(void) mVU_MAXz() { mVU_MAXxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MAXw() { mVU_FMAC3(MAX); }
microVUf(void) mVU_MAXw() { mVU_MAXxyzw<vuIndex, recPass>(); }
microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); } microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); }
microVUf(void) mVU_MINIi(){} microVUf(void) mVU_MINIi(){}
microVUq(void) mVU_MINIxyzw(){ mVU_FMAC3(MIN); } microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); }
microVUf(void) mVU_MINIx() { mVU_MINIxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); }
microVUf(void) mVU_MINIy() { mVU_MINIxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); }
microVUf(void) mVU_MINIz() { mVU_MINIxyzw<vuIndex, recPass>(); } microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); }
microVUf(void) mVU_MINIw() { mVU_MINIxyzw<vuIndex, recPass>(); }
microVUf(void) mVU_OPMULA(){} microVUf(void) mVU_OPMULA(){}
microVUf(void) mVU_OPMSUB(){} microVUf(void) mVU_OPMSUB(){}
microVUf(void) mVU_NOP(){} microVUf(void) mVU_NOP(){}