mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Re-implemented logical min/max code
- Re-implemented tri-ace gamefix
- Fixed some bugs...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1567 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
93d6d5a2ac
commit
829efedfb5
|
@ -118,8 +118,7 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
#define getIreg(reg, modXYZW) { \
|
#define getIreg(reg, modXYZW) { \
|
||||||
MOV32MtoR(gprT1, (uptr)&mVU->regs->VI[REG_I].UL); \
|
SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); \
|
||||||
SSE2_MOVD_R_to_XMM(reg, gprT1); \
|
|
||||||
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT2, 8); \
|
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT2, 8); \
|
||||||
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \
|
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \
|
||||||
}
|
}
|
||||||
|
|
|
@ -317,67 +317,60 @@ microVUt(void) mVUrestoreRegs(microVU* mVU) {
|
||||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
|
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
|
||||||
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
|
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
|
||||||
|
|
||||||
// Warning: Modifies xmmT1 and xmmT2
|
// Warning: Modifies t1 and t2
|
||||||
void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
|
void MIN_MAX_PS(microVU* mVU, int to, int from, int t1, int t2, bool min) {
|
||||||
|
bool t1b = 0, t2b = 0;
|
||||||
|
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
|
||||||
|
if (t2 < 0) { t2 = mVU->regAlloc->allocReg(); t2b = 1; }
|
||||||
|
|
||||||
// ZW
|
// ZW
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0xfa);
|
SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xfa);
|
||||||
SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (t1, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (t1, (uptr)MIN_MAX_MASK2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
|
SSE2_PSHUFD_XMM_to_XMM(t2, from, 0xfa);
|
||||||
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2);
|
||||||
if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
|
if (min) SSE2_MINPD_XMM_to_XMM(t1, t2);
|
||||||
else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
|
else SSE2_MAXPD_XMM_to_XMM(t1, t2);
|
||||||
|
|
||||||
// XY
|
// XY
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0x50);
|
SSE2_PSHUFD_XMM_to_XMM(t2, from, 0x50);
|
||||||
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
|
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
|
||||||
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
||||||
if (min) SSE2_MINPD_XMM_to_XMM(to, xmmT2);
|
if (min) SSE2_MINPD_XMM_to_XMM(to, t2);
|
||||||
else SSE2_MAXPD_XMM_to_XMM(to, xmmT2);
|
else SSE2_MAXPD_XMM_to_XMM(to, t2);
|
||||||
|
|
||||||
SSE_SHUFPS_XMM_to_XMM(to, xmmT1, 0x88);
|
SSE_SHUFPS_XMM_to_XMM(to, t1, 0x88);
|
||||||
|
if (t1b) mVU->regAlloc->clearNeeded(t1);
|
||||||
|
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warning: Modifies from and to's upper 3 vectors
|
// Warning: Modifies to's upper 3 vectors, and t1
|
||||||
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
|
void MIN_MAX_SS(mV, int to, int from, int t1, bool min) {
|
||||||
|
bool t1b = 0;
|
||||||
|
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
|
||||||
SSE_SHUFPS_XMM_to_XMM (to, from, 0);
|
SSE_SHUFPS_XMM_to_XMM (to, from, 0);
|
||||||
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(from, to, 0xee);
|
SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xee);
|
||||||
if (min) SSE2_MINPD_XMM_to_XMM(to, from);
|
if (min) SSE2_MINPD_XMM_to_XMM(to, t1);
|
||||||
else SSE2_MAXPD_XMM_to_XMM(to, from);
|
else SSE2_MAXPD_XMM_to_XMM(to, t1);
|
||||||
}
|
if (t1b) mVU->regAlloc->clearNeeded(t1);
|
||||||
|
|
||||||
void SSE_MAX2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
|
|
||||||
else { MIN_MAX_(to, from, 0); }
|
|
||||||
}
|
|
||||||
void SSE_MIN2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
|
|
||||||
else { MIN_MAX_(to, from, 1); }
|
|
||||||
}
|
|
||||||
void SSE_MAX2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
|
|
||||||
else { MIN_MAX_SS(to, from, 0); }
|
|
||||||
}
|
|
||||||
void SSE_MIN2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
|
|
||||||
else { MIN_MAX_SS(to, from, 1); }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
|
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
|
||||||
void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
|
||||||
|
|
||||||
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); return; }
|
|
||||||
u8 *localptr[8];
|
u8 *localptr[8];
|
||||||
|
bool t1b = 0, t2b = 0;
|
||||||
|
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
|
||||||
|
if (t2 < 0) { t2 = mVU->regAlloc->allocReg(); t2b = 1; }
|
||||||
|
|
||||||
SSE_MOVAPS_XMM_to_XMM(xmmT1, to);
|
SSE_MOVAPS_XMM_to_XMM(t1, to);
|
||||||
SSE_MOVAPS_XMM_to_XMM(xmmT2, from);
|
SSE_MOVAPS_XMM_to_XMM(t2, from);
|
||||||
SSE2_MOVD_XMM_to_R(gprT2, to);
|
SSE2_MOVD_XMM_to_R(gprT2, to);
|
||||||
SHR32ItoR(gprT2, 23);
|
SHR32ItoR(gprT2, 23);
|
||||||
SSE2_MOVD_XMM_to_R(gprT1, from);
|
SSE2_MOVD_XMM_to_R(gprT1, from);
|
||||||
|
@ -435,15 +428,54 @@ void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
||||||
x86SetJ8(localptr[6]);
|
x86SetJ8(localptr[6]);
|
||||||
x86SetJ8(localptr[7]);
|
x86SetJ8(localptr[7]);
|
||||||
|
|
||||||
SSE_ANDPS_XMM_to_XMM(to, xmmT1); //to contains mask
|
SSE_ANDPS_XMM_to_XMM(to, t1); // to contains mask
|
||||||
SSE_ANDPS_XMM_to_XMM(from, xmmT2); //from contains mask
|
SSE_ANDPS_XMM_to_XMM(from, t2); // from contains mask
|
||||||
SSE_ADDSS_XMM_to_XMM(to, from);
|
SSE_ADDSS_XMM_to_XMM(to, from);
|
||||||
|
if (t1b) mVU->regAlloc->clearNeeded(t1);
|
||||||
|
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: Wrapper function, Tri-Ace Games just need the SS implementation
|
void SSE_MAXPS(mV, int to, int from, int t1, int t2) {
|
||||||
void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
|
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
|
||||||
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
||||||
|
}
|
||||||
|
void SSE_MINPS(mV, int to, int from, int t1, int t2) {
|
||||||
|
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
|
||||||
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
||||||
|
}
|
||||||
|
void SSE_MAXSS(mV, int to, int from, int t1, int t2) {
|
||||||
|
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
|
||||||
|
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
||||||
|
}
|
||||||
|
void SSE_MINSS(mV, int to, int from, int t1, int t2) {
|
||||||
|
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
|
||||||
|
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
||||||
|
}
|
||||||
|
void SSE_ADD2SS(mV, int to, int from, int t1, int t2) {
|
||||||
|
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); }
|
||||||
|
else { ADD_SS(mVU, to, from, t1, t2); }
|
||||||
|
}
|
||||||
|
void SSE_ADD2PS(mV, int to, int from, int t1, int t2) {
|
||||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
SSE_ADDPS_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
void SSE_ADDPS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_ADDPS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
void SSE_ADDSS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_ADDSS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
void SSE_SUBPS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_SUBPS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
void SSE_SUBSS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_SUBSS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
void SSE_MULPS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_MULPS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
void SSE_MULSS(mV, int to, int from, int t1, int t2) {
|
||||||
|
SSE_MULSS_XMM_to_XMM(to, from);
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Micro VU - Custom Quick Search
|
// Micro VU - Custom Quick Search
|
||||||
|
|
|
@ -81,20 +81,22 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
|
||||||
// Helper Macros and Functions
|
// Helper Macros and Functions
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = {
|
static void (*SSE_PS[]) (microVU*, int, int, int, int) = {
|
||||||
SSE_ADDPS_XMM_to_XMM, // 0
|
SSE_ADDPS, // 0
|
||||||
SSE_SUBPS_XMM_to_XMM, // 1
|
SSE_SUBPS, // 1
|
||||||
SSE_MULPS_XMM_to_XMM, // 2
|
SSE_MULPS, // 2
|
||||||
SSE_MAXPS_XMM_to_XMM, // 3
|
SSE_MAXPS, // 3
|
||||||
SSE_MINPS_XMM_to_XMM // 4
|
SSE_MINPS, // 4
|
||||||
|
SSE_ADD2PS // 5
|
||||||
};
|
};
|
||||||
|
|
||||||
static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = {
|
static void (*SSE_SS[]) (microVU*, int, int, int, int) = {
|
||||||
SSE_ADDSS_XMM_to_XMM, // 0
|
SSE_ADDSS, // 0
|
||||||
SSE_SUBSS_XMM_to_XMM, // 1
|
SSE_SUBSS, // 1
|
||||||
SSE_MULSS_XMM_to_XMM, // 2
|
SSE_MULSS, // 2
|
||||||
SSE_MAXSS_XMM_to_XMM, // 3
|
SSE_MAXSS, // 3
|
||||||
SSE_MINSS_XMM_to_XMM // 4
|
SSE_MINSS, // 4
|
||||||
|
SSE_ADD2SS // 5
|
||||||
};
|
};
|
||||||
|
|
||||||
// Prints Opcode to MicroProgram Logs
|
// Prints Opcode to MicroProgram Logs
|
||||||
|
@ -147,8 +149,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
if (_XYZW_SS) SSE_SS[opType](Fs, Ft);
|
if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, -1, -1);
|
||||||
else SSE_PS[opType](Fs, Ft);
|
else SSE_PS[opType](mVU, Fs, Ft, -1, -1);
|
||||||
|
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
@ -185,22 +187,22 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
if (_XYZW_SS) SSE_SS[2](Fs, Ft);
|
if (_XYZW_SS) SSE_SS[2](mVU, Fs, Ft, -1, -1);
|
||||||
else SSE_PS[2](Fs, Ft);
|
else SSE_PS[2](mVU, Fs, Ft, -1, -1);
|
||||||
|
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
if (_XYZW_SS || _X_Y_Z_W == 0xf) {
|
if (_XYZW_SS || _X_Y_Z_W == 0xf) {
|
||||||
if (_XYZW_SS) SSE_SS[opType](ACC, Fs);
|
if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, -1, -1);
|
||||||
else SSE_PS[opType](ACC, Fs);
|
else SSE_PS[opType](mVU, ACC, Fs, -1, -1);
|
||||||
mVUupdateFlags(mVU, ACC, Fs);
|
mVUupdateFlags(mVU, ACC, Fs);
|
||||||
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int tempACC = mVU->regAlloc->allocReg();
|
int tempACC = mVU->regAlloc->allocReg();
|
||||||
SSE_MOVAPS_XMM_to_XMM(tempACC, ACC);
|
SSE_MOVAPS_XMM_to_XMM(tempACC, ACC);
|
||||||
SSE_PS[opType](tempACC, Fs);
|
SSE_PS[opType](mVU, tempACC, Fs, -1, -1);
|
||||||
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
|
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
|
||||||
mVU->regAlloc->clearNeeded(tempACC);
|
mVU->regAlloc->clearNeeded(tempACC);
|
||||||
}
|
}
|
||||||
|
@ -229,8 +231,8 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
if (_XYZW_SS) { SSE_SS[2](Fs, Ft); SSE_SS[0](Fs, ACC); }
|
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, -1, -1); }
|
||||||
else { SSE_PS[2](Fs, Ft); SSE_PS[0](Fs, ACC); }
|
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, -1, -1); }
|
||||||
|
|
||||||
mVUupdateFlags(mVU, Fs, -1);
|
mVUupdateFlags(mVU, Fs, -1);
|
||||||
|
|
||||||
|
@ -260,8 +262,8 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) {
|
||||||
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
|
||||||
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
|
||||||
|
|
||||||
if (_XYZW_SS) { SSE_SS[2](Fs, Ft); SSE_SS[1](Fd, Fs); }
|
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, -1, -1); }
|
||||||
else { SSE_PS[2](Fs, Ft); SSE_PS[1](Fd, Fs); }
|
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, -1, -1); }
|
||||||
|
|
||||||
mVUupdateFlags(mVU, Fd, Fs);
|
mVUupdateFlags(mVU, Fd, Fs);
|
||||||
|
|
||||||
|
@ -315,8 +317,8 @@ mVUop(mVU_OPMSUB) {
|
||||||
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
mVU->regAlloc->reset(); // Reset for Testing
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_);
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
|
||||||
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||||
|
@ -325,7 +327,7 @@ mVUop(mVU_OPMSUB) {
|
||||||
SSE_SUBPS_XMM_to_XMM(ACC, Fs);
|
SSE_SUBPS_XMM_to_XMM(ACC, Fs);
|
||||||
mVUupdateFlags(mVU, ACC, Fs);
|
mVUupdateFlags(mVU, ACC, Fs);
|
||||||
|
|
||||||
mVU->regAlloc->clearNeeded(32);
|
mVU->regAlloc->clearNeeded(ACC);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVU->regAlloc->flushAll(); // Flush All for Testing
|
mVU->regAlloc->flushAll(); // Flush All for Testing
|
||||||
|
@ -386,8 +388,8 @@ mVUop(mVU_CLIP) {
|
||||||
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
mVU->regAlloc->reset(); // Reset for Testing
|
mVU->regAlloc->reset(); // Reset for Testing
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
|
||||||
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 1);
|
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
|
|
||||||
mVUunpack_xyzw(Ft, Ft, 0);
|
mVUunpack_xyzw(Ft, Ft, 0);
|
||||||
|
@ -429,7 +431,7 @@ mVUop(mVU_CLIP) {
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
mVUop(mVU_ADD) { mVU_FMACa(mVU, recPass, 1, 0, 0, "ADD"); }
|
mVUop(mVU_ADD) { mVU_FMACa(mVU, recPass, 1, 0, 0, "ADD"); }
|
||||||
mVUop(mVU_ADDi) { mVU_FMACa(mVU, recPass, 3, 0, 0, "ADDi"); }
|
mVUop(mVU_ADDi) { mVU_FMACa(mVU, recPass, 3, 5, 0, "ADDi"); }
|
||||||
mVUop(mVU_ADDq) { mVU_FMACa(mVU, recPass, 4, 0, 0, "ADDq"); }
|
mVUop(mVU_ADDq) { mVU_FMACa(mVU, recPass, 4, 0, 0, "ADDq"); }
|
||||||
mVUop(mVU_ADDx) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDx"); }
|
mVUop(mVU_ADDx) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDx"); }
|
||||||
mVUop(mVU_ADDy) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDy"); }
|
mVUop(mVU_ADDy) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDy"); }
|
||||||
|
|
Loading…
Reference in New Issue