- Re-implemented logical min/max code
- Re-implemented tri-ace gamefix
- Fixed some bugs...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1567 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-07-25 01:56:27 +00:00
parent 93d6d5a2ac
commit 829efedfb5
3 changed files with 114 additions and 81 deletions

View File

@ -118,8 +118,7 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
//------------------------------------------------------------------
#define getIreg(reg, modXYZW) { \
MOV32MtoR(gprT1, (uptr)&mVU->regs->VI[REG_I].UL); \
SSE2_MOVD_R_to_XMM(reg, gprT1); \
SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT2, 8); \
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \
}

View File

@ -317,67 +317,60 @@ microVUt(void) mVUrestoreRegs(microVU* mVU) {
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
// Warning: Modifies xmmT1 and xmmT2
void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
// Warning: Modifies t1 and t2
void MIN_MAX_PS(microVU* mVU, int to, int from, int t1, int t2, bool min) {
bool t1b = 0, t2b = 0;
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
if (t2 < 0) { t2 = mVU->regAlloc->allocReg(); t2b = 1; }
// ZW
SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0xfa);
SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
else SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xfa);
SSE2_PAND_M128_to_XMM (t1, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (t1, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(t2, from, 0xfa);
SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(t1, t2);
else SSE2_MAXPD_XMM_to_XMM(t1, t2);
// XY
SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0x50);
SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(t2, from, 0x50);
SSE2_PAND_M128_to_XMM (t2, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (t2, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(to, xmmT2);
else SSE2_MAXPD_XMM_to_XMM(to, xmmT2);
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(to, t2);
else SSE2_MAXPD_XMM_to_XMM(to, t2);
SSE_SHUFPS_XMM_to_XMM(to, xmmT1, 0x88);
SSE_SHUFPS_XMM_to_XMM(to, t1, 0x88);
if (t1b) mVU->regAlloc->clearNeeded(t1);
if (t2b) mVU->regAlloc->clearNeeded(t2);
}
// Warning: Modifies from and to's upper 3 vectors
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
// Warning: Modifies to's upper 3 vectors, and t1 (from is only read)
void MIN_MAX_SS(mV, int to, int from, int t1, bool min) {
bool t1b = 0;
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
SSE_SHUFPS_XMM_to_XMM (to, from, 0);
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(from, to, 0xee);
if (min) SSE2_MINPD_XMM_to_XMM(to, from);
else SSE2_MAXPD_XMM_to_XMM(to, from);
}
void SSE_MAX2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
else { MIN_MAX_(to, from, 0); }
}
void SSE_MIN2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
else { MIN_MAX_(to, from, 1); }
}
void SSE_MAX2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
else { MIN_MAX_SS(to, from, 0); }
}
void SSE_MIN2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
else { MIN_MAX_SS(to, from, 1); }
SSE2_PSHUFD_XMM_to_XMM(t1, to, 0xee);
if (min) SSE2_MINPD_XMM_to_XMM(to, t1);
else SSE2_MAXPD_XMM_to_XMM(to, t1);
if (t1b) mVU->regAlloc->clearNeeded(t1);
}
// Warning: Modifies all vectors in 'to' and 'from', and Modifies xmmT1 and xmmT2
void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); return; }
u8 *localptr[8];
void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
SSE_MOVAPS_XMM_to_XMM(xmmT1, to);
SSE_MOVAPS_XMM_to_XMM(xmmT2, from);
u8 *localptr[8];
bool t1b = 0, t2b = 0;
if (t1 < 0) { t1 = mVU->regAlloc->allocReg(); t1b = 1; }
if (t2 < 0) { t2 = mVU->regAlloc->allocReg(); t2b = 1; }
SSE_MOVAPS_XMM_to_XMM(t1, to);
SSE_MOVAPS_XMM_to_XMM(t2, from);
SSE2_MOVD_XMM_to_R(gprT2, to);
SHR32ItoR(gprT2, 23);
SSE2_MOVD_XMM_to_R(gprT1, from);
@ -435,15 +428,54 @@ void SSE_ADD2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
x86SetJ8(localptr[6]);
x86SetJ8(localptr[7]);
SSE_ANDPS_XMM_to_XMM(to, xmmT1); //to contains mask
SSE_ANDPS_XMM_to_XMM(from, xmmT2); //from contains mask
SSE_ANDPS_XMM_to_XMM(to, t1); // to contains mask
SSE_ANDPS_XMM_to_XMM(from, t2); // from contains mask
SSE_ADDSS_XMM_to_XMM(to, from);
if (t1b) mVU->regAlloc->clearNeeded(t1);
if (t2b) mVU->regAlloc->clearNeeded(t2);
}
// Note: Wrapper function, Tri-Ace Games just need the SS implementation
void SSE_ADD2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
// Packed MAX, callable through the SSE_PS table.
// With CHECK_VU_MINMAXHACK set this is a plain SSE_MAXPS_XMM_to_XMM(to, from);
// otherwise it goes through MIN_MAX_PS with min = 0.
// t1/t2 are forwarded as temp regs; MIN_MAX_PS allocates its own when they are negative.
void SSE_MAXPS(mV, int to, int from, int t1, int t2) {
	if (CHECK_VU_MINMAXHACK) {
		SSE_MAXPS_XMM_to_XMM(to, from);
		return;
	}
	MIN_MAX_PS(mVU, to, from, t1, t2, 0);
}
// Packed MIN, callable through the SSE_PS table.
// With CHECK_VU_MINMAXHACK set this is a plain SSE_MINPS_XMM_to_XMM(to, from);
// otherwise it goes through MIN_MAX_PS with min = 1.
// t1/t2 are forwarded as temp regs; MIN_MAX_PS allocates its own when they are negative.
void SSE_MINPS(mV, int to, int from, int t1, int t2) {
	if (CHECK_VU_MINMAXHACK) {
		SSE_MINPS_XMM_to_XMM(to, from);
		return;
	}
	MIN_MAX_PS(mVU, to, from, t1, t2, 1);
}
// Scalar MAX, callable through the SSE_SS table.
// With CHECK_VU_MINMAXHACK set this is a plain SSE_MAXSS_XMM_to_XMM(to, from);
// otherwise it goes through MIN_MAX_SS with min = 0.
// Only t1 is forwarded; t2 is accepted so the signature matches the table and is unused here.
void SSE_MAXSS(mV, int to, int from, int t1, int t2) {
	if (CHECK_VU_MINMAXHACK) {
		SSE_MAXSS_XMM_to_XMM(to, from);
		return;
	}
	MIN_MAX_SS(mVU, to, from, t1, 0);
}
// Scalar MIN, callable through the SSE_SS table.
// With CHECK_VU_MINMAXHACK set this is a plain SSE_MINSS_XMM_to_XMM(to, from);
// otherwise it goes through MIN_MAX_SS with min = 1.
// Only t1 is forwarded; t2 is accepted so the signature matches the table and is unused here.
void SSE_MINSS(mV, int to, int from, int t1, int t2) {
	if (CHECK_VU_MINMAXHACK) {
		SSE_MINSS_XMM_to_XMM(to, from);
		return;
	}
	MIN_MAX_SS(mVU, to, from, t1, 1);
}
// Scalar ADD variant selected by opType 5 in the SSE_SS table.
// Without CHECK_VUADDSUBHACK it is an ordinary SSE_ADDSS_XMM_to_XMM(to, from);
// with the hack enabled the add is performed by ADD_SS, which receives t1/t2 as temp regs.
void SSE_ADD2SS(mV, int to, int from, int t1, int t2) {
	if (!CHECK_VUADDSUBHACK) {
		SSE_ADDSS_XMM_to_XMM(to, from);
		return;
	}
	ADD_SS(mVU, to, from, t1, t2);
}
// Packed counterpart of SSE_ADD2SS (opType 5 in the SSE_PS table).
// It always performs a plain SSE_ADDPS_XMM_to_XMM(to, from); mVU, t1 and t2 are not used.
void SSE_ADD2PS(mV, int to, int from, int t1, int t2)
{
	SSE_ADDPS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: packed add, to += from via SSE_ADDPS_XMM_to_XMM.
// mVU, t1 and t2 exist only to match the SSE_PS function-pointer signature.
void SSE_ADDPS(mV, int to, int from, int t1, int t2)
{
	SSE_ADDPS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: scalar add via SSE_ADDSS_XMM_to_XMM(to, from).
// mVU, t1 and t2 exist only to match the SSE_SS function-pointer signature.
void SSE_ADDSS(mV, int to, int from, int t1, int t2)
{
	SSE_ADDSS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: packed subtract via SSE_SUBPS_XMM_to_XMM(to, from).
// mVU, t1 and t2 exist only to match the SSE_PS function-pointer signature.
void SSE_SUBPS(mV, int to, int from, int t1, int t2)
{
	SSE_SUBPS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: scalar subtract via SSE_SUBSS_XMM_to_XMM(to, from).
// mVU, t1 and t2 exist only to match the SSE_SS function-pointer signature.
void SSE_SUBSS(mV, int to, int from, int t1, int t2)
{
	SSE_SUBSS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: packed multiply via SSE_MULPS_XMM_to_XMM(to, from).
// mVU, t1 and t2 exist only to match the SSE_PS function-pointer signature.
void SSE_MULPS(mV, int to, int from, int t1, int t2)
{
	SSE_MULPS_XMM_to_XMM(to, from);
}
// Table-compatible wrapper: scalar multiply via SSE_MULSS_XMM_to_XMM(to, from).
// mVU, t1 and t2 exist only to match the SSE_SS function-pointer signature.
void SSE_MULSS(mV, int to, int from, int t1, int t2)
{
	SSE_MULSS_XMM_to_XMM(to, from);
}
//------------------------------------------------------------------
// Micro VU - Custom Quick Search

View File

@ -81,20 +81,22 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1, bool modXYZW = 1) {
// Helper Macros and Functions
//------------------------------------------------------------------
static void (*SSE_PS[]) (x86SSERegType, x86SSERegType) = {
SSE_ADDPS_XMM_to_XMM, // 0
SSE_SUBPS_XMM_to_XMM, // 1
SSE_MULPS_XMM_to_XMM, // 2
SSE_MAXPS_XMM_to_XMM, // 3
SSE_MINPS_XMM_to_XMM // 4
// Packed-op dispatch table, indexed by opType.
// Every entry takes (mVU, to, from, t1, t2); callers pass -1 for t1/t2 when they have no temp regs to lend.
static void (*SSE_PS[]) (microVU*, int, int, int, int) = {
	SSE_ADDPS,  // 0: add
	SSE_SUBPS,  // 1: subtract
	SSE_MULPS,  // 2: multiply
	SSE_MAXPS,  // 3: max
	SSE_MINPS,  // 4: min
	SSE_ADD2PS, // 5: add (ADD2 variant)
};
static void (*SSE_SS[]) (x86SSERegType, x86SSERegType) = {
SSE_ADDSS_XMM_to_XMM, // 0
SSE_SUBSS_XMM_to_XMM, // 1
SSE_MULSS_XMM_to_XMM, // 2
SSE_MAXSS_XMM_to_XMM, // 3
SSE_MINSS_XMM_to_XMM // 4
// Scalar-op dispatch table, indexed by opType (same index meaning as SSE_PS).
// Every entry takes (mVU, to, from, t1, t2); callers pass -1 for t1/t2 when they have no temp regs to lend.
static void (*SSE_SS[]) (microVU*, int, int, int, int) = {
	SSE_ADDSS,  // 0: add
	SSE_SUBSS,  // 1: subtract
	SSE_MULSS,  // 2: multiply
	SSE_MAXSS,  // 3: max
	SSE_MINSS,  // 4: min
	SSE_ADD2SS, // 5: add (ADD2 variant, honours CHECK_VUADDSUBHACK)
};
// Prints Opcode to MicroProgram Logs
@ -147,8 +149,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
if (_XYZW_SS) SSE_SS[opType](Fs, Ft);
else SSE_PS[opType](Fs, Ft);
if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, -1, -1);
else SSE_PS[opType](mVU, Fs, Ft, -1, -1);
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
@ -185,22 +187,22 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
if (_XYZW_SS) SSE_SS[2](Fs, Ft);
else SSE_PS[2](Fs, Ft);
if (_XYZW_SS) SSE_SS[2](mVU, Fs, Ft, -1, -1);
else SSE_PS[2](mVU, Fs, Ft, -1, -1);
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
if (_XYZW_SS || _X_Y_Z_W == 0xf) {
if (_XYZW_SS) SSE_SS[opType](ACC, Fs);
else SSE_PS[opType](ACC, Fs);
if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, -1, -1);
else SSE_PS[opType](mVU, ACC, Fs, -1, -1);
mVUupdateFlags(mVU, ACC, Fs);
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
}
else {
int tempACC = mVU->regAlloc->allocReg();
SSE_MOVAPS_XMM_to_XMM(tempACC, ACC);
SSE_PS[opType](tempACC, Fs);
SSE_PS[opType](mVU, tempACC, Fs, -1, -1);
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
mVU->regAlloc->clearNeeded(tempACC);
}
@ -229,8 +231,8 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
if (_XYZW_SS) { SSE_SS[2](Fs, Ft); SSE_SS[0](Fs, ACC); }
else { SSE_PS[2](Fs, Ft); SSE_PS[0](Fs, ACC); }
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, -1, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, -1, -1); }
mVUupdateFlags(mVU, Fs, -1);
@ -260,8 +262,8 @@ void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) {
opCase1 { if (_XYZW_SS && _X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS(_X_Y_Z_W)); } }
opCase2 { if (_XYZW_SS && (!_bc_x)) { SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, shuffleSS((1 << (3 - _bc_)))); } }
if (_XYZW_SS) { SSE_SS[2](Fs, Ft); SSE_SS[1](Fd, Fs); }
else { SSE_PS[2](Fs, Ft); SSE_PS[1](Fd, Fs); }
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, -1, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, -1, -1); }
mVUupdateFlags(mVU, Fd, Fs);
@ -315,8 +317,8 @@ mVUop(mVU_OPMSUB) {
pass1 { mVUanalyzeFMAC1(mVU, _Fd_, _Fs_, _Ft_); }
pass2 {
mVU->regAlloc->reset(); // Reset for Testing
int Ft = mVU->regAlloc->allocReg(_Ft_);
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf);
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
int ACC = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
@ -325,7 +327,7 @@ mVUop(mVU_OPMSUB) {
SSE_SUBPS_XMM_to_XMM(ACC, Fs);
mVUupdateFlags(mVU, ACC, Fs);
mVU->regAlloc->clearNeeded(32);
mVU->regAlloc->clearNeeded(ACC);
mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft);
mVU->regAlloc->flushAll(); // Flush All for Testing
@ -386,8 +388,8 @@ mVUop(mVU_CLIP) {
pass1 { mVUanalyzeFMAC4(mVU, _Fs_, _Ft_); }
pass2 {
mVU->regAlloc->reset(); // Reset for Testing
int Fs = mVU->regAlloc->allocReg(_Fs_);
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 1);
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0x1);
int t1 = mVU->regAlloc->allocReg();
mVUunpack_xyzw(Ft, Ft, 0);
@ -429,7 +431,7 @@ mVUop(mVU_CLIP) {
//------------------------------------------------------------------
// ADD: forwards to mVU_FMACa with opCase 1, opType 0 (entry 0 of SSE_PS/SSE_SS: SSE_ADDPS/SSE_ADDSS), isACC 0
mVUop(mVU_ADD) { mVU_FMACa(mVU, recPass, 1, 0, 0, "ADD"); }
mVUop(mVU_ADDi) { mVU_FMACa(mVU, recPass, 3, 0, 0, "ADDi"); }
// ADDi: opCase 3 with opType 5, so mVU_FMACa dispatches through SSE_PS[5]/SSE_SS[5]
// (SSE_ADD2PS/SSE_ADD2SS) instead of the plain add at index 0
mVUop(mVU_ADDi) { mVU_FMACa(mVU, recPass, 3, 5, 0, "ADDi"); }
// ADDq / ADDx / ADDy: all use opType 0 (plain SSE_ADDPS/SSE_ADDSS) with isACC 0.
// ADDq passes opCase 4; ADDx and ADDy both pass opCase 2 (the case whose shuffles in mVU_FMACa use _bc_).
mVUop(mVU_ADDq) { mVU_FMACa(mVU, recPass, 4, 0, 0, "ADDq"); }
mVUop(mVU_ADDx) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDx"); }
mVUop(mVU_ADDy) { mVU_FMACa(mVU, recPass, 2, 0, 0, "ADDy"); }