- Fixed the bugs in the logical min/max code (DaZ should work fine again)
- Optimized some of the lower instructions a bit.


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1220 96395faa-99c1-11dd-bbfe-3dabce05a288
Author: cottonvibes, 2009-05-18 11:13:30 +00:00
Parent: 5f392aba63
Commit: 3019f7d02c
2 changed files with 20 additions and 19 deletions


@@ -73,8 +73,9 @@ microVUf(void) mVU_DIV() {
 		mVUclamp1<vuIndex>(xmmFs, xmmFt, 8);
 		x86SetJ8(djmp);
-		mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
-		mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
+		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
 	}
 	pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
 }
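The new sequence (the same substitution appears in mVU_SQRT and mVU_RSQRT below) drops the broadcast-and-merge in favor of a direct scalar write into xmmPQ. Judging from the shuffle constants, xmmPQ keeps the Q pipeline values in its two low lanes: PSHUFD with 0xe1 swaps lanes 0 and 1, so when writeQ selects the second slot the register is rotated, MOVSS overwrites lane 0 only, and the second PSHUFD rotates it back. A rough C++ sketch of the net effect; the PQReg layout and writeQResult helper are illustrative assumptions, not code from the repository:

#include <utility>

// Assumed xmmPQ layout: Q values in lanes 0/1, P values in the upper lanes.
struct PQReg { float lane0, lane1, lane2, lane3; };

void writeQResult(PQReg& pq, float result, bool writeQ) {
    if (writeQ) std::swap(pq.lane0, pq.lane1); // PSHUFD xmmPQ, xmmPQ, 0xe1: swap lanes 0 and 1
    pq.lane0 = result;                         // MOVSS xmmPQ, xmmFs: write lane 0, keep the rest
    if (writeQ) std::swap(pq.lane0, pq.lane1); // swap back, restoring the lane order
}

Compared to the old mVUunpack_xyzw + mVUmergeRegs pair, this no longer broadcasts the result across all four lanes just to merge a single one of them.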
@@ -91,8 +92,9 @@ microVUf(void) mVU_SQRT() {
 		if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
 		SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
-		mVUunpack_xyzw<vuIndex>(xmmFt, xmmFt, 0);
-		mVUmergeRegs(xmmPQ, xmmFt, writeQ ? 4 : 8);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
+		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFt);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
 	}
 	pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
 }
@@ -129,8 +131,9 @@ microVUf(void) mVU_RSQRT() {
 		mVUclamp1<vuIndex>(xmmFs, xmmFt, 8);
 		x86SetJ8(djmp);
-		mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
-		mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
+		SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs);
+		if (writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
 	}
 	pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
 }


@@ -313,34 +313,33 @@ microVUt(void) mVUcheckSflag(int progIndex) {
 	}
 }
 
-static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0x7fffffff, 0x80000000, 0x7fffffff, 0x80000000};
+static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
 static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
 
 // Warning: Modifies xmmT1 and xmmT2
 void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
-	// XY
-	SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0x50);
+	// ZW
+	SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, 0xfa);
 	SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
 	SSE2_POR_M128_to_XMM  (xmmT1, (uptr)MIN_MAX_MASK2);
-	SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0x50);
+	SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
 	SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
 	SSE2_POR_M128_to_XMM  (xmmT2, (uptr)MIN_MAX_MASK2);
 	if (min) SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
 	else     SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
-	SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
-	// ZW
-	SSE2_PSHUFD_XMM_to_XMM(to, to, 0xfa);
-	SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
-	SSE2_POR_M128_to_XMM  (to, (uptr)MIN_MAX_MASK2);
-	SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0xfa);
+	// XY
+	SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, 0x50);
 	SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
 	SSE2_POR_M128_to_XMM  (xmmT2, (uptr)MIN_MAX_MASK2);
+	SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
+	SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
+	SSE2_POR_M128_to_XMM  (to, (uptr)MIN_MAX_MASK2);
 	if (min) SSE2_MINPD_XMM_to_XMM(to, xmmT2);
 	else     SSE2_MAXPD_XMM_to_XMM(to, xmmT2);
-	SSE2_PSHUFD_XMM_to_XMM(to, to, 0x88);
-	SSE2_MOVSD_XMM_to_XMM (to, xmmT1);
+	SSE_SHUFPS_XMM_to_XMM (to, xmmT1, 0x88);
 }
 
 // Warning: Modifies from and to's upper 3 vectors
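MIN_MAX_ above implements the VU's logical min/max: operands are ordered by their raw bit patterns (sign-magnitude order) rather than compared as IEEE floats, which sidesteps denormal handling entirely and is presumably why the commit message says DaZ works again. Each PSHUFD duplicates a float across a 64-bit lane; PAND with MIN_MAX_MASK1 keeps the sign in bit 63 and the full 32-bit pattern in the low mantissa dword, and POR with MIN_MAX_MASK2 sets bit 62, so every lifted value is a normal double whose IEEE ordering matches the sign-magnitude ordering of the float bits. MINPD/MAXPD then select per lane, and the final SHUFPS with 0x88 interleaves the winning low dwords of the XY and ZW halves back into one register, replacing the old two-PSHUFD-plus-MOVSD tail. Relative to the old version, the mask now keeps the whole 32-bit pattern (0xffffffff instead of 0x7fffffff) and the ZW half is computed first so the single SHUFPS can do the packing. A scalar model of the ordering this sets up; mmKey and logicalMinMax are illustrative stand-ins based on my reading of the mask constants, not repository code:

#include <cstdint>
#include <cstring>

// Map raw float bits to a key whose unsigned order is sign-magnitude order:
// negatives (sign bit set) are complemented so larger magnitudes sort lower.
static uint32_t mmKey(uint32_t u) {
    return (u & 0x80000000u) ? ~u : (u | 0x80000000u);
}

float logicalMinMax(float a, float b, bool min) {
    uint32_t ua, ub;
    std::memcpy(&ua, &a, sizeof ua);
    std::memcpy(&ub, &b, sizeof ub);
    bool aWins = min ? (mmKey(ua) < mmKey(ub)) : (mmKey(ua) > mmKey(ub));
    return aWins ? a : b;
}

Under this ordering logicalMinMax(-0.0f, +0.0f, true) yields -0.0f, and denormal inputs participate like any other bit pattern, with no dependence on the MXCSR flush modes.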
@@ -370,5 +369,4 @@ void SSE_MIN2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
 	else { MIN_MAX_SS(to, from, 1); }
 }
 #endif //PCSX2_MICROVU