mirror of https://github.com/PCSX2/pcsx2.git
mVU: Experimented with some code to clamp every ADD/SUB/MUL/DIV operation.
The code is off by default; it broke a lot of games. This confirms my theory that the best way to handle clamping is to limit it to the places where we have tested that it fixes games. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1796 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c6a33eb2f0
commit
fa4fdcfacd
|
@ -75,8 +75,8 @@ mVUop(mVU_DIV) {
|
|||
djmp = JMP8(0);
|
||||
x86SetJ8(cjmp);
|
||||
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
||||
SSE_DIVSS_XMM_to_XMM(Fs, Ft);
|
||||
mVUclamp1(Fs, t1, 8);
|
||||
SSE_DIVSS(mVU, Fs, Ft);
|
||||
mVUclamp1(Fs, t1, 8, 1);
|
||||
x86SetJ8(djmp);
|
||||
|
||||
writeQreg(Fs, mVUinfo.writeQ);
|
||||
|
@ -134,8 +134,8 @@ mVUop(mVU_RSQRT) {
|
|||
|
||||
djmp = JMP8(0);
|
||||
x86SetJ8(ajmp);
|
||||
SSE_DIVSS_XMM_to_XMM(Fs, Ft);
|
||||
mVUclamp1(Fs, t1, 8);
|
||||
SSE_DIVSS(mVU, Fs, Ft);
|
||||
mVUclamp1(Fs, t1, 8, 1);
|
||||
x86SetJ8(djmp);
|
||||
|
||||
writeQreg(Fs, mVUinfo.writeQ);
|
||||
|
@ -152,11 +152,11 @@ mVUop(mVU_RSQRT) {
|
|||
//------------------------------------------------------------------
|
||||
|
||||
#define EATANhelper(addr) { \
|
||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
||||
SSE_MULSS(mVU, t2, Fs); \
|
||||
SSE_MULSS(mVU, t2, Fs); \
|
||||
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
||||
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
||||
SSE_ADDSS_XMM_to_XMM (PQ, t1); \
|
||||
SSE_ADDSS(mVU, PQ, t1); \
|
||||
}
|
||||
|
||||
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
||||
|
@ -185,7 +185,7 @@ mVUop(mVU_EATAN) {
|
|||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
mVU->regAlloc->clearNeeded(t1);
|
||||
|
@ -203,9 +203,9 @@ mVUop(mVU_EATANxy) {
|
|||
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x01);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_SUBSS_XMM_to_XMM (Fs, t1); // y-x, not y-1? ><
|
||||
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_SUBSS (mVU, Fs, t1); // y-x, not y-1? ><
|
||||
SSE_ADDSS (mVU, t1, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, t1);
|
||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
mVU->regAlloc->clearNeeded(t1);
|
||||
|
@ -223,9 +223,9 @@ mVUop(mVU_EATANxz) {
|
|||
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x02);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_SUBSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_SUBSS (mVU, Fs, t1);
|
||||
SSE_ADDSS (mVU, t1, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, t1);
|
||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
mVU->regAlloc->clearNeeded(t1);
|
||||
|
@ -235,10 +235,10 @@ mVUop(mVU_EATANxz) {
|
|||
}
|
||||
|
||||
#define eexpHelper(addr) { \
|
||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
||||
SSE_MULSS(mVU, t2, Fs); \
|
||||
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
||||
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1); \
|
||||
SSE_ADDSS(mVU, xmmPQ, t1); \
|
||||
}
|
||||
|
||||
mVUop(mVU_EEXP) {
|
||||
|
@ -252,20 +252,20 @@ mVUop(mVU_EEXP) {
|
|||
SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1);
|
||||
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||
SSE_MULSS_XMM_to_XMM (t1, Fs);
|
||||
SSE_MULSS (mVU, t1, Fs);
|
||||
SSE_MOVAPS_XMM_to_XMM (t2, t1);
|
||||
SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2);
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1);
|
||||
SSE_ADDSS (mVU, xmmPQ, t1);
|
||||
eexpHelper(mVU_E3);
|
||||
eexpHelper(mVU_E4);
|
||||
eexpHelper(mVU_E5);
|
||||
SSE_MULSS_XMM_to_XMM (t2, Fs);
|
||||
SSE_MULSS (mVU, t2, Fs);
|
||||
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6);
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2);
|
||||
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||
SSE_ADDSS (mVU, xmmPQ, t2);
|
||||
SSE_MULSS (mVU, xmmPQ, xmmPQ);
|
||||
SSE_MULSS (mVU, xmmPQ, xmmPQ);
|
||||
SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (t2, xmmPQ);
|
||||
SSE_DIVSS (mVU, t2, xmmPQ);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, t2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -276,18 +276,18 @@ mVUop(mVU_EEXP) {
|
|||
}
|
||||
|
||||
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
||||
microVUt(void) mVU_sumXYZ(int PQ, int Fs) {
|
||||
microVUt(void) mVU_sumXYZ(mV, int PQ, int Fs) {
|
||||
if( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
SSE4_DPPS_XMM_to_XMM(Fs, Fs, 0x71);
|
||||
SSE_MOVSS_XMM_to_XMM(PQ, Fs);
|
||||
}
|
||||
else {
|
||||
SSE_MULPS_XMM_to_XMM (Fs, Fs); // wzyx ^ 2
|
||||
SSE_MULPS (mVU, Fs, Fs); // wzyx ^ 2
|
||||
SSE_MOVSS_XMM_to_XMM (PQ, Fs); // x ^ 2
|
||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xe1); // wzyx -> wzxy
|
||||
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2
|
||||
SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2
|
||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xD2); // wzxy -> wxyz
|
||||
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
||||
SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -296,7 +296,7 @@ mVUop(mVU_ELENG) {
|
|||
pass2 {
|
||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
mVU_sumXYZ(xmmPQ, Fs);
|
||||
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -311,7 +311,7 @@ mVUop(mVU_ERCPR) {
|
|||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -324,10 +324,10 @@ mVUop(mVU_ERLENG) {
|
|||
pass2 {
|
||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
mVU_sumXYZ(xmmPQ, Fs);
|
||||
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -340,9 +340,9 @@ mVUop(mVU_ERSADD) {
|
|||
pass2 {
|
||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
mVU_sumXYZ(xmmPQ, Fs);
|
||||
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -358,7 +358,7 @@ mVUop(mVU_ERSQRT) {
|
|||
SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip);
|
||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
||||
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -371,7 +371,7 @@ mVUop(mVU_ESADD) {
|
|||
pass2 {
|
||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
mVU_sumXYZ(xmmPQ, Fs);
|
||||
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
}
|
||||
|
@ -379,10 +379,10 @@ mVUop(mVU_ESADD) {
|
|||
}
|
||||
|
||||
#define esinHelper(addr) { \
|
||||
SSE_MULSS_XMM_to_XMM (t2, t1); \
|
||||
SSE_MULSS(mVU, t2, t1); \
|
||||
SSE_MOVAPS_XMM_to_XMM(Fs, t2); \
|
||||
SSE_MULSS_M32_to_XMM (Fs, (uptr)addr); \
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs); \
|
||||
SSE_ADDSS(mVU, xmmPQ, Fs); \
|
||||
}
|
||||
|
||||
mVUop(mVU_ESIN) {
|
||||
|
@ -394,17 +394,17 @@ mVUop(mVU_ESIN) {
|
|||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||
SSE_MULSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_MULSS (mVU, Fs, t1);
|
||||
SSE_MOVAPS_XMM_to_XMM (t2, Fs);
|
||||
SSE_MULSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_MULSS (mVU, Fs, t1);
|
||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||
SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2);
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE_ADDSS (mVU, xmmPQ, Fs);
|
||||
esinHelper(mVU_S3);
|
||||
esinHelper(mVU_S4);
|
||||
SSE_MULSS_XMM_to_XMM (t2, t1);
|
||||
SSE_MULSS (mVU, t2, t1);
|
||||
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5);
|
||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2);
|
||||
SSE_ADDSS (mVU, xmmPQ, t2);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
mVU->regAlloc->clearNeeded(t1);
|
||||
|
@ -433,9 +433,9 @@ mVUop(mVU_ESUM) {
|
|||
int t1 = mVU->regAlloc->allocReg();
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x1b);
|
||||
SSE_ADDPS_XMM_to_XMM (Fs, t1);
|
||||
SSE_ADDPS (mVU, Fs, t1);
|
||||
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x01);
|
||||
SSE_ADDSS_XMM_to_XMM (Fs, t1);
|
||||
SSE_ADDSS (mVU, Fs, t1);
|
||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
|
|
@ -220,6 +220,16 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
|
||||
#define _1mb (0x100000)
|
||||
|
||||
//#undef CHECK_VU_OVERFLOW
|
||||
//#undef CHECK_VU_EXTRA_OVERFLOW
|
||||
//#undef CHECK_VU_SIGN_OVERFLOW
|
||||
//#undef CHECK_VU_UNDERFLOW
|
||||
//#define CHECK_VU_OVERFLOW 1
|
||||
//#define CHECK_VU_EXTRA_OVERFLOW 1
|
||||
//#define CHECK_VU_SIGN_OVERFLOW 1
|
||||
//#define CHECK_VU_UNDERFLOW 1
|
||||
#define clampE 0//CHECK_VU_EXTRA_OVERFLOW
|
||||
|
||||
// Flag Info
|
||||
#define __Status (mVUregs.needExactMatch & 1)
|
||||
#define __Mac (mVUregs.needExactMatch & 2)
|
||||
|
@ -271,12 +281,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
|
||||
|
||||
// Cache Limit Check
|
||||
#define mVUcacheCheck(ptr, start, limit) { \
|
||||
uptr diff = ptr - start; \
|
||||
if (diff >= limit) { \
|
||||
#define mVUcacheCheck(ptr, start, limit) { \
|
||||
uptr diff = ptr - start; \
|
||||
if (diff >= limit) { \
|
||||
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", mVU->index, diff); \
|
||||
mVUreset(mVU); \
|
||||
} \
|
||||
mVUreset(mVU); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define mVUdebugNOW(isEndPC) { \
|
||||
|
|
|
@ -23,8 +23,8 @@
|
|||
//------------------------------------------------------------------
|
||||
|
||||
// Used for Result Clamping
|
||||
void mVUclamp1(int reg, int regT1, int xyzw) {
|
||||
if (CHECK_VU_OVERFLOW) {
|
||||
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||
|
@ -39,8 +39,8 @@ void mVUclamp1(int reg, int regT1, int xyzw) {
|
|||
}
|
||||
|
||||
// Used for Operand Clamping
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||
if (CHECK_VU_SIGN_OVERFLOW) {
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE)) {
|
||||
int regT1b = 0;
|
||||
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
|
||||
switch (xyzw) {
|
||||
|
@ -61,7 +61,15 @@ void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) {
|
|||
}
|
||||
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||
}
|
||||
else mVUclamp1(reg, regT1, xyzw);
|
||||
else mVUclamp1(reg, regT1, xyzw, bClampE);
|
||||
}
|
||||
|
||||
// Optional extra clamp: forwards to mVUclamp2 with bClampE = 1, but only when
// clampE is non-zero; when clampE is 0 this function does nothing.
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
// Optional extra clamp: forwards to mVUclamp1 with bClampE = 1, but only when
// clampE is non-zero; when clampE is 0 this function does nothing.
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp1(reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -444,46 +452,60 @@ void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
|
|||
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
||||
}
|
||||
|
||||
void SSE_MAXPS(mV, int to, int from, int t1, int t2) {
|
||||
// Wraps an SSE emitter call: expands to opX(to, from), using the `to` and
// `from` names of the enclosing function. The mVUclamp3 calls on both operands
// and the mVUclamp4 call on the result are commented out, so as written no
// clamping code is emitted around the operation.
#define clampOp(opX) { \
|
||||
/*mVUclamp3(mVU, to, t1, 0xf);*/ \
|
||||
/*mVUclamp3(mVU, from, t1, 0xf);*/ \
|
||||
opX(to, from); \
|
||||
/*mVUclamp4(to, t1, 0xf);*/ \
|
||||
}
|
||||
|
||||
void SSE_MAXPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
|
||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
||||
}
|
||||
void SSE_MINPS(mV, int to, int from, int t1, int t2) {
|
||||
void SSE_MINPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
|
||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
||||
}
|
||||
void SSE_MAXSS(mV, int to, int from, int t1, int t2) {
|
||||
void SSE_MAXSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
|
||||
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
||||
}
|
||||
void SSE_MINSS(mV, int to, int from, int t1, int t2) {
|
||||
void SSE_MINSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
|
||||
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
||||
}
|
||||
void SSE_ADD2SS(mV, int to, int from, int t1, int t2) {
|
||||
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); }
|
||||
void SSE_ADD2SS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM); }
|
||||
else { ADD_SS(mVU, to, from, t1, t2); }
|
||||
}
|
||||
void SSE_ADD2PS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
||||
|
||||
void SSE_ADD2PS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_ADDPS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
||||
void SSE_ADDPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_ADDSS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_ADDSS_XMM_to_XMM(to, from);
|
||||
void SSE_ADDSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDSS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_SUBPS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_SUBPS_XMM_to_XMM(to, from);
|
||||
void SSE_SUBPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_SUBPS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_SUBSS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_SUBSS_XMM_to_XMM(to, from);
|
||||
void SSE_SUBSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_SUBSS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_MULPS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_MULPS_XMM_to_XMM(to, from);
|
||||
void SSE_MULPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_MULPS_XMM_to_XMM);
|
||||
}
|
||||
void SSE_MULSS(mV, int to, int from, int t1, int t2) {
|
||||
SSE_MULSS_XMM_to_XMM(to, from);
|
||||
void SSE_MULSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_MULSS_XMM_to_XMM);
|
||||
}
|
||||
// DIVPS wrapper: emits SSE_DIVPS_XMM_to_XMM(to, from) through the clampOp
// macro. t1 and t2 default to -1 so callers may omit them.
void SSE_DIVPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_DIVPS_XMM_to_XMM);
|
||||
}
|
||||
// DIVSS wrapper: emits SSE_DIVSS_XMM_to_XMM(to, from) through the clampOp
// macro. t1 and t2 default to -1 so callers may omit them.
void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_DIVSS_XMM_to_XMM);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
|
|
@ -132,8 +132,9 @@ bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
|
|||
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
|
||||
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
||||
opCase1 {
|
||||
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
|
||||
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||
else if (clampE) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; }
|
||||
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
|
||||
}
|
||||
opCase2 {
|
||||
tempFt = mVU->regAlloc->allocReg(_Ft_);
|
||||
|
@ -144,7 +145,7 @@ void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
|||
}
|
||||
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||
opCase4 {
|
||||
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
|
||||
if (!clampE && _XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
|
||||
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
|
||||
}
|
||||
}
|
||||
|
@ -301,7 +302,7 @@ mVUop(mVU_OPMULA) {
|
|||
|
||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
||||
SSE_MULPS_XMM_to_XMM(Fs, Ft);
|
||||
SSE_MULPS(mVU, Fs, Ft);
|
||||
mVU->regAlloc->clearNeeded(Ft);
|
||||
mVUupdateFlags(mVU, Fs);
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
|
@ -320,8 +321,8 @@ mVUop(mVU_OPMSUB) {
|
|||
|
||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
||||
SSE_MULPS_XMM_to_XMM(Fs, Ft);
|
||||
SSE_SUBPS_XMM_to_XMM(ACC, Fs);
|
||||
SSE_MULPS(mVU, Fs, Ft);
|
||||
SSE_SUBPS(mVU, ACC, Fs);
|
||||
mVU->regAlloc->clearNeeded(Fs);
|
||||
mVU->regAlloc->clearNeeded(Ft);
|
||||
mVUupdateFlags(mVU, ACC);
|
||||
|
|
Loading…
Reference in New Issue