mVU: Experimented with some code to clamp every ADD/SUB/MUL/DIV operation.

The code is off by default; it broke a lot of games...

This confirms my theory that the best way to handle clamping is to limit it to the places where we've tested that it fixes games.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1796 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-09-09 05:57:35 +00:00
parent c6a33eb2f0
commit fa4fdcfacd
4 changed files with 113 additions and 80 deletions

View File

@ -75,8 +75,8 @@ mVUop(mVU_DIV) {
djmp = JMP8(0); djmp = JMP8(0);
x86SetJ8(cjmp); x86SetJ8(cjmp);
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
SSE_DIVSS_XMM_to_XMM(Fs, Ft); SSE_DIVSS(mVU, Fs, Ft);
mVUclamp1(Fs, t1, 8); mVUclamp1(Fs, t1, 8, 1);
x86SetJ8(djmp); x86SetJ8(djmp);
writeQreg(Fs, mVUinfo.writeQ); writeQreg(Fs, mVUinfo.writeQ);
@ -134,8 +134,8 @@ mVUop(mVU_RSQRT) {
djmp = JMP8(0); djmp = JMP8(0);
x86SetJ8(ajmp); x86SetJ8(ajmp);
SSE_DIVSS_XMM_to_XMM(Fs, Ft); SSE_DIVSS(mVU, Fs, Ft);
mVUclamp1(Fs, t1, 8); mVUclamp1(Fs, t1, 8, 1);
x86SetJ8(djmp); x86SetJ8(djmp);
writeQreg(Fs, mVUinfo.writeQ); writeQreg(Fs, mVUinfo.writeQ);
@ -152,11 +152,11 @@ mVUop(mVU_RSQRT) {
//------------------------------------------------------------------ //------------------------------------------------------------------
#define EATANhelper(addr) { \ #define EATANhelper(addr) { \
SSE_MULSS_XMM_to_XMM (t2, Fs); \ SSE_MULSS(mVU, t2, Fs); \
SSE_MULSS_XMM_to_XMM (t2, Fs); \ SSE_MULSS(mVU, t2, Fs); \
SSE_MOVAPS_XMM_to_XMM(t1, t2); \ SSE_MOVAPS_XMM_to_XMM(t1, t2); \
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \ SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM (PQ, t1); \ SSE_ADDSS(mVU, PQ, t1); \
} }
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d) // ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
@ -185,7 +185,7 @@ mVUop(mVU_EATAN) {
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one);
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one); SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); SSE_DIVSS (mVU, Fs, xmmPQ);
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2); mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t1);
@ -203,9 +203,9 @@ mVUop(mVU_EATANxy) {
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x01); SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x01);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE_SUBSS_XMM_to_XMM (Fs, t1); // y-x, not y-1? >< SSE_SUBSS (mVU, Fs, t1); // y-x, not y-1? ><
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ); SSE_ADDSS (mVU, t1, xmmPQ);
SSE_DIVSS_XMM_to_XMM (Fs, t1); SSE_DIVSS (mVU, Fs, t1);
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2); mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t1);
@ -223,9 +223,9 @@ mVUop(mVU_EATANxz) {
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x02); SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x02);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE_SUBSS_XMM_to_XMM (Fs, t1); SSE_SUBSS (mVU, Fs, t1);
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ); SSE_ADDSS (mVU, t1, xmmPQ);
SSE_DIVSS_XMM_to_XMM (Fs, t1); SSE_DIVSS (mVU, Fs, t1);
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2); mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t1);
@ -235,10 +235,10 @@ mVUop(mVU_EATANxz) {
} }
#define eexpHelper(addr) { \ #define eexpHelper(addr) { \
SSE_MULSS_XMM_to_XMM (t2, Fs); \ SSE_MULSS(mVU, t2, Fs); \
SSE_MOVAPS_XMM_to_XMM(t1, t2); \ SSE_MOVAPS_XMM_to_XMM(t1, t2); \
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \ SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1); \ SSE_ADDSS(mVU, xmmPQ, t1); \
} }
mVUop(mVU_EEXP) { mVUop(mVU_EEXP) {
@ -252,20 +252,20 @@ mVUop(mVU_EEXP) {
SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1); SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1);
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one); SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
SSE_MOVAPS_XMM_to_XMM (t1, Fs); SSE_MOVAPS_XMM_to_XMM (t1, Fs);
SSE_MULSS_XMM_to_XMM (t1, Fs); SSE_MULSS (mVU, t1, Fs);
SSE_MOVAPS_XMM_to_XMM (t2, t1); SSE_MOVAPS_XMM_to_XMM (t2, t1);
SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2); SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2);
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1); SSE_ADDSS (mVU, xmmPQ, t1);
eexpHelper(mVU_E3); eexpHelper(mVU_E3);
eexpHelper(mVU_E4); eexpHelper(mVU_E4);
eexpHelper(mVU_E5); eexpHelper(mVU_E5);
SSE_MULSS_XMM_to_XMM (t2, Fs); SSE_MULSS (mVU, t2, Fs);
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6); SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6);
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2); SSE_ADDSS (mVU, xmmPQ, t2);
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ); SSE_MULSS (mVU, xmmPQ, xmmPQ);
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ); SSE_MULSS (mVU, xmmPQ, xmmPQ);
SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (t2, xmmPQ); SSE_DIVSS (mVU, t2, xmmPQ);
SSE_MOVSS_XMM_to_XMM (xmmPQ, t2); SSE_MOVSS_XMM_to_XMM (xmmPQ, t2);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -276,18 +276,18 @@ mVUop(mVU_EEXP) {
} }
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2 // sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
microVUt(void) mVU_sumXYZ(int PQ, int Fs) { microVUt(void) mVU_sumXYZ(mV, int PQ, int Fs) {
if( x86caps.hasStreamingSIMD4Extensions ) { if( x86caps.hasStreamingSIMD4Extensions ) {
SSE4_DPPS_XMM_to_XMM(Fs, Fs, 0x71); SSE4_DPPS_XMM_to_XMM(Fs, Fs, 0x71);
SSE_MOVSS_XMM_to_XMM(PQ, Fs); SSE_MOVSS_XMM_to_XMM(PQ, Fs);
} }
else { else {
SSE_MULPS_XMM_to_XMM (Fs, Fs); // wzyx ^ 2 SSE_MULPS (mVU, Fs, Fs); // wzyx ^ 2
SSE_MOVSS_XMM_to_XMM (PQ, Fs); // x ^ 2 SSE_MOVSS_XMM_to_XMM (PQ, Fs); // x ^ 2
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xe1); // wzyx -> wzxy SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xe1); // wzyx -> wzxy
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2 SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xD2); // wzxy -> wxyz SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xD2); // wzxy -> wxyz
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2 SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
} }
} }
@ -296,7 +296,7 @@ mVUop(mVU_ELENG) {
pass2 { pass2 {
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(xmmPQ, Fs); mVU_sumXYZ (mVU, xmmPQ, Fs);
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ); SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -311,7 +311,7 @@ mVUop(mVU_ERCPR) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); SSE_DIVSS (mVU, Fs, xmmPQ);
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -324,10 +324,10 @@ mVUop(mVU_ERLENG) {
pass2 { pass2 {
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(xmmPQ, Fs); mVU_sumXYZ (mVU, xmmPQ, Fs);
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ); SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); SSE_DIVSS (mVU, Fs, xmmPQ);
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -340,9 +340,9 @@ mVUop(mVU_ERSADD) {
pass2 { pass2 {
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(xmmPQ, Fs); mVU_sumXYZ (mVU, xmmPQ, Fs);
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); SSE_DIVSS (mVU, Fs, xmmPQ);
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -358,7 +358,7 @@ mVUop(mVU_ERSQRT) {
SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip);
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); SSE_DIVSS (mVU, Fs, xmmPQ);
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -371,7 +371,7 @@ mVUop(mVU_ESADD) {
pass2 { pass2 {
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
mVU_sumXYZ(xmmPQ, Fs); mVU_sumXYZ(mVU, xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
} }
@ -379,10 +379,10 @@ mVUop(mVU_ESADD) {
} }
#define esinHelper(addr) { \ #define esinHelper(addr) { \
SSE_MULSS_XMM_to_XMM (t2, t1); \ SSE_MULSS(mVU, t2, t1); \
SSE_MOVAPS_XMM_to_XMM(Fs, t2); \ SSE_MOVAPS_XMM_to_XMM(Fs, t2); \
SSE_MULSS_M32_to_XMM (Fs, (uptr)addr); \ SSE_MULSS_M32_to_XMM (Fs, (uptr)addr); \
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs); \ SSE_ADDSS(mVU, xmmPQ, Fs); \
} }
mVUop(mVU_ESIN) { mVUop(mVU_ESIN) {
@ -394,17 +394,17 @@ mVUop(mVU_ESIN) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE_MOVAPS_XMM_to_XMM (t1, Fs); SSE_MOVAPS_XMM_to_XMM (t1, Fs);
SSE_MULSS_XMM_to_XMM (Fs, t1); SSE_MULSS (mVU, Fs, t1);
SSE_MOVAPS_XMM_to_XMM (t2, Fs); SSE_MOVAPS_XMM_to_XMM (t2, Fs);
SSE_MULSS_XMM_to_XMM (Fs, t1); SSE_MULSS (mVU, Fs, t1);
SSE_MOVAPS_XMM_to_XMM (t1, Fs); SSE_MOVAPS_XMM_to_XMM (t1, Fs);
SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2); SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2);
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs); SSE_ADDSS (mVU, xmmPQ, Fs);
esinHelper(mVU_S3); esinHelper(mVU_S3);
esinHelper(mVU_S4); esinHelper(mVU_S4);
SSE_MULSS_XMM_to_XMM (t2, t1); SSE_MULSS (mVU, t2, t1);
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5); SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5);
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2); SSE_ADDSS (mVU, xmmPQ, t2);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(t1); mVU->regAlloc->clearNeeded(t1);
@ -433,9 +433,9 @@ mVUop(mVU_ESUM) {
int t1 = mVU->regAlloc->allocReg(); int t1 = mVU->regAlloc->allocReg();
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x1b); SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x1b);
SSE_ADDPS_XMM_to_XMM (Fs, t1); SSE_ADDPS (mVU, Fs, t1);
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x01); SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x01);
SSE_ADDSS_XMM_to_XMM (Fs, t1); SSE_ADDSS (mVU, Fs, t1);
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);

View File

@ -220,6 +220,16 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4)))) #define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
#define _1mb (0x100000) #define _1mb (0x100000)
//#undef CHECK_VU_OVERFLOW
//#undef CHECK_VU_EXTRA_OVERFLOW
//#undef CHECK_VU_SIGN_OVERFLOW
//#undef CHECK_VU_UNDERFLOW
//#define CHECK_VU_OVERFLOW 1
//#define CHECK_VU_EXTRA_OVERFLOW 1
//#define CHECK_VU_SIGN_OVERFLOW 1
//#define CHECK_VU_UNDERFLOW 1
#define clampE 0//CHECK_VU_EXTRA_OVERFLOW
// Flag Info // Flag Info
#define __Status (mVUregs.needExactMatch & 1) #define __Status (mVUregs.needExactMatch & 1)
#define __Mac (mVUregs.needExactMatch & 2) #define __Mac (mVUregs.needExactMatch & 2)
@ -271,12 +281,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// Cache Limit Check // Cache Limit Check
#define mVUcacheCheck(ptr, start, limit) { \ #define mVUcacheCheck(ptr, start, limit) { \
uptr diff = ptr - start; \ uptr diff = ptr - start; \
if (diff >= limit) { \ if (diff >= limit) { \
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", mVU->index, diff); \ Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", mVU->index, diff); \
mVUreset(mVU); \ mVUreset(mVU); \
} \ } \
} }
#define mVUdebugNOW(isEndPC) { \ #define mVUdebugNOW(isEndPC) { \

View File

@ -23,8 +23,8 @@
//------------------------------------------------------------------ //------------------------------------------------------------------
// Used for Result Clamping // Used for Result Clamping
void mVUclamp1(int reg, int regT1, int xyzw) { void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
if (CHECK_VU_OVERFLOW) { if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
switch (xyzw) { switch (xyzw) {
case 1: case 2: case 4: case 8: case 1: case 2: case 4: case 8:
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals); SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
@ -39,8 +39,8 @@ void mVUclamp1(int reg, int regT1, int xyzw) {
} }
// Used for Operand Clamping // Used for Operand Clamping
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) { void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
if (CHECK_VU_SIGN_OVERFLOW) { if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE)) {
int regT1b = 0; int regT1b = 0;
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; } if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
switch (xyzw) { switch (xyzw) {
@ -61,7 +61,15 @@ void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) {
} }
if (regT1b) mVU->regAlloc->clearNeeded(regT1); if (regT1b) mVU->regAlloc->clearNeeded(regT1);
} }
else mVUclamp1(reg, regT1, xyzw); else mVUclamp1(reg, regT1, xyzw, bClampE);
}
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
}
void mVUclamp4(int reg, int regT1, int xyzw) {
if (clampE) mVUclamp1(reg, regT1, xyzw, 1);
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -444,46 +452,60 @@ void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
if (t2b) mVU->regAlloc->clearNeeded(t2); if (t2b) mVU->regAlloc->clearNeeded(t2);
} }
void SSE_MAXPS(mV, int to, int from, int t1, int t2) { #define clampOp(opX) { \
/*mVUclamp3(mVU, to, t1, 0xf);*/ \
/*mVUclamp3(mVU, from, t1, 0xf);*/ \
opX(to, from); \
/*mVUclamp4(to, t1, 0xf);*/ \
}
void SSE_MAXPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); } if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); } else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
} }
void SSE_MINPS(mV, int to, int from, int t1, int t2) { void SSE_MINPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); } if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); } else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
} }
void SSE_MAXSS(mV, int to, int from, int t1, int t2) { void SSE_MAXSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); } if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
else { MIN_MAX_SS(mVU, to, from, t1, 0); } else { MIN_MAX_SS(mVU, to, from, t1, 0); }
} }
void SSE_MINSS(mV, int to, int from, int t1, int t2) { void SSE_MINSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); } if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
else { MIN_MAX_SS(mVU, to, from, t1, 1); } else { MIN_MAX_SS(mVU, to, from, t1, 1); }
} }
void SSE_ADD2SS(mV, int to, int from, int t1, int t2) { void SSE_ADD2SS(mV, int to, int from, int t1 = -1, int t2 = -1) {
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); } if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM); }
else { ADD_SS(mVU, to, from, t1, t2); } else { ADD_SS(mVU, to, from, t1, t2); }
} }
void SSE_ADD2PS(mV, int to, int from, int t1, int t2) {
SSE_ADDPS_XMM_to_XMM(to, from); void SSE_ADD2PS(mV, int to, int from, int t1 = -1, int t2 = -1) {
clampOp(SSE_ADDPS_XMM_to_XMM);
} }
void SSE_ADDPS(mV, int to, int from, int t1, int t2) { void SSE_ADDPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_ADDPS_XMM_to_XMM(to, from); clampOp(SSE_ADDPS_XMM_to_XMM);
} }
void SSE_ADDSS(mV, int to, int from, int t1, int t2) { void SSE_ADDSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_ADDSS_XMM_to_XMM(to, from); clampOp(SSE_ADDSS_XMM_to_XMM);
} }
void SSE_SUBPS(mV, int to, int from, int t1, int t2) { void SSE_SUBPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_SUBPS_XMM_to_XMM(to, from); clampOp(SSE_SUBPS_XMM_to_XMM);
} }
void SSE_SUBSS(mV, int to, int from, int t1, int t2) { void SSE_SUBSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_SUBSS_XMM_to_XMM(to, from); clampOp(SSE_SUBSS_XMM_to_XMM);
} }
void SSE_MULPS(mV, int to, int from, int t1, int t2) { void SSE_MULPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_MULPS_XMM_to_XMM(to, from); clampOp(SSE_MULPS_XMM_to_XMM);
} }
void SSE_MULSS(mV, int to, int from, int t1, int t2) { void SSE_MULSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
SSE_MULSS_XMM_to_XMM(to, from); clampOp(SSE_MULSS_XMM_to_XMM);
}
void SSE_DIVPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
clampOp(SSE_DIVPS_XMM_to_XMM);
}
void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
clampOp(SSE_DIVSS_XMM_to_XMM);
} }
//------------------------------------------------------------------ //------------------------------------------------------------------

View File

@ -132,8 +132,9 @@ bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
// Sets Up Ft Reg for Normal, BC, I, and Q Cases // Sets Up Ft Reg for Normal, BC, I, and Q Cases
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) { void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
opCase1 { opCase1 {
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; } else if (clampE) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; }
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
} }
opCase2 { opCase2 {
tempFt = mVU->regAlloc->allocReg(_Ft_); tempFt = mVU->regAlloc->allocReg(_Ft_);
@ -144,7 +145,7 @@ void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
} }
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; } opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
opCase4 { opCase4 {
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; } if (!clampE && _XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); } else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
} }
} }
@ -301,7 +302,7 @@ mVUop(mVU_OPMULA) {
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
SSE_MULPS_XMM_to_XMM(Fs, Ft); SSE_MULPS(mVU, Fs, Ft);
mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(Ft);
mVUupdateFlags(mVU, Fs); mVUupdateFlags(mVU, Fs);
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
@ -320,8 +321,8 @@ mVUop(mVU_OPMSUB) {
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
SSE_MULPS_XMM_to_XMM(Fs, Ft); SSE_MULPS(mVU, Fs, Ft);
SSE_SUBPS_XMM_to_XMM(ACC, Fs); SSE_SUBPS(mVU, ACC, Fs);
mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft); mVU->regAlloc->clearNeeded(Ft);
mVUupdateFlags(mVU, ACC); mVUupdateFlags(mVU, ACC);