mirror of https://github.com/PCSX2/pcsx2.git
mVU: Experimented with some code to clamp every ADD/SUB/MUL/DIV operation.
Code is off by default, as it broke a lot of games... This confirms my theory that the best way to handle clamping is to limit it to the places where we've tested that it fixes games. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1796 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c6a33eb2f0
commit
fa4fdcfacd
|
@ -75,8 +75,8 @@ mVUop(mVU_DIV) {
|
||||||
djmp = JMP8(0);
|
djmp = JMP8(0);
|
||||||
x86SetJ8(cjmp);
|
x86SetJ8(cjmp);
|
||||||
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
MOV32ItoM((uptr)&mVU->divFlag, 0); // Clear I/D flags
|
||||||
SSE_DIVSS_XMM_to_XMM(Fs, Ft);
|
SSE_DIVSS(mVU, Fs, Ft);
|
||||||
mVUclamp1(Fs, t1, 8);
|
mVUclamp1(Fs, t1, 8, 1);
|
||||||
x86SetJ8(djmp);
|
x86SetJ8(djmp);
|
||||||
|
|
||||||
writeQreg(Fs, mVUinfo.writeQ);
|
writeQreg(Fs, mVUinfo.writeQ);
|
||||||
|
@ -134,8 +134,8 @@ mVUop(mVU_RSQRT) {
|
||||||
|
|
||||||
djmp = JMP8(0);
|
djmp = JMP8(0);
|
||||||
x86SetJ8(ajmp);
|
x86SetJ8(ajmp);
|
||||||
SSE_DIVSS_XMM_to_XMM(Fs, Ft);
|
SSE_DIVSS(mVU, Fs, Ft);
|
||||||
mVUclamp1(Fs, t1, 8);
|
mVUclamp1(Fs, t1, 8, 1);
|
||||||
x86SetJ8(djmp);
|
x86SetJ8(djmp);
|
||||||
|
|
||||||
writeQreg(Fs, mVUinfo.writeQ);
|
writeQreg(Fs, mVUinfo.writeQ);
|
||||||
|
@ -152,11 +152,11 @@ mVUop(mVU_RSQRT) {
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
#define EATANhelper(addr) { \
|
#define EATANhelper(addr) { \
|
||||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
SSE_MULSS(mVU, t2, Fs); \
|
||||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
SSE_MULSS(mVU, t2, Fs); \
|
||||||
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
||||||
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
||||||
SSE_ADDSS_XMM_to_XMM (PQ, t1); \
|
SSE_ADDSS(mVU, PQ, t1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
// ToDo: Can Be Optimized Further? (takes approximately (~115 cycles + mem access time) on a c2d)
|
||||||
|
@ -185,7 +185,7 @@ mVUop(mVU_EATAN) {
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
SSE_SUBSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||||
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
@ -203,9 +203,9 @@ mVUop(mVU_EATANxy) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x01);
|
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x01);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_SUBSS_XMM_to_XMM (Fs, t1); // y-x, not y-1? ><
|
SSE_SUBSS (mVU, Fs, t1); // y-x, not y-1? ><
|
||||||
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ);
|
SSE_ADDSS (mVU, t1, xmmPQ);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, t1);
|
SSE_DIVSS (mVU, Fs, t1);
|
||||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
@ -223,9 +223,9 @@ mVUop(mVU_EATANxz) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x02);
|
SSE2_PSHUFD_XMM_to_XMM(Fs, t1, 0x02);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_SUBSS_XMM_to_XMM (Fs, t1);
|
SSE_SUBSS (mVU, Fs, t1);
|
||||||
SSE_ADDSS_XMM_to_XMM (t1, xmmPQ);
|
SSE_ADDSS (mVU, t1, xmmPQ);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, t1);
|
SSE_DIVSS (mVU, Fs, t1);
|
||||||
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
mVU_EATAN_(mVU, xmmPQ, Fs, t1, t2);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
@ -235,10 +235,10 @@ mVUop(mVU_EATANxz) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define eexpHelper(addr) { \
|
#define eexpHelper(addr) { \
|
||||||
SSE_MULSS_XMM_to_XMM (t2, Fs); \
|
SSE_MULSS(mVU, t2, Fs); \
|
||||||
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
SSE_MOVAPS_XMM_to_XMM(t1, t2); \
|
||||||
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
SSE_MULSS_M32_to_XMM (t1, (uptr)addr); \
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1); \
|
SSE_ADDSS(mVU, xmmPQ, t1); \
|
||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_EEXP) {
|
mVUop(mVU_EEXP) {
|
||||||
|
@ -252,20 +252,20 @@ mVUop(mVU_EEXP) {
|
||||||
SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1);
|
SSE_MULSS_M32_to_XMM (xmmPQ, (uptr)mVU_E1);
|
||||||
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
SSE_ADDSS_M32_to_XMM (xmmPQ, (uptr)mVU_one);
|
||||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||||
SSE_MULSS_XMM_to_XMM (t1, Fs);
|
SSE_MULSS (mVU, t1, Fs);
|
||||||
SSE_MOVAPS_XMM_to_XMM (t2, t1);
|
SSE_MOVAPS_XMM_to_XMM (t2, t1);
|
||||||
SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2);
|
SSE_MULSS_M32_to_XMM (t1, (uptr)mVU_E2);
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t1);
|
SSE_ADDSS (mVU, xmmPQ, t1);
|
||||||
eexpHelper(mVU_E3);
|
eexpHelper(mVU_E3);
|
||||||
eexpHelper(mVU_E4);
|
eexpHelper(mVU_E4);
|
||||||
eexpHelper(mVU_E5);
|
eexpHelper(mVU_E5);
|
||||||
SSE_MULSS_XMM_to_XMM (t2, Fs);
|
SSE_MULSS (mVU, t2, Fs);
|
||||||
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6);
|
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_E6);
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2);
|
SSE_ADDSS (mVU, xmmPQ, t2);
|
||||||
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
SSE_MULSS (mVU, xmmPQ, xmmPQ);
|
||||||
SSE_MULSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
SSE_MULSS (mVU, xmmPQ, xmmPQ);
|
||||||
SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one);
|
SSE_MOVSS_M32_to_XMM (t2, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (t2, xmmPQ);
|
SSE_DIVSS (mVU, t2, xmmPQ);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, t2);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, t2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -276,18 +276,18 @@ mVUop(mVU_EEXP) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
||||||
microVUt(void) mVU_sumXYZ(int PQ, int Fs) {
|
microVUt(void) mVU_sumXYZ(mV, int PQ, int Fs) {
|
||||||
if( x86caps.hasStreamingSIMD4Extensions ) {
|
if( x86caps.hasStreamingSIMD4Extensions ) {
|
||||||
SSE4_DPPS_XMM_to_XMM(Fs, Fs, 0x71);
|
SSE4_DPPS_XMM_to_XMM(Fs, Fs, 0x71);
|
||||||
SSE_MOVSS_XMM_to_XMM(PQ, Fs);
|
SSE_MOVSS_XMM_to_XMM(PQ, Fs);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
SSE_MULPS_XMM_to_XMM (Fs, Fs); // wzyx ^ 2
|
SSE_MULPS (mVU, Fs, Fs); // wzyx ^ 2
|
||||||
SSE_MOVSS_XMM_to_XMM (PQ, Fs); // x ^ 2
|
SSE_MOVSS_XMM_to_XMM (PQ, Fs); // x ^ 2
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xe1); // wzyx -> wzxy
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xe1); // wzyx -> wzxy
|
||||||
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2
|
SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xD2); // wzxy -> wxyz
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xD2); // wzxy -> wxyz
|
||||||
SSE_ADDSS_XMM_to_XMM (PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
SSE_ADDSS (mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -296,7 +296,7 @@ mVUop(mVU_ELENG) {
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -311,7 +311,7 @@ mVUop(mVU_ERCPR) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -324,10 +324,10 @@ mVUop(mVU_ERLENG) {
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, xmmPQ);
|
||||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -340,9 +340,9 @@ mVUop(mVU_ERSADD) {
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ (mVU, xmmPQ, Fs);
|
||||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -358,7 +358,7 @@ mVUop(mVU_ERSQRT) {
|
||||||
SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip);
|
SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip);
|
||||||
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one);
|
||||||
SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ);
|
SSE_DIVSS (mVU, Fs, xmmPQ);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -371,7 +371,7 @@ mVUop(mVU_ESADD) {
|
||||||
pass2 {
|
pass2 {
|
||||||
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
mVU_sumXYZ(xmmPQ, Fs);
|
mVU_sumXYZ(mVU, xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
}
|
}
|
||||||
|
@ -379,10 +379,10 @@ mVUop(mVU_ESADD) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define esinHelper(addr) { \
|
#define esinHelper(addr) { \
|
||||||
SSE_MULSS_XMM_to_XMM (t2, t1); \
|
SSE_MULSS(mVU, t2, t1); \
|
||||||
SSE_MOVAPS_XMM_to_XMM(Fs, t2); \
|
SSE_MOVAPS_XMM_to_XMM(Fs, t2); \
|
||||||
SSE_MULSS_M32_to_XMM (Fs, (uptr)addr); \
|
SSE_MULSS_M32_to_XMM (Fs, (uptr)addr); \
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs); \
|
SSE_ADDSS(mVU, xmmPQ, Fs); \
|
||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_ESIN) {
|
mVUop(mVU_ESIN) {
|
||||||
|
@ -394,17 +394,17 @@ mVUop(mVU_ESIN) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||||
SSE_MULSS_XMM_to_XMM (Fs, t1);
|
SSE_MULSS (mVU, Fs, t1);
|
||||||
SSE_MOVAPS_XMM_to_XMM (t2, Fs);
|
SSE_MOVAPS_XMM_to_XMM (t2, Fs);
|
||||||
SSE_MULSS_XMM_to_XMM (Fs, t1);
|
SSE_MULSS (mVU, Fs, t1);
|
||||||
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
SSE_MOVAPS_XMM_to_XMM (t1, Fs);
|
||||||
SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2);
|
SSE_MULSS_M32_to_XMM (Fs, (uptr)mVU_S2);
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_ADDSS (mVU, xmmPQ, Fs);
|
||||||
esinHelper(mVU_S3);
|
esinHelper(mVU_S3);
|
||||||
esinHelper(mVU_S4);
|
esinHelper(mVU_S4);
|
||||||
SSE_MULSS_XMM_to_XMM (t2, t1);
|
SSE_MULSS (mVU, t2, t1);
|
||||||
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5);
|
SSE_MULSS_M32_to_XMM (t2, (uptr)mVU_S5);
|
||||||
SSE_ADDSS_XMM_to_XMM (xmmPQ, t2);
|
SSE_ADDSS (mVU, xmmPQ, t2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(t1);
|
mVU->regAlloc->clearNeeded(t1);
|
||||||
|
@ -433,9 +433,9 @@ mVUop(mVU_ESUM) {
|
||||||
int t1 = mVU->regAlloc->allocReg();
|
int t1 = mVU->regAlloc->allocReg();
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance
|
||||||
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x1b);
|
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x1b);
|
||||||
SSE_ADDPS_XMM_to_XMM (Fs, t1);
|
SSE_ADDPS (mVU, Fs, t1);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x01);
|
SSE2_PSHUFD_XMM_to_XMM(t1, Fs, 0x01);
|
||||||
SSE_ADDSS_XMM_to_XMM (Fs, t1);
|
SSE_ADDSS (mVU, Fs, t1);
|
||||||
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
SSE_MOVSS_XMM_to_XMM (xmmPQ, Fs);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
|
|
@ -220,6 +220,16 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
|
#define shuffleSS(x) ((x==1)?(0x27):((x==2)?(0xc6):((x==4)?(0xe1):(0xe4))))
|
||||||
#define _1mb (0x100000)
|
#define _1mb (0x100000)
|
||||||
|
|
||||||
|
//#undef CHECK_VU_OVERFLOW
|
||||||
|
//#undef CHECK_VU_EXTRA_OVERFLOW
|
||||||
|
//#undef CHECK_VU_SIGN_OVERFLOW
|
||||||
|
//#undef CHECK_VU_UNDERFLOW
|
||||||
|
//#define CHECK_VU_OVERFLOW 1
|
||||||
|
//#define CHECK_VU_EXTRA_OVERFLOW 1
|
||||||
|
//#define CHECK_VU_SIGN_OVERFLOW 1
|
||||||
|
//#define CHECK_VU_UNDERFLOW 1
|
||||||
|
#define clampE 0//CHECK_VU_EXTRA_OVERFLOW
|
||||||
|
|
||||||
// Flag Info
|
// Flag Info
|
||||||
#define __Status (mVUregs.needExactMatch & 1)
|
#define __Status (mVUregs.needExactMatch & 1)
|
||||||
#define __Mac (mVUregs.needExactMatch & 2)
|
#define __Mac (mVUregs.needExactMatch & 2)
|
||||||
|
@ -271,12 +281,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
|
|
||||||
|
|
||||||
// Cache Limit Check
|
// Cache Limit Check
|
||||||
#define mVUcacheCheck(ptr, start, limit) { \
|
#define mVUcacheCheck(ptr, start, limit) { \
|
||||||
uptr diff = ptr - start; \
|
uptr diff = ptr - start; \
|
||||||
if (diff >= limit) { \
|
if (diff >= limit) { \
|
||||||
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", mVU->index, diff); \
|
Console::Status("microVU%d: Program cache limit reached. Size = 0x%x", mVU->index, diff); \
|
||||||
mVUreset(mVU); \
|
mVUreset(mVU); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define mVUdebugNOW(isEndPC) { \
|
#define mVUdebugNOW(isEndPC) { \
|
||||||
|
|
|
@ -23,8 +23,8 @@
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Used for Result Clamping
|
// Used for Result Clamping
|
||||||
void mVUclamp1(int reg, int regT1, int xyzw) {
|
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||||
if (CHECK_VU_OVERFLOW) {
|
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
case 1: case 2: case 4: case 8:
|
case 1: case 2: case 4: case 8:
|
||||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
SSE_MINSS_M32_to_XMM(reg, (uptr)mVU_maxvals);
|
||||||
|
@ -39,8 +39,8 @@ void mVUclamp1(int reg, int regT1, int xyzw) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used for Operand Clamping
|
// Used for Operand Clamping
|
||||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) {
|
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||||
if (CHECK_VU_SIGN_OVERFLOW) {
|
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE)) {
|
||||||
int regT1b = 0;
|
int regT1b = 0;
|
||||||
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
|
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
|
||||||
switch (xyzw) {
|
switch (xyzw) {
|
||||||
|
@ -61,7 +61,15 @@ void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||||
}
|
}
|
||||||
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||||
}
|
}
|
||||||
else mVUclamp1(reg, regT1, xyzw);
|
else mVUclamp1(reg, regT1, xyzw, bClampE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||||
|
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||||
|
if (clampE) mVUclamp1(reg, regT1, xyzw, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -444,46 +452,60 @@ void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
|
||||||
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SSE_MAXPS(mV, int to, int from, int t1, int t2) {
|
#define clampOp(opX) { \
|
||||||
|
/*mVUclamp3(mVU, to, t1, 0xf);*/ \
|
||||||
|
/*mVUclamp3(mVU, from, t1, 0xf);*/ \
|
||||||
|
opX(to, from); \
|
||||||
|
/*mVUclamp4(to, t1, 0xf);*/ \
|
||||||
|
}
|
||||||
|
|
||||||
|
void SSE_MAXPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
|
if (CHECK_VU_MINMAXHACK) { SSE_MAXPS_XMM_to_XMM(to, from); }
|
||||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 0); }
|
||||||
}
|
}
|
||||||
void SSE_MINPS(mV, int to, int from, int t1, int t2) {
|
void SSE_MINPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
|
if (CHECK_VU_MINMAXHACK) { SSE_MINPS_XMM_to_XMM(to, from); }
|
||||||
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
else { MIN_MAX_PS(mVU, to, from, t1, t2, 1); }
|
||||||
}
|
}
|
||||||
void SSE_MAXSS(mV, int to, int from, int t1, int t2) {
|
void SSE_MAXSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
|
if (CHECK_VU_MINMAXHACK) { SSE_MAXSS_XMM_to_XMM(to, from); }
|
||||||
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
else { MIN_MAX_SS(mVU, to, from, t1, 0); }
|
||||||
}
|
}
|
||||||
void SSE_MINSS(mV, int to, int from, int t1, int t2) {
|
void SSE_MINSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
|
if (CHECK_VU_MINMAXHACK) { SSE_MINSS_XMM_to_XMM(to, from); }
|
||||||
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
||||||
}
|
}
|
||||||
void SSE_ADD2SS(mV, int to, int from, int t1, int t2) {
|
void SSE_ADD2SS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
if (!CHECK_VUADDSUBHACK) { SSE_ADDSS_XMM_to_XMM(to, from); }
|
if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM); }
|
||||||
else { ADD_SS(mVU, to, from, t1, t2); }
|
else { ADD_SS(mVU, to, from, t1, t2); }
|
||||||
}
|
}
|
||||||
void SSE_ADD2PS(mV, int to, int from, int t1, int t2) {
|
|
||||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
void SSE_ADD2PS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
|
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_ADDPS(mV, int to, int from, int t1, int t2) {
|
void SSE_ADDPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_ADDPS_XMM_to_XMM(to, from);
|
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_ADDSS(mV, int to, int from, int t1, int t2) {
|
void SSE_ADDSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_ADDSS_XMM_to_XMM(to, from);
|
clampOp(SSE_ADDSS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_SUBPS(mV, int to, int from, int t1, int t2) {
|
void SSE_SUBPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_SUBPS_XMM_to_XMM(to, from);
|
clampOp(SSE_SUBPS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_SUBSS(mV, int to, int from, int t1, int t2) {
|
void SSE_SUBSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_SUBSS_XMM_to_XMM(to, from);
|
clampOp(SSE_SUBSS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_MULPS(mV, int to, int from, int t1, int t2) {
|
void SSE_MULPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_MULPS_XMM_to_XMM(to, from);
|
clampOp(SSE_MULPS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
void SSE_MULSS(mV, int to, int from, int t1, int t2) {
|
void SSE_MULSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
SSE_MULSS_XMM_to_XMM(to, from);
|
clampOp(SSE_MULSS_XMM_to_XMM);
|
||||||
|
}
|
||||||
|
void SSE_DIVPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
|
clampOp(SSE_DIVPS_XMM_to_XMM);
|
||||||
|
}
|
||||||
|
void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||||
|
clampOp(SSE_DIVSS_XMM_to_XMM);
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
|
|
@ -132,8 +132,9 @@ bool doSafeSub(microVU* mVU, int opCase, int opType, bool isACC) {
|
||||||
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
|
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
|
||||||
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
||||||
opCase1 {
|
opCase1 {
|
||||||
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
|
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||||
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
|
else if (clampE) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, 0xf); tempFt = Ft; }
|
||||||
|
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
|
||||||
}
|
}
|
||||||
opCase2 {
|
opCase2 {
|
||||||
tempFt = mVU->regAlloc->allocReg(_Ft_);
|
tempFt = mVU->regAlloc->allocReg(_Ft_);
|
||||||
|
@ -144,7 +145,7 @@ void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
||||||
}
|
}
|
||||||
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
|
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||||
opCase4 {
|
opCase4 {
|
||||||
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
|
if (!clampE && _XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
|
||||||
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
|
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -301,7 +302,7 @@ mVUop(mVU_OPMULA) {
|
||||||
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
||||||
SSE_MULPS_XMM_to_XMM(Fs, Ft);
|
SSE_MULPS(mVU, Fs, Ft);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVUupdateFlags(mVU, Fs);
|
mVUupdateFlags(mVU, Fs);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
|
@ -320,8 +321,8 @@ mVUop(mVU_OPMSUB) {
|
||||||
|
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY
|
||||||
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ
|
||||||
SSE_MULPS_XMM_to_XMM(Fs, Ft);
|
SSE_MULPS(mVU, Fs, Ft);
|
||||||
SSE_SUBPS_XMM_to_XMM(ACC, Fs);
|
SSE_SUBPS(mVU, ACC, Fs);
|
||||||
mVU->regAlloc->clearNeeded(Fs);
|
mVU->regAlloc->clearNeeded(Fs);
|
||||||
mVU->regAlloc->clearNeeded(Ft);
|
mVU->regAlloc->clearNeeded(Ft);
|
||||||
mVUupdateFlags(mVU, ACC);
|
mVUupdateFlags(mVU, ACC);
|
||||||
|
|
Loading…
Reference in New Issue