- Implemented Nneeve's logical min/max algorithm. This should fix the problems with DaZ and mVU.
- Applied a patch by Gigaherz that more clearly distinguishes Immediate values in microProgram log files.
- Added a speedhack to disable the logical min/max code. (see below)

Note:
From my testing, using DaZ on mVU doesn't do much. However, I have an AMD CPU, and AMD CPUs don't benefit from DaZ as much as Intel C2Ds do, so this could be affecting my results.

The logical min/max code is SLOW, and because the benefit I get from DaZ is small, I get better performance with DaZ off and the min/max speedhack on (which disables the extra min/max code).

It would be nice if someone with an Intel C2D could compare the speed of:
-mVU normal without DaZ
-mVU normal with DaZ
-mVU min/max speedhack without DaZ
-mVU min/max speedhack with DaZ


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1177 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-05-14 07:36:10 +00:00
parent 3c715556e9
commit b3bdaab4f0
7 changed files with 180 additions and 117 deletions

View File

@ -166,7 +166,7 @@ microVUt(void) mVUallocFMAC4a(int& ACC, int& Fs, int& Ft) {
microVUt(void) mVUallocFMAC4b(int& ACC, int& Fs) {
microVU* mVU = mVUx;
if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fs, xmmT1, _xyzw_ACC);
mVUmergeRegs<vuIndex>(ACC, Fs, _X_Y_Z_W);
mVUmergeRegs(ACC, Fs, _X_Y_Z_W);
}
//------------------------------------------------------------------
@ -435,7 +435,7 @@ microVUt(void) mVUallocFMAC14a(int& ACCw, int& ACCr, int& Fs, int& Ft) {
microVUt(void) mVUallocFMAC14b(int& ACCw, int& ACCr) {
microVU* mVU = mVUx;
if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(ACCr, xmmFt, _xyzw_ACC);
mVUmergeRegs<vuIndex>(ACCw, ACCr, _X_Y_Z_W);
mVUmergeRegs(ACCw, ACCr, _X_Y_Z_W);
}
//------------------------------------------------------------------

View File

@ -128,22 +128,19 @@ microVUt(int) mVUsetFlags(int* xStatus, int* xMac, int* xClip) {
mVUinfo |= findFlagInst(xMac, cycles) << 16; // _fvmInstance
mVUinfo |= findFlagInst(xClip, cycles) << 20; // _fvcInstance
mVUinfo |= (xS & 3) << 12; // _fsInstance
mVUinfo |= (xM & 3) << 10; // _fmInstance
mVUinfo |= (xC & 3) << 14; // _fcInstance
mVUinfo |= xS << 12; // _fsInstance
mVUinfo |= xM << 10; // _fmInstance
mVUinfo |= xC << 14; // _fcInstance
if (doStatus || isFSSET || doDivFlag)
xStatus[xS++ & 3] = cycles + 4;
if (doMac)
xMac[xM++ & 3] = cycles + 4;
if (doClip)
xClip[xC++ & 3] = cycles + 4;
if (doStatus || isFSSET || doDivFlag) { xStatus[xS] = cycles + 4; xS = (xS+1) & 3; }
if (doMac) { xMac [xM] = cycles + 4; xM = (xM+1) & 3; }
if (doClip) { xClip [xC] = cycles + 4; xC = (xC+1) & 3; }
cycles++;
incPC2(2);
}
mVUregs.flags = ((__Clip) ? 0 : ((xC & 3) << 2)) | ((__Status) ? 0 : (xS & 3));
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
return cycles;
}
@ -180,7 +177,6 @@ microVUt(void) mVUsetupFlags(int* xStatus, int* xMac, int* xClip, int cycles) {
if (__Clip) {
int bClip[4];
sortFlag(xClip, bClip, cycles);
//SysPrintf("__Clip\n");
SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)mVU->clipFlag);
SSE_SHUFPS_XMM_to_XMM (xmmT1, xmmT1, shuffleClip);
SSE_MOVAPS_XMM_to_M128((uptr)mVU->clipFlag, xmmT1);

View File

@ -41,7 +41,7 @@ microVUx(void) __mVULog(const char* fmt, ...) {
microVUt(void) __mVUdumpProgram(int progIndex) {
microVU* mVU = mVUx;
bool bitX[9];
bool bitX[7];
char str[30];
int delay = 0;
mVUbranch = 0;
@ -73,10 +73,8 @@ microVUt(void) __mVUdumpProgram(int progIndex) {
bitX[4] = 0;
bitX[5] = 0;
bitX[6] = 0;
bitX[7] = 0;
bitX[8] = 0;
if (mVU->code & _Ibit_) { bitX[0] = 1; bitX[5] = 1; bitX[7] = 1; }
if (mVU->code & _Ibit_) { bitX[0] = 1; bitX[5] = 1; }
if (mVU->code & _Ebit_) { bitX[1] = 1; bitX[5] = 1; delay = 2; }
if (mVU->code & _Mbit_) { bitX[2] = 1; bitX[5] = 1; }
if (mVU->code & _Dbit_) { bitX[3] = 1; bitX[5] = 1; }
@ -101,12 +99,18 @@ microVUt(void) __mVUdumpProgram(int progIndex) {
}
iPC = i;
if (bitX[7]) { mVUlog("<font color=\"#0070ff\">"); }
mVU->code = mVU->prog.prog[progIndex].data[i];
if(bitX[0]) {
mVUlog("<br>\n<font color=\"#FF7000\">");
mVUlog("[%04x] (%08x) %f", i*4, mVU->code, *(float*)&mVU->code);
mVUlog("</font>\n\n<br><br>");
}
else {
mVUlog("<br>\n[%04x] (%08x) ", i*4, mVU->code);
mVUopL<vuIndex, 2>();
mVUlog("\n\n<br><br>");
if (bitX[7]) { mVUlog("</font>"); }
}
}
mVUlog("</font>\n");
mVUlog("</body>\n");

View File

@ -74,7 +74,7 @@ microVUf(void) mVU_DIV() {
x86SetJ8(djmp);
mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFs, writeQ ? 4 : 8);
mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
}
pass3 { mVUlog("DIV Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}
@ -92,7 +92,7 @@ microVUf(void) mVU_SQRT() {
if (CHECK_VU_OVERFLOW) SSE_MINSS_XMM_to_XMM(xmmFt, xmmMax); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(xmmFt, xmmFt);
mVUunpack_xyzw<vuIndex>(xmmFt, xmmFt, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFt, writeQ ? 4 : 8);
mVUmergeRegs(xmmPQ, xmmFt, writeQ ? 4 : 8);
}
pass3 { mVUlog("SQRT Q, vf%02d%s", _Ft_, _Ftf_String); }
}
@ -130,7 +130,7 @@ microVUf(void) mVU_RSQRT() {
x86SetJ8(djmp);
mVUunpack_xyzw<vuIndex>(xmmFs, xmmFs, 0);
mVUmergeRegs<vuIndex>(xmmPQ, xmmFs, writeQ ? 4 : 8);
mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8);
}
pass3 { mVUlog("RSQRT Q, vf%02d%s, vf%02d%s", _Fs_, _Fsf_String, _Ft_, _Ftf_String); }
}

View File

@ -107,6 +107,7 @@ declareAllVariables
#define getVUmem(x) (((vuIndex == 1) ? (x & 0x3ff) : ((x >= 0x400) ? (x & 0x43f) : (x & 0xff))) * 16)
#define offsetSS ((_X) ? (0) : ((_Y) ? (4) : ((_Z) ? 8: 12)))
#define offsetReg ((_X) ? (0) : ((_Y) ? (1) : ((_Z) ? 2: 3)))
#define xmmT1 0 // Temp Reg
#define xmmFs 1 // Holds the Value of Fs (writes back result Fd)
@ -174,6 +175,7 @@ declareAllVariables
#define bSaveAddr (((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) / 8)
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
#define mVUflagHack (mVUcurProg.sFlagHack)
// Pass 1 uses these to set mVUinfo
#define _isNOP (1<<0) // Skip Lower Instruction
@ -287,9 +289,9 @@ declareAllVariables
#define mVUdumpProg 0&&
#endif
// Status Flag Speed Hack
#define CHECK_VU_FLAGHACK 0 // Set to 1 to turn hack on
#define mVUflagHack (mVUcurProg.sFlagHack)
// Speed Hacks (Set to 1 to turn On)
#define CHECK_VU_FLAGHACK 0 // Status Flag Speed Hack
#define CHECK_VU_MINMAXHACK 0 // Min/Max Speed Hack
// Cache Limit Check
#define mVUcacheCheck(ptr, start, limit) { \

View File

@ -205,7 +205,7 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) {
}
// Modifies the Source Reg!
microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) {
void mVUmergeRegs(int dest, int src, int xyzw) {
xyzw &= 0xf;
if ( (dest != src) && (xyzw != 0) ) {
if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
@ -316,4 +316,65 @@ microVUt(void) mVUcheckSflag(int progIndex) {
}
}
// Masks used by MIN_MAX_ / MIN_MAX_SS, applied as "PAND MASK1" followed by "POR MASK2".
// Each pair of u32 entries covers one 64-bit slot: {low dword, high dword}.
// MASK1: keeps the whole low dword and only the top bit (0x80000000) of the high dword.
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK1[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000};
// MASK2: leaves the low dword untouched and sets bit 30 (0x40000000) in the high dword.
static const u32 PCSX2_ALIGNED16(MIN_MAX_MASK2[4]) = {0x00000000, 0x40000000, 0x00000000, 0x40000000};
// Emits the "logical" min/max of 'from' into 'to' for all four lanes.
// The work is done in two passes (XY, then ZW): each pass spreads two lanes
// of both operands across 64-bit slots, applies MIN_MAX_MASK1/MIN_MAX_MASK2,
// compares them with MINPD/MAXPD, and merges the two results back into 'to'.
//   to   - destination register (also the first operand)
//   from - second operand
//   min  - true emits MINPD, false emits MAXPD
// Warning: Modifies xmmT1 and xmmT2
void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
	// One row per pass: { PSHUFD immediate used to spread the lane pair, mVUmergeRegs mask }
	static const int lanePairs[2][2] = {
		{ 0x50, 0xc },	// XY
		{ 0xfa, 0x3 }	// ZW
	};

	for (int pass = 0; pass < 2; pass++) {
		const int spreadShuffle = lanePairs[pass][0];
		const int mergeMask     = lanePairs[pass][1];

		// First operand -> xmmT1
		SSE2_PSHUFD_XMM_to_XMM(xmmT1, to, spreadShuffle);
		SSE2_PAND_M128_to_XMM (xmmT1, (uptr)MIN_MAX_MASK1);
		SSE2_POR_M128_to_XMM  (xmmT1, (uptr)MIN_MAX_MASK2);

		// Second operand -> xmmT2
		SSE2_PSHUFD_XMM_to_XMM(xmmT2, from, spreadShuffle);
		SSE2_PAND_M128_to_XMM (xmmT2, (uptr)MIN_MAX_MASK1);
		SSE2_POR_M128_to_XMM  (xmmT2, (uptr)MIN_MAX_MASK2);

		if (min) {
			SSE2_MINPD_XMM_to_XMM(xmmT1, xmmT2);
		}
		else {
			SSE2_MAXPD_XMM_to_XMM(xmmT1, xmmT2);
		}

		// Gather the low dword of each 64-bit result, then write this pass's lanes into 'to'
		SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x88);
		mVUmergeRegs(to, xmmT1, mergeMask);
	}
}
// Emits the single-scalar variant of the "logical" min/max: both registers are
// shuffled in place with PSHUFD 0x50, masked with MIN_MAX_MASK1/MIN_MAX_MASK2,
// and then compared with MINPD/MAXPD, leaving the result in 'to'.
//   min - true emits MINPD, false emits MAXPD
// Warning: Both 'to' and 'from' are overwritten in place, so their upper 3 lanes
// no longer hold their original values afterwards.
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
	// Same preparation for each operand, destination first and source second
	const x86SSERegType operands[2] = { to, from };
	for (int idx = 0; idx < 2; idx++) {
		const x86SSERegType reg = operands[idx];
		SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x50);
		SSE2_PAND_M128_to_XMM (reg, (uptr)MIN_MAX_MASK1);
		SSE2_POR_M128_to_XMM  (reg, (uptr)MIN_MAX_MASK2);
	}

	if (min) {
		SSE2_MINPD_XMM_to_XMM(to, from);
		return;
	}
	SSE2_MAXPD_XMM_to_XMM(to, from);
}
// Packed max: emits the logical max sequence (MIN_MAX_, which modifies xmmT1 and
// xmmT2) unless the min/max speedhack is on, in which case a plain MAXPS is emitted.
void SSE_MAX2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	if (!CHECK_VU_MINMAXHACK) {
		MIN_MAX_(to, from, false);
		return;
	}
	SSE_MAXPS_XMM_to_XMM(to, from);
}
// Packed min: emits the logical min sequence (MIN_MAX_, which modifies xmmT1 and
// xmmT2) unless the min/max speedhack is on, in which case a plain MINPS is emitted.
void SSE_MIN2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	if (!CHECK_VU_MINMAXHACK) {
		MIN_MAX_(to, from, true);
		return;
	}
	SSE_MINPS_XMM_to_XMM(to, from);
}
// Scalar max: emits the logical max sequence (MIN_MAX_SS, which overwrites lanes of
// both 'to' and 'from') unless the min/max speedhack is on, in which case a plain
// MAXSS is emitted.
void SSE_MAX2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	if (!CHECK_VU_MINMAXHACK) {
		MIN_MAX_SS(to, from, false);
		return;
	}
	SSE_MAXSS_XMM_to_XMM(to, from);
}
// Scalar min: emits the logical min sequence (MIN_MAX_SS, which overwrites lanes of
// both 'to' and 'from') unless the min/max speedhack is on, in which case a plain
// MINSS is emitted.
void SSE_MIN2SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
	if (!CHECK_VU_MINMAXHACK) {
		MIN_MAX_SS(to, from, true);
		return;
	}
	SSE_MINSS_XMM_to_XMM(to, from);
}
#endif //PCSX2_MICROVU

View File

@ -25,7 +25,7 @@
#define AND_XYZW ((_XYZW_SS && modXYZW) ? (1) : (doMac ? (_X_Y_Z_W) : (flipMask[_X_Y_Z_W])))
#define ADD_XYZW ((_XYZW_SS && modXYZW) ? (_X ? 3 : (_Y ? 2 : (_Z ? 1 : 0))) : 0)
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { SHL16ItoR(gprReg, ADD_XYZW); } }
#define SHIFT_XYZW(gprReg) { if (_XYZW_SS && modXYZW && !_W) { SHL32ItoR(gprReg, ADD_XYZW); } }
// Note: If modXYZW is true, then it adjusts XYZW for Single Scalar operations
microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modXYZW) {
@ -57,8 +57,8 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
AND32ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
if (doMac) SHL16ItoR(mReg, 4 + ADD_XYZW);
if (doStatus) OR16ItoR(sReg, 0x82); // SS, S flags
if (doMac) SHL32ItoR(mReg, 4 + ADD_XYZW);
if (doStatus) OR32ItoR(sReg, 0x82); // SS, S flags
if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
x86SetJ8(pjmp);
@ -67,7 +67,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX
AND32ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
if (doMac) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); }
if (doStatus) { OR16ItoR(sReg, 0x41); } // ZS, Z flags
if (doStatus) { OR32ItoR(sReg, 0x41); } // ZS, Z flags
x86SetJ8(pjmp);
//-------------------------Write back flags------------------------------
@ -546,18 +546,18 @@ microVUf(void) mVU_MSUBAx() { mVU_FMAC15(SUB, "MSUBAx"); }
microVUf(void) mVU_MSUBAy() { mVU_FMAC15(SUB, "MSUBAy"); }
microVUf(void) mVU_MSUBAz() { mVU_FMAC15(SUB, "MSUBAz"); }
microVUf(void) mVU_MSUBAw() { mVU_FMAC15(SUB, "MSUBAw"); }
microVUf(void) mVU_MAX() { mVU_FMAC1 (MAX, "MAX"); }
microVUf(void) mVU_MAXi() { mVU_FMAC6 (MAX, "MAXi"); }
microVUf(void) mVU_MAXx() { mVU_FMAC3 (MAX, "MAXx"); }
microVUf(void) mVU_MAXy() { mVU_FMAC3 (MAX, "MAXy"); }
microVUf(void) mVU_MAXz() { mVU_FMAC3 (MAX, "MAXz"); }
microVUf(void) mVU_MAXw() { mVU_FMAC3 (MAX, "MAXw"); }
microVUf(void) mVU_MINI() { mVU_FMAC1 (MIN, "MINI"); }
microVUf(void) mVU_MINIi() { mVU_FMAC6 (MIN, "MINIi"); }
microVUf(void) mVU_MINIx() { mVU_FMAC3 (MIN, "MINIx"); }
microVUf(void) mVU_MINIy() { mVU_FMAC3 (MIN, "MINIy"); }
microVUf(void) mVU_MINIz() { mVU_FMAC3 (MIN, "MINIz"); }
microVUf(void) mVU_MINIw() { mVU_FMAC3 (MIN, "MINIw"); }
microVUf(void) mVU_MAX() { mVU_FMAC1 (MAX2, "MAX"); }
microVUf(void) mVU_MAXi() { mVU_FMAC6 (MAX2, "MAXi"); }
microVUf(void) mVU_MAXx() { mVU_FMAC3 (MAX2, "MAXx"); }
microVUf(void) mVU_MAXy() { mVU_FMAC3 (MAX2, "MAXy"); }
microVUf(void) mVU_MAXz() { mVU_FMAC3 (MAX2, "MAXz"); }
microVUf(void) mVU_MAXw() { mVU_FMAC3 (MAX2, "MAXw"); }
microVUf(void) mVU_MINI() { mVU_FMAC1 (MIN2, "MINI"); }
microVUf(void) mVU_MINIi() { mVU_FMAC6 (MIN2, "MINIi"); }
microVUf(void) mVU_MINIx() { mVU_FMAC3 (MIN2, "MINIx"); }
microVUf(void) mVU_MINIy() { mVU_FMAC3 (MIN2, "MINIy"); }
microVUf(void) mVU_MINIz() { mVU_FMAC3 (MIN2, "MINIz"); }
microVUf(void) mVU_MINIw() { mVU_FMAC3 (MIN2, "MINIw"); }
microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL, "OPMULA"); }
microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB, "OPMSUB"); }
microVUf(void) mVU_NOP() { pass3 { mVUlog("NOP"); } }