microVU: Optimizations, cleanup, and fixed a bug...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1589 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-07-31 02:30:32 +00:00
parent a4e0009a7e
commit dea6f1ced2
5 changed files with 76 additions and 86 deletions

View File

@ -26,17 +26,17 @@
// Flag Allocators
//------------------------------------------------------------------
#define getFlagReg(regX, fInst) { \
switch (fInst) { \
case 0: regX = gprF0; break; \
case 1: regX = gprF1; break; \
case 2: regX = gprF2; break; \
case 3: regX = gprF3; break; \
default: \
Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \
regX = gprF0; \
break; \
} \
#define getFlagReg(regX, fInst) { \
switch (fInst) { \
case 0: regX = gprF0; break; \
case 1: regX = gprF1; break; \
case 2: regX = gprF2; break; \
case 3: regX = gprF3; break; \
default: \
Console::Error("microVU Error: fInst = %d", params fInst); \
regX = gprF0; \
break; \
} \
}
#define setBitSFLAG(bitTest, bitSet) { \
@ -114,31 +114,33 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
}
//------------------------------------------------------------------
// I/Q/P Reg Allocators
// I/P/Q Reg Allocators
//------------------------------------------------------------------
#define getIreg(reg, modXYZW) { \
SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); \
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); \
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \
microVUt(void) getIreg(mV, int reg, bool modXYZW) {
SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL);
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8);
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); }
}
#define getQreg(reg) { \
mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); \
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/ \
microVUt(void) getPreg(mV, int reg) {
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
}
#define getPreg(reg) { \
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); \
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \
microVUt(void) getQreg(mV, int reg) {
mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ);
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/
}
//------------------------------------------------------------------
// Lower Instruction Allocator Helpers
//------------------------------------------------------------------
// VF to GPR
#define getReg8(GPRreg, _reg_, _fxf_) { \
if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \
else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \
microVUt(void) writeQreg(mV, int reg, int qInstance) {
if (qInstance) {
if (!cpucaps.hasStreamingSIMD4Extensions) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, reg);
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
}
else SSE4_INSERTPS_XMM_to_XMM(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
}
else SSE_MOVSS_XMM_to_XMM(xmmPQ, reg);
}

View File

@ -31,10 +31,12 @@
} \
}
#define startLoop() { \
mVUdebug1(); \
memset(&mVUinfo, 0, sizeof(mVUinfo)); \
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \
#define startLoop() { \
if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \
if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \
if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \
memset(&mVUinfo, 0, sizeof(mVUinfo)); \
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \
}
#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); }

View File

@ -51,8 +51,10 @@ mVUop(mVU_DIV) {
pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); }
pass2 {
u8 *ajmp, *bjmp, *cjmp, *djmp;
int Ft;
if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
else Ft = mVU->regAlloc->allocReg(_Ft_);
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_)));
int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_)));
int t1 = mVU->regAlloc->allocReg();
testZero(Ft, t1, gprT1); // Test if Ft is zero
@ -77,9 +79,7 @@ mVUop(mVU_DIV) {
mVUclamp1(Fs, t1, 8);
x86SetJ8(djmp);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
writeQreg(mVU, Fs, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft);
@ -99,9 +99,7 @@ mVUop(mVU_SQRT) {
if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(Ft, Ft);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, Ft);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
writeQreg(mVU, Ft, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Ft);
}
@ -140,9 +138,7 @@ mVUop(mVU_RSQRT) {
mVUclamp1(Fs, t1, 8);
x86SetJ8(djmp);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs);
if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);
writeQreg(mVU, Fs, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft);
@ -741,7 +737,7 @@ mVUop(mVU_MFP) {
pass1 { mVUanalyzeMFP(mVU, _Ft_); }
pass2 {
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
getPreg(Ft);
getPreg(mVU, Ft);
mVU->regAlloc->clearNeeded(Ft);
}
pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); }
@ -759,7 +755,7 @@ mVUop(mVU_MOVE) {
mVUop(mVU_MR32) {
pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); }
pass2 {
int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf);
int Fs = mVU->regAlloc->allocReg(_Fs_);
int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0))));
else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39);

View File

@ -242,20 +242,8 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
// Debug Stuff...
#ifdef mVUdebug
#define mVUprint Console::Status
#define mVUdebug1() { \
if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \
if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \
if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \
if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \
if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \
}
#else
#define mVUprint 0&&
#define mVUdebug1() { \
if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \
if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \
if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \
}
#endif
// Program Logging...

View File

@ -34,7 +34,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1 = -1, int regT2 = -1, bool
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
if (mVUsFlagHack) { sFLAG.doFlag = 0; }
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; }
if (!(!mFLAG.doFlag || (_XYZW_SS && modXYZW))) {
if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) {
if (regT2 < 0) { regT2 = mVU->regAlloc->allocReg(); regT2b = 1; }
SSE2_PSHUFD_XMM_to_XMM(regT2, reg, 0x1B); // Flip wzyx to xyzw
}
@ -119,27 +119,28 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) {
}
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
void setupFtReg(microVU* mVU, int& Ft, int opCase) {
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
opCase1 {
if (_XYZW_SS2) Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W);
else Ft = mVU->regAlloc->allocReg(_Ft_);
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
}
opCase2 {
int tempFt = mVU->regAlloc->allocReg(_Ft_);
tempFt = mVU->regAlloc->allocReg(_Ft_);
Ft = mVU->regAlloc->allocReg();
mVUunpack_xyzw(Ft, tempFt, _bc_);
mVU->regAlloc->clearNeeded(tempFt);
tempFt = Ft;
}
opCase3 { Ft = mVU->regAlloc->allocReg(); getIreg(Ft, 1); }
opCase4 { Ft = mVU->regAlloc->allocReg(); getQreg(Ft); }
opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); }
opCase4 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(mVU, Ft); }
}
// Normal FMAC Opcodes
void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, const char* opName) {
pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); }
pass2 {
int Fs, Ft, ACC;
setupFtReg(mVU, Ft, opCase);
int Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase);
if (isACC) {
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
@ -156,11 +157,11 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
if (isACC) {
if (_XYZW_SS) SSE_MOVSS_XMM_to_XMM(ACC, Fs);
else mVUmergeRegs(ACC, Fs, _X_Y_Z_W);
mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1)));
mVUupdateFlags(mVU, ACC, Fs, tempFt);
if (_XYZW_SS2) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
mVU->regAlloc->clearNeeded(ACC);
}
else mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? Ft : -1));
else mVUupdateFlags(mVU, Fs, tempFt);
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
mVU->regAlloc->clearNeeded(Ft);
@ -172,8 +173,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co
void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* opName) {
pass1 { setupPass1(mVU, opCase, 1, 0); }
pass2 {
int Fs, Ft, ACC;
setupFtReg(mVU, Ft, opCase);
int Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase);
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0);
@ -185,16 +186,17 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
else SSE_PS[2](mVU, Fs, Ft, -1, -1);
if (_XYZW_SS || _X_Y_Z_W == 0xf) {
if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, -1, -1);
else SSE_PS[opType](mVU, ACC, Fs, -1, -1);
mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1)));
if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, -1);
else SSE_PS[opType](mVU, ACC, Fs, tempFt, -1);
mVUupdateFlags(mVU, ACC, Fs, tempFt);
if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W));
}
else {
int tempACC = mVU->regAlloc->allocReg();
SSE_MOVAPS_XMM_to_XMM(tempACC, ACC);
SSE_PS[opType](mVU, tempACC, Fs, -1, -1);
SSE_PS[opType](mVU, tempACC, Fs, tempFt, -1);
mVUmergeRegs(ACC, tempACC, _X_Y_Z_W);
mVUupdateFlags(mVU, ACC, Fs, tempFt);
mVU->regAlloc->clearNeeded(tempACC);
}
@ -209,8 +211,8 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op
void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
pass1 { setupPass1(mVU, opCase, 0, 0); }
pass2 {
int Fs, Ft, ACC;
setupFtReg(mVU, Ft, opCase);
int Fs, Ft, ACC, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase);
ACC = mVU->regAlloc->allocReg(32);
Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W);
@ -218,12 +220,12 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); }
opCase2 { mVUclamp1(Fs, -1, _X_Y_Z_W); } // Clamp Needed for alot of games (TOTA, DoM, etc...)
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, -1, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, -1, -1); }
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, tempFt, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, tempFt, -1); }
if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); }
mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? Ft : -1));
mVUupdateFlags(mVU, Fs, tempFt);
mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First
mVU->regAlloc->clearNeeded(Ft);
@ -236,16 +238,16 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) {
void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) {
pass1 { setupPass1(mVU, opCase, 0, 0); }
pass2 {
int Fs, Ft, Fd;
setupFtReg(mVU, Ft, opCase);
int Fs, Ft, Fd, tempFt;
setupFtReg(mVU, Ft, tempFt, opCase);
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W);
Fd = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W);
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, -1, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, -1, -1); }
if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, tempFt, -1); }
else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, tempFt, -1); }
mVUupdateFlags(mVU, Fd, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1)));
mVUupdateFlags(mVU, Fd, Fs, tempFt);
mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First
mVU->regAlloc->clearNeeded(Ft);