microVU: some optimizations...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1605 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-08-05 04:03:24 +00:00
parent a41f80cfca
commit 3f2f8571df
7 changed files with 44 additions and 28 deletions

View File

@ -117,12 +117,6 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
// I/P/Q Reg Allocators
//------------------------------------------------------------------
// Emits the load of the VU I register into XMM register `reg`.
//   reg     - index of the XMM register that receives the value
//   modXYZW - combined with _XYZW_SS to decide whether the unpack step can be skipped
// The scalar move always happens; the optional clamp is gated by
// CHECK_VU_EXTRA_OVERFLOW, and the unpack is skipped for the two cases below.
microVUt(void) getIreg(mV, int reg, bool modXYZW) {
	SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL);

	if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8);

	// No unpack when the op is single-component and the caller allows it,
	// or when the destination mask equals 8.
	const bool skipUnpack = (_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8);
	if (!skipUnpack) {
		mVUunpack_xyzw(reg, reg, 0);
	}
}
microVUt(void) getPreg(mV, int reg) {
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/

View File

@ -75,15 +75,19 @@ microVUt(void) doSwapOp(mV) {
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
}
microVUt(void) doIbit(mV) {
microVUt(void) doIbit(microVU* mVU) {
if (mVUup.iBit) {
incPC(-1);
u32 tempI;
mVU->regAlloc->clearRegVF(33);
if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) {
Console::Status("microVU%d: Clamping I Reg", params mVU->index);
int tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
}
else MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI);
else tempI = curI;
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
incPC(1);
}
}
@ -174,7 +178,9 @@ microVUt(void) mVUoptimizePipeState(mV) {
for (int i = 0; i < 16; i++) {
optimizeReg(mVUregs.VI[i]);
}
mVUregs.r = 0;
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } }
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } }
mVUregs.r = 0; // There are no stalls on the R-reg, so it's safe to discard info
}
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
@ -204,12 +210,12 @@ microVUt(void) mVUincCycles(mV, int x) {
}
if (mVUregs.q) {
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
else { calcCycles(mVUregs.q, x); }
else { calcCycles(mVUregs.q, x); }
if (!mVUregs.q) { incQ(); }
}
if (mVUregs.p) {
calcCycles(mVUregs.p, x);
if (!mVUregs.p || (mVUregs.p && mVUregsTemp.p)) { incP(); }
if (!mVUregs.p || mVUregsTemp.p) { incP(); }
}
if (mVUregs.xgkick) {
calcCycles(mVUregs.xgkick, x);
@ -277,7 +283,8 @@ microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip
if (isEbit) {
mVUprint("mVUcompile ebit");
memset(&mVUinfo, 0, sizeof(mVUinfo));
memset(&mVUinfo, 0, sizeof(mVUinfo));
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp));
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
mVUcycles -= 100;
qInst = mVU->q;
@ -430,13 +437,12 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
// Sets Up Flag instances
int xStatus[4], xMac[4], xClip[4];
int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip);
mVUtestCycles(mVU);
// Fix up vi15 const info for propagation through blocks
mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
// Optimize the End Pipeline State for nicer Block Linking
mVUoptimizePipeState(mVU);
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary
// Second Pass
iPC = mVUstartPC;

View File

@ -152,6 +152,7 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
}
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
iPC = endPC;
return cycles;
}

View File

@ -163,9 +163,10 @@ struct microIR {
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
void mVUloadReg(int reg, uptr offset, int xyzw);
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
struct microXMM {
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC)
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
int count; // Count of when last used
bool isNeeded; // Is needed for current instruction
@ -221,10 +222,16 @@ public:
xmmReg[reg].xyzw = 0;
xmmReg[reg].isNeeded = 0;
}
// Calls clearReg() on every XMM slot whose tracked register number equals VFreg.
//   VFreg - register number to match against xmmReg[].reg
// All xmmTotal slots are scanned, so multiple slots holding the same number
// are all cleared.
void clearRegVF(int VFreg) {
	for (int slot = 0; slot < xmmTotal; slot++) {
		if (xmmReg[slot].reg != VFreg) continue; // slot tracks something else
		clearReg(slot);
	}
}
void writeBackReg(int reg, bool invalidateRegs = 1) {
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
if (invalidateRegs) {
for (int i = 0; i < xmmTotal; i++) {
if ((i == reg) || xmmReg[i].isNeeded) continue;
@ -307,13 +314,15 @@ public:
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
else if (vfLoadReg == 32) mVUloadReg(x, (uptr)&vuRegs->ACC.UL[0], xyzw);
else if (vfLoadReg >= 0) mVUloadReg(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
xmmReg[x].reg = vfWriteReg;
xmmReg[x].xyzw = xyzw;
}
else { // Reg Will Not Be Modified (always load full reg for caching)
if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
xmmReg[x].reg = vfLoadReg;
xmmReg[x].xyzw = 0;

View File

@ -84,6 +84,7 @@ declareAllVariables
#define _XYZW_SS (_X+_Y+_Z+_W==1)
#define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8))
#define _XYZW_PS (_X_Y_Z_W == 0xf)
// True when the mask x is exactly one of 8, 4, 2 or 1 (a single bit of a 4-bit mask).
// The argument is parenthesized so expression arguments (e.g. `a & b`) expand correctly.
// Note: x is evaluated up to four times, so avoid arguments with side effects.
#define _XYZWss(x)	(((x)==8) || ((x)==4) || ((x)==2) || ((x)==1))
#define _bc_ (mVU->code & 0x3)
#define _bc_x ((mVU->code & 0x3) == 0)

View File

@ -94,6 +94,11 @@ void mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
}
}
// Emits a load of the VU I register into XMM register `reg`.
//   reg    - destination XMM register index
//   xyzw   - component mask; when _XYZWss(xyzw) holds, the shuffle is skipped
//   vuRegs - register file whose VI[REG_I] value is read
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs) {
	const bool needsBroadcast = !_XYZWss(xyzw);

	SSE_MOVSS_M32_to_XMM(reg, (uptr)&vuRegs->VI[REG_I].UL);
	if (needsBroadcast) {
		// SHUFPS of reg with itself, immediate 0: copies lane 0 into every lane.
		SSE_SHUFPS_XMM_to_XMM(reg, reg, 0);
	}
}
// Modifies the Source Reg!
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
/*SSE_MOVAPS_M128_to_XMM(xmmT2, offset);

View File

@ -120,18 +120,18 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) {
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
opCase1 {
opCase1 {
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
}
opCase2 {
opCase2 {
tempFt = mVU->regAlloc->allocReg(_Ft_);
Ft = mVU->regAlloc->allocReg();
Ft = mVU->regAlloc->allocReg();
mVUunpack_xyzw(Ft, tempFt, _bc_);
mVU->regAlloc->clearNeeded(tempFt);
tempFt = Ft;
}
opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); }
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
opCase4 {
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }