mirror of https://github.com/PCSX2/pcsx2.git
microVU: some optimizations...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1605 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
a41f80cfca
commit
3f2f8571df
|
@ -117,12 +117,6 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) {
|
|||
// I/P/Q Reg Allocators
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Loads the VU I register (mVU->regs->VI[REG_I]) into XMM register 'reg'.
//   reg     - index of the destination XMM register.
//   modXYZW - when true and the current op is single-component (_XYZW_SS),
//             the broadcast step at the end is skipped.
microVUt(void) getIreg(mV, int reg, bool modXYZW) {
|
||||
// Scalar load of the 32-bit I value into 'reg'.
SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL);
|
||||
// Optional clamp, only when CHECK_VU_EXTRA_OVERFLOW is enabled.
// NOTE(review): the meaning of the (-1, 8) arguments is defined by mVUclamp2, not visible here.
if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8);
|
||||
// Skip the unpack when only one component is used with modXYZW set, or when the
// field mask is exactly 8; otherwise unpack component 0 of 'reg' into 'reg'
// (presumably replicating it across all lanes - confirm against mVUunpack_xyzw).
if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); }
|
||||
}
|
||||
|
||||
microVUt(void) getPreg(mV, int reg) {
|
||||
mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP));
|
||||
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
|
||||
|
|
|
@ -75,15 +75,19 @@ microVUt(void) doSwapOp(mV) {
|
|||
else { mVUopL(mVU, 1); incPC(1); doUpperOp(); }
|
||||
}
|
||||
|
||||
microVUt(void) doIbit(mV) {
|
||||
microVUt(void) doIbit(microVU* mVU) {
|
||||
if (mVUup.iBit) {
|
||||
incPC(-1);
|
||||
u32 tempI;
|
||||
mVU->regAlloc->clearRegVF(33);
|
||||
|
||||
if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) {
|
||||
Console::Status("microVU%d: Clamping I Reg", params mVU->index);
|
||||
int tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
|
||||
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
|
||||
tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg
|
||||
}
|
||||
else MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI);
|
||||
else tempI = curI;
|
||||
|
||||
MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI);
|
||||
incPC(1);
|
||||
}
|
||||
}
|
||||
|
@ -174,7 +178,9 @@ microVUt(void) mVUoptimizePipeState(mV) {
|
|||
for (int i = 0; i < 16; i++) {
|
||||
optimizeReg(mVUregs.VI[i]);
|
||||
}
|
||||
mVUregs.r = 0;
|
||||
if (mVUregs.q) { optimizeReg(mVUregs.q); if (!mVUregs.q) { incQ(); } }
|
||||
if (mVUregs.p) { optimizeReg(mVUregs.p); if (!mVUregs.p) { incP(); } }
|
||||
mVUregs.r = 0; // There are no stalls on the R-reg, so its Safe to discard info
|
||||
}
|
||||
|
||||
// Recompiles Code for Proper Flags and Q/P regs on Block Linkings
|
||||
|
@ -204,12 +210,12 @@ microVUt(void) mVUincCycles(mV, int x) {
|
|||
}
|
||||
if (mVUregs.q) {
|
||||
if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo.doDivFlag = 1; } }
|
||||
else { calcCycles(mVUregs.q, x); }
|
||||
else { calcCycles(mVUregs.q, x); }
|
||||
if (!mVUregs.q) { incQ(); }
|
||||
}
|
||||
if (mVUregs.p) {
|
||||
calcCycles(mVUregs.p, x);
|
||||
if (!mVUregs.p || (mVUregs.p && mVUregsTemp.p)) { incP(); }
|
||||
if (!mVUregs.p || mVUregsTemp.p) { incP(); }
|
||||
}
|
||||
if (mVUregs.xgkick) {
|
||||
calcCycles(mVUregs.xgkick, x);
|
||||
|
@ -277,7 +283,8 @@ microVUt(void) mVUendProgram(mV, int isEbit, int* xStatus, int* xMac, int* xClip
|
|||
|
||||
if (isEbit) {
|
||||
mVUprint("mVUcompile ebit");
|
||||
memset(&mVUinfo, 0, sizeof(mVUinfo));
|
||||
memset(&mVUinfo, 0, sizeof(mVUinfo));
|
||||
memset(&mVUregsTemp, 0, sizeof(mVUregsTemp));
|
||||
mVUincCycles(mVU, 100); // Ensures Valid P/Q instances (And sets all cycle data to 0)
|
||||
mVUcycles -= 100;
|
||||
qInst = mVU->q;
|
||||
|
@ -430,13 +437,12 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
|||
// Sets Up Flag instances
|
||||
int xStatus[4], xMac[4], xClip[4];
|
||||
int xCycles = mVUsetFlags(mVU, xStatus, xMac, xClip);
|
||||
mVUtestCycles(mVU);
|
||||
|
||||
|
||||
// Fix up vi15 const info for propagation through blocks
|
||||
mVUregs.vi15 = (mVUconstReg[15].isValid && !CHECK_VU_CONSTHACK) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
|
||||
|
||||
// Optimize the End Pipeline State for nicer Block Linking
|
||||
mVUoptimizePipeState(mVU);
|
||||
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
|
||||
mVUtestCycles(mVU); // Update VU Cycles and Exit Early if Necessary
|
||||
|
||||
// Second Pass
|
||||
iPC = mVUstartPC;
|
||||
|
|
|
@ -152,6 +152,7 @@ microVUt(int) mVUsetFlags(mV, int* xStatus, int* xMac, int* xClip) {
|
|||
}
|
||||
|
||||
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
|
||||
iPC = endPC;
|
||||
return cycles;
|
||||
}
|
||||
|
||||
|
|
|
@ -163,9 +163,10 @@ struct microIR {
|
|||
void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW);
|
||||
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW);
|
||||
void mVUloadReg(int reg, uptr offset, int xyzw);
|
||||
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs);
|
||||
|
||||
struct microXMM {
|
||||
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC)
|
||||
int reg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
|
||||
int xyzw; // xyzw to write back (0 = Don't write back anything AND cached vfReg has all vectors valid)
|
||||
int count; // Count of when last used
|
||||
bool isNeeded; // Is needed for current instruction
|
||||
|
@ -221,10 +222,16 @@ public:
|
|||
xmmReg[reg].xyzw = 0;
|
||||
xmmReg[reg].isNeeded = 0;
|
||||
}
|
||||
// Calls clearReg() on every XMM slot whose cached register number equals VFreg.
//   VFreg - register number compared against xmmReg[i].reg; in this numbering
//           32 denotes ACC and 33 denotes the I reg.
void clearRegVF(int VFreg) {
|
||||
// Scan all xmmTotal slots; more than one slot may match, so do not stop early.
for (int i = 0; i < xmmTotal; i++) {
|
||||
if (xmmReg[i].reg == VFreg) clearReg(i);
|
||||
}
|
||||
}
|
||||
void writeBackReg(int reg, bool invalidateRegs = 1) {
|
||||
if ((xmmReg[reg].reg > 0) && xmmReg[reg].xyzw) { // Reg was modified and not Temp or vf0
|
||||
if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
if (xmmReg[reg].reg == 33) SSE_MOVSS_XMM_to_M32((uptr)&vuRegs->VI[REG_I].UL, reg);
|
||||
else if (xmmReg[reg].reg == 32) mVUsaveReg(reg, (uptr)&vuRegs->ACC.UL[0], xmmReg[reg].xyzw, 1);
|
||||
else mVUsaveReg(reg, (uptr)&vuRegs->VF[xmmReg[reg].reg].UL[0], xmmReg[reg].xyzw, 1);
|
||||
if (invalidateRegs) {
|
||||
for (int i = 0; i < xmmTotal; i++) {
|
||||
if ((i == reg) || xmmReg[i].isNeeded) continue;
|
||||
|
@ -307,13 +314,15 @@ public:
|
|||
|
||||
if (vfWriteReg >= 0) { // Reg Will Be Modified (allow partial reg loading)
|
||||
if ((vfLoadReg == 0) && !(xyzw & 1)) { SSE2_PXOR_XMM_to_XMM(x, x); }
|
||||
else if (vfLoadReg == 32) mVUloadReg(x, (uptr)&vuRegs->ACC.UL[0], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
|
||||
else if (vfLoadReg == 33) mVUloadIreg(x, xyzw, vuRegs);
|
||||
else if (vfLoadReg == 32) mVUloadReg (x, (uptr)&vuRegs->ACC.UL[0], xyzw);
|
||||
else if (vfLoadReg >= 0) mVUloadReg (x, (uptr)&vuRegs->VF[vfLoadReg].UL[0], xyzw);
|
||||
xmmReg[x].reg = vfWriteReg;
|
||||
xmmReg[x].xyzw = xyzw;
|
||||
}
|
||||
else { // Reg Will Not Be Modified (always load full reg for caching)
|
||||
if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
|
||||
if (vfLoadReg == 33) mVUloadIreg(x, 0xf, vuRegs);
|
||||
else if (vfLoadReg == 32) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->ACC.UL[0]);
|
||||
else if (vfLoadReg >= 0) SSE_MOVAPS_M128_to_XMM(x, (uptr)&vuRegs->VF[vfLoadReg].UL[0]);
|
||||
xmmReg[x].reg = vfLoadReg;
|
||||
xmmReg[x].xyzw = 0;
|
||||
|
|
|
@ -84,6 +84,7 @@ declareAllVariables
|
|||
// True when the _X, _Y, _Z and _W flags sum to exactly 1
// (presumably each flag is 0 or 1, i.e. exactly one component is selected - TODO confirm).
#define _XYZW_SS (_X+_Y+_Z+_W==1)
|
||||
// _XYZW_SS with the additional requirement that the _X_Y_Z_W mask is not 8.
#define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8))
|
||||
// True when the _X_Y_Z_W mask equals 0xf (all four mask bits set).
#define _XYZW_PS (_X_Y_Z_W == 0xf)
|
||||
// True when mask 'x' is exactly one of 8, 4, 2 or 1 (a single bit of the 4-bit field mask).
// The argument is parenthesized at every use so that expressions such as (a|b) are compared
// as a whole instead of being split by operator precedence. Note: 'x' is evaluated up to
// four times, so pass an expression without side effects.
#define _XYZWss(x) (((x)==8) || ((x)==4) || ((x)==2) || ((x)==1))
|
||||
|
||||
#define _bc_ (mVU->code & 0x3)
|
||||
#define _bc_x ((mVU->code & 0x3) == 0)
|
||||
|
|
|
@ -94,6 +94,11 @@ void mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) {
|
|||
}
|
||||
}
|
||||
|
||||
// Loads the VU I register (vuRegs->VI[REG_I]) into XMM register 'reg'.
//   reg    - index of the destination XMM register.
//   xyzw   - field mask of the components the caller needs; a single-bit mask
//            (8, 4, 2 or 1) skips the shuffle below.
//   vuRegs - VU register file that holds the I register.
void mVUloadIreg(int reg, int xyzw, VURegs* vuRegs) {
|
||||
// Scalar load of the 32-bit I value into 'reg'.
SSE_MOVSS_M32_to_XMM(reg, (uptr)&vuRegs->VI[REG_I].UL);
|
||||
// For multi-component masks, shuffle with immediate 0 so every lane receives lane 0.
if (!_XYZWss(xyzw)) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0);
|
||||
}
|
||||
|
||||
// Modifies the Source Reg!
|
||||
void mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) {
|
||||
/*SSE_MOVAPS_M128_to_XMM(xmmT2, offset);
|
||||
|
|
|
@ -120,18 +120,18 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) {
|
|||
|
||||
// Sets Up Ft Reg for Normal, BC, I, and Q Cases
|
||||
void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
|
||||
opCase1 {
|
||||
opCase1 {
|
||||
if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||
else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; }
|
||||
}
|
||||
opCase2 {
|
||||
opCase2 {
|
||||
tempFt = mVU->regAlloc->allocReg(_Ft_);
|
||||
Ft = mVU->regAlloc->allocReg();
|
||||
Ft = mVU->regAlloc->allocReg();
|
||||
mVUunpack_xyzw(Ft, tempFt, _bc_);
|
||||
mVU->regAlloc->clearNeeded(tempFt);
|
||||
tempFt = Ft;
|
||||
}
|
||||
opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); }
|
||||
opCase3 { Ft = mVU->regAlloc->allocReg(33, 0, _X_Y_Z_W); tempFt = Ft; }
|
||||
opCase4 {
|
||||
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
|
||||
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
|
||||
|
|
Loading…
Reference in New Issue