microVU: more optimizations

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1590 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-07-31 07:34:03 +00:00
parent dea6f1ced2
commit 8b288e8917
6 changed files with 31 additions and 27 deletions

View File

@ -128,12 +128,12 @@ microVUt(void) getPreg(mV, int reg) {
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/
}
microVUt(void) getQreg(mV, int reg) {
mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ);
// Loads a Q value into `reg` by calling mVUunpack_xyzw with xmmPQ as the source
// and `qInstance` as the third argument.
//   reg       - destination register index handed to mVUunpack_xyzw
//   qInstance - forwarded unchanged to mVUunpack_xyzw
// NOTE(review): qInstance presumably selects which Q instance held in xmmPQ is
// unpacked; confirm against the definition of mVUunpack_xyzw.
microVUt(void) getQreg(int reg, int qInstance) {
mVUunpack_xyzw(reg, xmmPQ, qInstance);
// The extra-overflow clamp below is disabled (commented out).
/*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, 15);*/
}
microVUt(void) writeQreg(mV, int reg, int qInstance) {
microVUt(void) writeQreg(int reg, int qInstance) {
if (qInstance) {
if (!cpucaps.hasStreamingSIMD4Extensions) {
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1);

View File

@ -275,13 +275,18 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) {
//------------------------------------------------------------------
// Sflag - Status Flag Opcodes
//------------------------------------------------------------------
#define flagSet(xFLAG) { \
int curPC = iPC; \
for (int i = mVUcount, j = 0; i > 0; i--, j++) { \
incPC2(-2); \
if (sFLAG.doFlag) { xFLAG = 1; if (j >= 3) { break; } } \
} \
iPC = curPC; \
// Scans backwards from the current position and marks the first earlier entry
// that has sFLAG.doFlag set once the running distance `j` has reached 3.
//   setMacFlag != 0 : the matching entry gets mFLAG.doFlag = 1
//   setMacFlag == 0 : the matching entry gets sFLAG.doNonSticky = 1
// At most one entry is marked; the scan stops at the first match or after
// mVUcount steps. iPC is saved on entry and restored before returning.
// NOTE(review): sFLAG / mFLAG / mVUstall look like accessors tied to the current
// iPC, and incPC2(-2) presumably steps back one instruction; confirm against
// their macro definitions.
microVUt(void) flagSet(mV, bool setMacFlag) {
// Remember where we are so the backwards walk leaves iPC untouched for the caller.
int curPC = iPC;
// i bounds the walk by mVUcount; j is the running distance (one per step plus mVUstall).
for (int i = mVUcount, j = 0; i > 0; i--, j++) {
// mVUstall is read BEFORE stepping back, i.e. at the position we are leaving.
j += mVUstall;
incPC2(-2);
// Only an entry that sets the flag AND is at distance >= 3 qualifies.
if (sFLAG.doFlag && (j >= 3)) {
if (setMacFlag) { mFLAG.doFlag = 1; }
else { sFLAG.doNonSticky = 1; }
break;
}
}
// Restore the caller's position.
iPC = curPC;
}
microVUt(void) mVUanalyzeSflag(mV, int It) {
@ -289,14 +294,10 @@ microVUt(void) mVUanalyzeSflag(mV, int It) {
analyzeVIreg2(It, mVUlow.VI_write, 1);
if (!It) { mVUlow.isNOP = 1; }
else {
mVUinfo.swapOps = 1;
mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block
flagSet(sFLAG.doNonSticky);
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; }
if (mVUcount >= 1) { incPC2(-2); mVUlow.useSflag = 1; incPC2(2); }
// Note: useSflag is used for status flag optimizations when a FSSET instruction is called.
// Due to stalls, it can only be set one instruction prior to the status flag read instruction;
// if we were guaranteed that no stalls would happen, it could be set 4 instructions prior.
mVUinfo.swapOps = 1;
flagSet(mVU, 0);
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; }
}
}
@ -316,8 +317,8 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) {
if (!It) { mVUlow.isNOP = 1; }
else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed)
mVUinfo.swapOps = 1;
flagSet(mVU, 1);
if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << 4; }
flagSet(mFLAG.doFlag);
}
}

View File

@ -34,18 +34,18 @@ microVUt(void) mVUstatusFlagOp(mV) {
int curPC = iPC;
int i = mVUcount;
bool runLoop = 1;
if (sFLAG.doFlag) { mVUlow.useSflag = 1; }
if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; }
else {
for (; i > 0; i--) {
incPC2(-2);
if (mVUlow.useSflag) { runLoop = 0; break; }
if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; }
if (sFLAG.doNonSticky) { runLoop = 0; break; }
else if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; break; }
}
}
if (runLoop) {
for (; i > 0; i--) {
incPC2(-2);
if (mVUlow.useSflag) break;
if (sFLAG.doNonSticky) break;
sFLAG.doFlag = 0;
}
}

View File

@ -107,7 +107,6 @@ struct microLowerOp {
u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR)
bool isNOP; // This instruction is a NOP
bool isFSSET; // This instruction is a FSSET
bool useSflag; // This instruction uses/reads Sflag
bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0)
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches)
@ -245,6 +244,7 @@ public:
clearReg(reg); // Clear Reg
}
void clearNeeded(int reg) {
if ((reg < 0) || (reg >= xmmTotal)) return;
xmmReg[reg].isNeeded = 0;
if (xmmReg[reg].xyzw) { // Reg was modified
if (xmmReg[reg].reg > 0) {

View File

@ -79,7 +79,7 @@ mVUop(mVU_DIV) {
mVUclamp1(Fs, t1, 8);
x86SetJ8(djmp);
writeQreg(mVU, Fs, mVUinfo.writeQ);
writeQreg(Fs, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft);
@ -99,7 +99,7 @@ mVUop(mVU_SQRT) {
if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive)
SSE_SQRTSS_XMM_to_XMM(Ft, Ft);
writeQreg(mVU, Ft, mVUinfo.writeQ);
writeQreg(Ft, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Ft);
}
@ -138,7 +138,7 @@ mVUop(mVU_RSQRT) {
mVUclamp1(Fs, t1, 8);
x86SetJ8(djmp);
writeQreg(mVU, Fs, mVUinfo.writeQ);
writeQreg(Fs, mVUinfo.writeQ);
mVU->regAlloc->clearNeeded(Fs);
mVU->regAlloc->clearNeeded(Ft);

View File

@ -132,7 +132,10 @@ void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) {
tempFt = Ft;
}
opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); }
opCase4 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(mVU, Ft); }
opCase4 {
if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; }
else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); }
}
}
// Normal FMAC Opcodes