microVU: New improved flag speedhack.

- Now handles MAC flags as well, and does so fairly safely
- Speedup ranges from nothing to ~30fps (the latter seen in DQ8 + MTVU)
This commit is contained in:
refraction 2015-05-28 21:13:48 +01:00
parent 31ffdd8c53
commit d8343cea2a
4 changed files with 114 additions and 15 deletions

View File

@ -291,7 +291,7 @@ __ri void flagSet(mV, bool setMacFlag) {
for (int i = mVUcount, j = 0; i > 0; i--, j++) { for (int i = mVUcount, j = 0; i > 0; i--, j++) {
j += mVUstall; j += mVUstall;
incPC2(-2); incPC2(-2);
if (sFLAG.doFlag && (j >= 3)) { if (sFLAG.doFlag && (j >= 3) && sFLAG.skipWrite != 1) {
if (setMacFlag) { mFLAG.doFlag = 1; } if (setMacFlag) { mFLAG.doFlag = 1; }
else { sFLAG.doNonSticky = 1; } else { sFLAG.doNonSticky = 1; }
break; break;
@ -317,7 +317,7 @@ __ri void mVUanalyzeSflag(mV, int It) {
__ri void mVUanalyzeFSSET(mV) { __ri void mVUanalyzeFSSET(mV) {
mVUlow.isFSSET = 1; mVUlow.isFSSET = 1;
mVUlow.readFlags = 1; mVUlow.readFlags = 2;
} }
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -345,7 +345,7 @@ __ri void mVUanalyzeMflag(mV, int Is, int It) {
__fi void mVUanalyzeCflag(mV, int It) { __fi void mVUanalyzeCflag(mV, int It) {
mVUinfo.swapOps = 1; mVUinfo.swapOps = 1;
mVUlow.readFlags = 1; mVUlow.readFlags = 2;
if (mVUcount < 4) { if (mVUcount < 4) {
if (!(mVUpBlock->pState.needExactMatch & 4)) // The only time this should happen is on the first program block if (!(mVUpBlock->pState.needExactMatch & 4)) // The only time this should happen is on the first program block
DevCon.WriteLn(Color_Green, "microVU%d: pState's cFlag Info was expected to be set [%04x]", getIndex, xPC); DevCon.WriteLn(Color_Green, "microVU%d: pState's cFlag Info was expected to be set [%04x]", getIndex, xPC);

View File

@ -31,7 +31,7 @@ __fi void mVUstatusFlagOp(mV) {
int i = mVUcount; int i = mVUcount;
bool runLoop = true; bool runLoop = true;
if (sFLAG.doFlag) { if (sFLAG.doFlag && sFLAG.skipWrite != 1) {
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
} }
else { else {
@ -41,7 +41,7 @@ __fi void mVUstatusFlagOp(mV) {
runLoop = false; runLoop = false;
break; break;
} }
else if (sFLAG.doFlag) { else if (sFLAG.doFlag && sFLAG.skipWrite != 1) {
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
break; break;
} }
@ -92,30 +92,128 @@ void sortFullFlag(int* fFlag, int* bFlag) {
} }
} }
#define sFlagCond (sFLAG.doFlag || mVUlow.isFSSET || mVUinfo.doDivFlag) #define sFlagCond ((sFLAG.doFlag && sFLAG.skipWrite != 1) || mVUlow.isFSSET || mVUinfo.doDivFlag )
#define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky) #define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky)
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! // Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
__fi void mVUsetFlags(mV, microFlagCycles& mFC) { __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
int endPC = iPC; int endPC = iPC;
u32 aCount = 1; // Amount of instructions needed to get valid mac flag instances for block linking u32 aCount = 0; // Amount of instructions needed to get valid mac flag instances for block linking
bool writeProtect = false;
// Ensure last ~4+ instructions update mac/status flags (if next block's first 4 instructions will read them) // Ensure last ~4+ instructions update mac/status flags (if next block's first 4 instructions will read them)
for(int i = mVUcount; i > 0; i--, aCount++) { for(int i = mVUcount; i > 0; i--, aCount++) {
if (sFLAG.doFlag) { if (sFLAG.doFlag) {
if (__Mac)
if (__Mac) {
mFLAG.doFlag = true; mFLAG.doFlag = true;
writeProtect = true;
}
if (__Status) if (__Status) {
sFLAG.doNonSticky = true; sFLAG.doNonSticky = true;
writeProtect = true;
if (aCount >= 4) }
if (writeProtect == true)
if (aCount >= 3){
break; break;
}
} }
incPC2(-2); incPC2(-2);
} }
if (mVUsFlagHack){
bool flagReadFound = false;
bool updateCalcFound = false;
int flagReadptr = 0;
incPC2((aCount * 2));
int instanceCycles = 0;
// Go through checking flags if they need to be written back or not, ignoring the last 4ish of course
for (int i = mVUcount; i > 0; i--) {
if (sFLAG.doFlag) {
if (writeProtect && (mVUcount - i) <= aCount) { //Don't actually clear any of the last ops in the block, just check for reads
//DevCon.Warning("aCnt %d, mVUcnt %d mVUcnt-i %d", aCount, mVUcount, (mVUcount - i));
sFLAG.skipWrite = 2;
}
if (flagReadFound) {
if (instanceCycles >= 4) {
if (mVUinfo.uOp.VF_write.reg != 0) { //Keep going because this is a mac/status calculation op only
if (updateCalcFound == false)
sFLAG.skipWrite = 2;
//DevCon.Warning("Flag read mac found at %d and %d cycles, rolling back to %d", i, instanceCycles, i + (flagReadptr - 1) * 2);
flagReadFound = false;
updateCalcFound = false;
instanceCycles = 0;
//Roll back in case there was other flag reads
incPC2((flagReadptr - 1) * 2);
i += flagReadptr - 1;
flagReadptr = 0;
}
else {
//DevCon.Warning("Calculation found!");
updateCalcFound = true;
sFLAG.skipWrite = 2;
}
}
//else DevCon.Warning("waiting On %d cycles, flagptr %d pos %d", instanceCycles, flagReadptr, i);
//Else do nothing, we will come back to this one
}
else {
if (sFLAG.skipWrite != 2) { //Kill only non-write protected flag updates
sFLAG.skipWrite = 1;
//DevCon.Warning("ignoring at %d", i);
}
//else DevCon.Warning("Write protected at %d", i);
}
}
if (flagReadFound) {
if (i == 1) {
//Nothing found by the end of the block
flagReadFound = false;
//Roll back in case there was other flag reads
if (flagReadptr > 0) {
incPC2((flagReadptr)* 2);
i += flagReadptr - 1;
flagReadptr = 0;
}
}
else { //Relevant flag update instructions not yet found, keep checking the stalls to get the right one
if (flagReadptr > 0){
//DevCon.Warning("adding cycles Swap ops %d", mVUinfo.swapOps);
instanceCycles += mVUstall;
if (mVUstall == 0)
instanceCycles += 1;
}
flagReadptr++;
}
}
if (mVUlow.readFlags == 1 && !flagReadFound) {
flagReadFound = true;
flagReadptr = 0;
instanceCycles = 0;
//DevCon.Warning("Found Flag on %d Stall = %d swapops = %d", i, mVUstall, mVUinfo.swapOps);
if (mVUinfo.swapOps)
instanceCycles += mVUstall;
instanceCycles += 1;
//DevCon.Warning("%d cycles on flag read", instanceCycles);
flagReadptr++;
}
incPC2(-2);
}
}
// Status/Mac Flags Setup Code // Status/Mac Flags Setup Code
int xS = 0, xM = 0, xC = 0; int xS = 0, xM = 0, xC = 0;
u32 ff0=0, ff1=0, ffOn=0, fInfo=0; u32 ff0=0, ff1=0, ffOn=0, fInfo=0;

View File

@ -133,7 +133,7 @@ struct microLowerOp {
bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR)
bool memReadIs; // Read Is (VI reg) from memory (used by branches) bool memReadIs; // Read Is (VI reg) from memory (used by branches)
bool memReadIt; // Read If (VI reg) from memory (used by branches) bool memReadIt; // Read If (VI reg) from memory (used by branches)
bool readFlags; // Current Instruction reads Status, Mac, or Clip flags u8 readFlags; // Current Instruction reads Status, Mac, or Clip flags
}; };
struct microFlagInst { struct microFlagInst {
@ -142,6 +142,7 @@ struct microFlagInst {
u8 write; // Points to the instance that should be written to (s-stage write) u8 write; // Points to the instance that should be written to (s-stage write)
u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag) u8 lastWrite; // Points to the instance that was last written to (most up-to-date flag)
u8 read; // Points to the instance that should be read by a lower instruction (t-stage read) u8 read; // Points to the instance that should be read by a lower instruction (t-stage read)
u8 skipWrite; // Makes sure the flags dont written (MAC Flag hack)
}; };
struct microFlagCycles { struct microFlagCycles {

View File

@ -31,7 +31,7 @@ static void mVUupdateFlags(mV, const xmm& reg, const xmm& regT1in = xEmptyReg, c
static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
//SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag);
if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } if ((!sFLAG.doFlag && !mFLAG.doFlag) || sFLAG.skipWrite == 1) { return; }
const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in; const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in;
@ -125,7 +125,7 @@ static void setupPass1(microVU& mVU, int opCase, bool isACC, bool noFlagUpdate)
opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } opCase3 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); }
opCase4 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); } opCase4 { mVUanalyzeFMAC1(mVU, ((isACC) ? 0 : _Fd_), _Fs_, 0); }
if (noFlagUpdate) { if (noFlagUpdate) { //Max/Min Ops
sFLAG.doFlag = false; sFLAG.doFlag = false;
} }
} }