mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Fix for falling through floor bug in MGS2 Demo Disc (not a proper fix but good enough for now) - Fixed a few random tiny bugs; might not affect anything - Coded some optimization that didn't work out well; it's disabled by default, will probably remove the code later... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4640 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e821070676
commit
96b2d810c1
|
@ -45,6 +45,7 @@ private:
|
|||
int qListI, fListI;
|
||||
|
||||
public:
|
||||
inline int getFullListCount() const { return fListI; }
|
||||
microBlockManager() {
|
||||
qListI = fListI = 0;
|
||||
qBlockEnd = qBlockList = NULL;
|
||||
|
@ -71,13 +72,14 @@ public:
|
|||
microBlock* add(microBlock* pBlock) {
|
||||
microBlock* thisBlock = search(&pBlock->pState);
|
||||
if (!thisBlock) {
|
||||
u8 fullCmp = pBlock->pState.needExactMatch;
|
||||
u8 doFF = doFullFlagOpt && (pBlock->pState.flagInfo&1);
|
||||
u8 fullCmp = pBlock->pState.needExactMatch || doFF;
|
||||
if (fullCmp) fListI++; else qListI++;
|
||||
|
||||
microBlockLink*& blockList = fullCmp ? fBlockList : qBlockList;
|
||||
microBlockLink*& blockEnd = fullCmp ? fBlockEnd : qBlockEnd;
|
||||
microBlockLink* newBlock = (microBlockLink*)_aligned_malloc(sizeof(microBlockLink), 16);
|
||||
newBlock->block.jumpCache = NULL;
|
||||
newBlock->block.jumpCache = NULL;
|
||||
newBlock->next = NULL;
|
||||
|
||||
if (blockEnd) {
|
||||
|
@ -94,7 +96,8 @@ public:
|
|||
return thisBlock;
|
||||
}
|
||||
__ri microBlock* search(microRegInfo* pState) {
|
||||
if (pState->needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
u8 doFF = doFullFlagOpt && (pState->flagInfo&1);
|
||||
if (pState->needExactMatch || doFF) { // Needs Detailed Search (Exact Match of Pipeline State)
|
||||
for(microBlockLink* linkI = fBlockList; linkI != NULL; linkI = linkI->next) {
|
||||
if (mVUquickSearch((void*)pState, (void*)&linkI->block.pState, sizeof(microRegInfo)))
|
||||
return &linkI->block;
|
||||
|
@ -102,9 +105,10 @@ public:
|
|||
}
|
||||
else { // Can do Simple Search (Only Matches the Important Pipeline Stuff)
|
||||
for(microBlockLink* linkI = qBlockList; linkI != NULL; linkI = linkI->next) {
|
||||
if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
|
||||
if (linkI->block.pState.quick32[0] != pState->quick32[0]) continue;
|
||||
if (linkI->block.pState.quick32[1] != pState->quick32[1]) continue;
|
||||
if (doConstProp && (linkI->block.pState.vi15 != pState->vi15)) continue;
|
||||
if (doConstProp && (linkI->block.pState.vi15v != pState->vi15v)) continue;
|
||||
return &linkI->block;
|
||||
}
|
||||
}
|
||||
|
@ -119,10 +123,11 @@ public:
|
|||
for (u32 j = 0; j < 4; j++) viCRC -= ((u32*)linkI->block.pState.VI)[j];
|
||||
for (u32 j = 0; j < 32; j++) vfCRC -= linkI->block.pState.VF[j].reg;
|
||||
for (u32 j = 0; j < z; j++) crc -= ((u32*)&linkI->block.pState)[j];
|
||||
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%08x][viBackup=%02d]"
|
||||
DevCon.WriteLn(Color_Green, "[%04x][Block #%d][crc=%08x][q=%02d][p=%02d][xgkick=%d][vi15=%04x][vi15v=%d][viBackup=%02d]"
|
||||
"[flags=%02x][exactMatch=%x][blockType=%d][viCRC=%08x][vfCRC=%08x]", pc, i, crc, linkI->block.pState.q,
|
||||
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.viBackUp,
|
||||
linkI->block.pState.flags, linkI->block.pState.needExactMatch, linkI->block.pState.blockType, viCRC, vfCRC);
|
||||
linkI->block.pState.p, linkI->block.pState.xgkick, linkI->block.pState.vi15, linkI->block.pState.vi15v,
|
||||
linkI->block.pState.viBackUp, linkI->block.pState.flagInfo, linkI->block.pState.needExactMatch,
|
||||
linkI->block.pState.blockType, viCRC, vfCRC);
|
||||
linkI = linkI->next;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -447,6 +447,9 @@ __ri int mVUbranchCheck(mV) {
|
|||
mVUlow.evilBranch = 1;
|
||||
mVUregs.blockType = 2;
|
||||
mVUregs.needExactMatch |= 7; // This might not be necessary, but w/e...
|
||||
mVUregs.flagInfo = 0;
|
||||
mVUregs.fullFlags0 = 0;
|
||||
mVUregs.fullFlags1 = 0;
|
||||
DevCon.Warning("microVU%d Warning: Branch in Branch delay slot! [%04x]", mVU.index, xPC);
|
||||
return 1;
|
||||
}
|
||||
|
@ -463,7 +466,7 @@ __ri int mVUbranchCheck(mV) {
|
|||
|
||||
__fi void mVUanalyzeCondBranch1(mV, int Is) {
|
||||
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
|
||||
if (!mVUstall && !mVUbranchCheck(mVU)) {
|
||||
if (!mVUbranchCheck(mVU) && !mVUstall) {
|
||||
analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
|
||||
}
|
||||
}
|
||||
|
@ -471,7 +474,7 @@ __fi void mVUanalyzeCondBranch1(mV, int Is) {
|
|||
__fi void mVUanalyzeCondBranch2(mV, int Is, int It) {
|
||||
analyzeVIreg1(mVU, Is, mVUlow.VI_read[0]);
|
||||
analyzeVIreg1(mVU, It, mVUlow.VI_read[1]);
|
||||
if (!mVUstall && !mVUbranchCheck(mVU)) {
|
||||
if (!mVUbranchCheck(mVU) && !mVUstall) {
|
||||
analyzeBranchVI(mVU, Is, mVUlow.memReadIs);
|
||||
analyzeBranchVI(mVU, It, mVUlow.memReadIt);
|
||||
}
|
||||
|
|
|
@ -19,18 +19,20 @@ extern bool doEarlyExit (microVU& mVU);
|
|||
extern void mVUincCycles(microVU& mVU, int x);
|
||||
extern void* mVUcompile (microVU& mVU, u32 startPC, uptr pState);
|
||||
|
||||
#define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); }
|
||||
#define sI ((mVUpBlock->pState.needExactMatch & 1) ? 3 : ((mVUpBlock->pState.flags >> 0) & 3))
|
||||
#define cI ((mVUpBlock->pState.needExactMatch & 4) ? 3 : ((mVUpBlock->pState.flags >> 2) & 3))
|
||||
// Returns the index (0..3) of the flag instance that holds the most recent
// value for one flag type at the end of a program/block.
//   pState   - pipeline state of the block being ended
//   xFlag    - per-instance cycle array for this flag type (only used for E-bit)
//   flagType - 0 = status, 1 = mac, 2 = clip (as passed by mVUendProgram)
//   isEbit   - non-zero when the program ends via E-bit
__fi int getLastFlagInst(microRegInfo& pState, int* xFlag, int flagType, int isEbit) {
	// E-bit ending: let findFlagInst pick the instance using the largest cycle value.
	if (isEbit) {
		return findFlagInst(xFlag, 0x7fffffff);
	}

	// needExactMatch carries one bit per flag type; when set, instance 3 is used.
	const int exactBit = 1 << flagType;
	if (pState.needExactMatch & exactBit) {
		return 3;
	}

	// flagInfo stores a 2-bit field per flag type starting at bit 2
	// (status: bits 2-3, mac: bits 4-5, clip: bits 6-7). The instance
	// returned is the one just before that field's value, wrapped to 0..3.
	const int fieldShift = 2 * flagType + 2;
	const int nextInst   = (pState.flagInfo >> fieldShift) & 3;
	return (nextInst - 1) & 3;
}
|
||||
|
||||
// Applies memzero to microVU0's lpState, unless prog.cleared is already set.
void mVU0clearlpStateJIT() {
	if (microVU0.prog.cleared) return;
	memzero(microVU0.prog.lpState);
}
|
||||
// Applies memzero to microVU1's lpState, unless prog.cleared is already set.
void mVU1clearlpStateJIT() {
	if (microVU1.prog.cleared) return;
	memzero(microVU1.prog.lpState);
}
|
||||
|
||||
void mVUendProgram(mV, microFlagCycles* mFC, int isEbit) {
|
||||
|
||||
int fStatus = (isEbit) ? findFlagInst(mFC->xStatus, 0x7fffffff) : sI;
|
||||
int fMac = (isEbit) ? findFlagInst(mFC->xMac, 0x7fffffff) : 0;
|
||||
int fClip = (isEbit) ? findFlagInst(mFC->xClip, 0x7fffffff) : cI;
|
||||
int fStatus = getLastFlagInst(mVUpBlock->pState, mFC->xStatus, 0, isEbit);
|
||||
int fMac = getLastFlagInst(mVUpBlock->pState, mFC->xMac, 1, isEbit);
|
||||
int fClip = getLastFlagInst(mVUpBlock->pState, mFC->xClip, 2, isEbit);
|
||||
int qInst = 0;
|
||||
int pInst = 0;
|
||||
mVU.regAlloc->flushAll();
|
||||
|
|
|
@ -384,8 +384,8 @@ __fi void mVUinitConstValues(microVU& mVU) {
|
|||
mVUconstReg[i].isValid = 0;
|
||||
mVUconstReg[i].regValue = 0;
|
||||
}
|
||||
mVUconstReg[15].isValid = mVUregs.vi15 >> 31;
|
||||
mVUconstReg[15].regValue = mVUconstReg[15].isValid ? (mVUregs.vi15&0xffff) : 0;
|
||||
mVUconstReg[15].isValid = mVUregs.vi15v;
|
||||
mVUconstReg[15].regValue = mVUregs.vi15v ? mVUregs.vi15 : 0;
|
||||
}
|
||||
|
||||
// Initialize Variables
|
||||
|
@ -403,11 +403,13 @@ __fi void mVUinitFirstPass(microVU& mVU, uptr pState, u8* thisPtr) {
|
|||
memcpy_const((u8*)&mVU.prog.lpState, (u8*)pState, sizeof(microRegInfo));
|
||||
}
|
||||
mVUblock.x86ptrStart = thisPtr;
|
||||
mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager
|
||||
mVUregs.needExactMatch =(mVUregs.blockType || noFlagOpts) ? 7 : 0; // 1-Op blocks should just always set exactMatch (Sly Cooper)
|
||||
mVUpBlock = mVUblocks[mVUstartPC/2]->add(&mVUblock); // Add this block to block manager
|
||||
mVUregs.needExactMatch = /*(mVUregs.blockType||noFlagOpts)?7:*/0; // ToDo: Fix 1-Op block flag linking (MGS2:Demo/Sly Cooper)
|
||||
mVUregs.blockType = 0;
|
||||
mVUregs.viBackUp = 0;
|
||||
mVUregs.flags = 0;
|
||||
mVUregs.flagInfo = 0;
|
||||
mVUregs.fullFlags0 = 0;
|
||||
mVUregs.fullFlags1 = 0;
|
||||
mVUsFlagHack = CHECK_VU_FLAGHACK;
|
||||
mVUinitConstValues(mVU);
|
||||
}
|
||||
|
@ -425,7 +427,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
|
|||
// First Pass
|
||||
iPC = startPC / 4;
|
||||
mVUsetupRange(mVU, startPC, 1); // Setup Program Bounds/Range
|
||||
mVU.regAlloc->reset(); // Reset regAlloc
|
||||
mVU.regAlloc->reset(); // Reset regAlloc
|
||||
mVUinitFirstPass(mVU, pState, thisPtr);
|
||||
for(int branch = 0; mVUcount < endCount; mVUcount++) {
|
||||
incPC(1);
|
||||
|
@ -450,7 +452,8 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState) {
|
|||
}
|
||||
|
||||
// Fix up vi15 const info for propagation through blocks
|
||||
mVUregs.vi15 = (mVUconstReg[15].isValid && doConstProp) ? ((1<<31) | (mVUconstReg[15].regValue&0xffff)) : 0;
|
||||
mVUregs.vi15 = (doConstProp && mVUconstReg[15].isValid) ? (u16)mVUconstReg[15].regValue : 0;
|
||||
mVUregs.vi15v = (doConstProp && mVUconstReg[15].isValid) ? 1 : 0;
|
||||
|
||||
mVUsetFlags(mVU, mFC); // Sets Up Flag instances
|
||||
mVUoptimizePipeState(mVU); // Optimize the End Pipeline State for nicer Block Linking
|
||||
|
|
|
@ -73,6 +73,14 @@ int sortFlag(int* fFlag, int* bFlag, int cycles) {
|
|||
return x; // Returns the number of Valid Flag Instances
|
||||
}
|
||||
|
||||
// Converts four flag-instance values into compact codes relative to the
// largest of them.
//   fFlag - input, 4 entries
//   bFlag - output, 4 entries
// For each entry, with d = (largest - fFlag[i]):
//   d > 3  -> bFlag[i] = 0
//   d <= 3 -> bFlag[i] = 4 - d   (so the largest entry maps to 4)
void sortFullFlag(int* fFlag, int* bFlag) {
	// Find the largest of the four inputs.
	int newest = fFlag[0];
	for (int k = 1; k < 4; k++) {
		if (fFlag[k] > newest) newest = fFlag[k];
	}

	// Encode every entry by its distance from the largest one.
	for (int k = 0; k < 4; k++) {
		const int rel = 3 - (newest - fFlag[k]);
		if (rel < 0) {
			bFlag[k] = 0;
		}
		else {
			bFlag[k] = rel + 1;
		}
	}
}
|
||||
|
||||
#define sFlagCond (sFLAG.doFlag || mVUlow.isFSSET || mVUinfo.doDivFlag)
|
||||
#define sHackCond (mVUsFlagHack && !sFLAG.doNonSticky)
|
||||
|
||||
|
@ -94,31 +102,59 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
|
|||
|
||||
// Status/Mac Flags Setup Code
|
||||
int xS = 0, xM = 0, xC = 0;
|
||||
u32 ff0=0, ff1=0, ffOn=0, fInfo=0;
|
||||
|
||||
if (doFullFlagOpt) {
|
||||
ff0 = mVUpBlock->pState.fullFlags0;
|
||||
ff1 = mVUpBlock->pState.fullFlags1;
|
||||
ffOn = mVUpBlock->pState.flagInfo&1;
|
||||
fInfo = mVUpBlock->pState.flagInfo;
|
||||
}
|
||||
|
||||
for(int i = 0; i < 4; i++) {
|
||||
mFC.xStatus[i] = i;
|
||||
mFC.xMac [i] = i;
|
||||
mFC.xClip [i] = i;
|
||||
}
|
||||
if (ffOn) { // Full Flags Enabled
|
||||
xS = (fInfo >> 2) & 3;
|
||||
xM = (fInfo >> 4) & 3;
|
||||
xC = (fInfo >> 6) & 3;
|
||||
mFC.xStatus[0] = ((ff0 >> (3*0+ 0)) & 7) - 1;
|
||||
mFC.xStatus[1] = ((ff0 >> (3*1+ 0)) & 7) - 1;
|
||||
mFC.xStatus[2] = ((ff0 >> (3*2+ 0)) & 7) - 1;
|
||||
mFC.xStatus[3] = ((ff0 >> (3*3+ 0)) & 7) - 1;
|
||||
mFC.xMac [0] = ((ff0 >> (3*0+12)) & 7) - 1;
|
||||
mFC.xMac [1] = ((ff0 >> (3*1+12)) & 7) - 1;
|
||||
mFC.xMac [2] = ((ff0 >> (3*2+12)) & 7) - 1;
|
||||
mFC.xMac [3] = ((ff0 >> (3*3+12)) & 7) - 1;
|
||||
mFC.xClip [0] = ((ff0 >> (3*0+24)) & 7) - 1;
|
||||
mFC.xClip [1] = ((ff0 >> (3*1+24)) & 7) - 1;
|
||||
mFC.xClip [2] = ((ff1 >> (3*0+ 0)) & 7) - 1;
|
||||
mFC.xClip [3] = ((ff1 >> (3*1+ 0)) & 7) - 1;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 1)) {
|
||||
xS = (mVUpBlock->pState.flags >> 0) & 3;
|
||||
if(!ffOn && !(mVUpBlock->pState.needExactMatch & 1)) {
|
||||
xS = (mVUpBlock->pState.flagInfo >> 2) & 3;
|
||||
mFC.xStatus[0] = -1; mFC.xStatus[1] = -1;
|
||||
mFC.xStatus[2] = -1; mFC.xStatus[3] = -1;
|
||||
mFC.xStatus[(xS-1)&3] = 0;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 4)) {
|
||||
xC = (mVUpBlock->pState.flags >> 2) & 3;
|
||||
if(!ffOn && !(mVUpBlock->pState.needExactMatch & 2)) {
|
||||
//xM = (mVUpBlock->pState.flagInfo >> 4) & 3;
|
||||
mFC.xMac[0] = -1; mFC.xMac[1] = -1;
|
||||
mFC.xMac[2] = -1; mFC.xMac[3] = -1;
|
||||
//mFC.xMac[(xM-1)&3] = 0;
|
||||
}
|
||||
|
||||
if(!ffOn && !(mVUpBlock->pState.needExactMatch & 4)) {
|
||||
xC = (mVUpBlock->pState.flagInfo >> 6) & 3;
|
||||
mFC.xClip[0] = -1; mFC.xClip[1] = -1;
|
||||
mFC.xClip[2] = -1; mFC.xClip[3] = -1;
|
||||
mFC.xClip[(xC-1)&3] = 0;
|
||||
}
|
||||
|
||||
if (!(mVUpBlock->pState.needExactMatch & 2)) {
|
||||
mFC.xMac[0] = -1; mFC.xMac[1] = -1;
|
||||
mFC.xMac[2] = -1; mFC.xMac[3] = -1;
|
||||
}
|
||||
|
||||
mFC.cycles = 0;
|
||||
u32 xCount = mVUcount; // Backup count
|
||||
iPC = mVUstartPC;
|
||||
|
@ -131,17 +167,17 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
|
|||
}
|
||||
mFC.cycles += mVUstall;
|
||||
|
||||
sFLAG.read = doFlagInsts ? findFlagInst(mFC.xStatus, mFC.cycles) : 0;
|
||||
mFLAG.read = doFlagInsts ? findFlagInst(mFC.xMac, mFC.cycles) : 0;
|
||||
cFLAG.read = doFlagInsts ? findFlagInst(mFC.xClip, mFC.cycles) : 0;
|
||||
sFLAG.read = doSFlagInsts ? findFlagInst(mFC.xStatus, mFC.cycles) : 0;
|
||||
mFLAG.read = doMFlagInsts ? findFlagInst(mFC.xMac, mFC.cycles) : 0;
|
||||
cFLAG.read = doCFlagInsts ? findFlagInst(mFC.xClip, mFC.cycles) : 0;
|
||||
|
||||
sFLAG.write = doFlagInsts ? xS : 0;
|
||||
mFLAG.write = doFlagInsts ? xM : 0;
|
||||
cFLAG.write = doFlagInsts ? xC : 0;
|
||||
sFLAG.write = doSFlagInsts ? xS : 0;
|
||||
mFLAG.write = doMFlagInsts ? xM : 0;
|
||||
cFLAG.write = doCFlagInsts ? xC : 0;
|
||||
|
||||
sFLAG.lastWrite = doFlagInsts ? (xS-1) & 3 : 0;
|
||||
mFLAG.lastWrite = doFlagInsts ? (xM-1) & 3 : 0;
|
||||
cFLAG.lastWrite = doFlagInsts ? (xC-1) & 3 : 0;
|
||||
sFLAG.lastWrite = doSFlagInsts ? (xS-1) & 3 : 0;
|
||||
mFLAG.lastWrite = doMFlagInsts ? (xM-1) & 3 : 0;
|
||||
cFLAG.lastWrite = doCFlagInsts ? (xC-1) & 3 : 0;
|
||||
|
||||
if (sHackCond) { sFLAG.doFlag = 0; }
|
||||
if (sFLAG.doFlag) { if(noFlagOpts){sFLAG.doNonSticky=1;mFLAG.doFlag=1;}}
|
||||
|
@ -153,8 +189,26 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
|
|||
incPC2(2);
|
||||
}
|
||||
|
||||
mVUregs.flags = ((__Clip) ? 0 : (xC << 2)) | ((__Status) ? 0 : xS);
|
||||
mVUregs.flagInfo |= ((__Status) ? 0 : (xS << 2));
|
||||
mVUregs.flagInfo |= ((__Mac||1) ? 0 : (xM << 4));
|
||||
mVUregs.flagInfo |= ((__Clip) ? 0 : (xC << 6));
|
||||
iPC = endPC;
|
||||
|
||||
if (doFullFlagOpt && (mVUregs.flagInfo & 1)) {
|
||||
//if (mVUregs.needExactMatch) DevCon.Error("mVU ERROR!!!");
|
||||
int bS[4], bM[4], bC[4];
|
||||
sortFullFlag(mFC.xStatus, bS);
|
||||
sortFullFlag(mFC.xMac, bM);
|
||||
sortFullFlag(mFC.xClip, bC);
|
||||
mVUregs.flagInfo = (xC<<6) | (xM<<4) | (xS<<2) | 1;
|
||||
mVUregs.fullFlags0 = ((bS[3]<<9)|(bS[2]<<6)|(bS[1]<<3)|(bS[0]<<0)) << (12*0);
|
||||
mVUregs.fullFlags0 |= ((bM[3]<<9)|(bM[2]<<6)|(bM[1]<<3)|(bM[0]<<0)) << (12*1);
|
||||
mVUregs.fullFlags0 |= ((bC[1]<<3)|(bC[0]<<0)) << (12*2);
|
||||
mVUregs.fullFlags1 = ((bC[3]<<3)|(bC[2]<<0)) << (12*0);
|
||||
mVUregs.needExactMatch = 0;
|
||||
DevCon.WriteLn("MVU FULL FLAG!!!!!!!! [0x%04x][0x%08x][0x%02x]",
|
||||
xPC, mVUregs.fullFlags0, (u32)mVUregs.fullFlags1);
|
||||
}
|
||||
}
|
||||
|
||||
#define getFlagReg2(x) ((bStatus[0] == x) ? getFlagReg(x) : gprT1)
|
||||
|
@ -166,11 +220,15 @@ __fi void mVUsetFlags(mV, microFlagCycles& mFC) {
|
|||
// Recompiles Code for Proper Flags on Block Linkings
|
||||
__fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
|
||||
|
||||
if (mVUregs.flagInfo & 1) {
|
||||
if (mVUregs.needExactMatch) DevCon.Error("mVU ERROR!!!");
|
||||
}
|
||||
|
||||
const bool pf = 0; // Print Flag Info
|
||||
if (pf) DevCon.WriteLn("mVU%d - [#%d][sPC=%04x][bPC=%04x][mVUBranch=%d][branch=%d]",
|
||||
mVU.index, mVU.prog.cur->idx, mVUstartPC/2*8, xPC, mVUbranch, mVUlow.branch);
|
||||
|
||||
if (__Status) {
|
||||
if (doSFlagInsts && __Status) {
|
||||
if (pf) DevCon.WriteLn("mVU%d - Status Flag", mVU.index);
|
||||
int bStatus[4];
|
||||
int sortRegs = sortFlag(mFC.xStatus, bStatus, mFC.cycles);
|
||||
|
@ -209,7 +267,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
|
|||
}
|
||||
}
|
||||
|
||||
if (__Mac) {
|
||||
if (doMFlagInsts && __Mac) {
|
||||
if (pf) DevCon.WriteLn("mVU%d - Mac Flag", mVU.index);
|
||||
int bMac[4];
|
||||
sortFlag(mFC.xMac, bMac, mFC.cycles);
|
||||
|
@ -218,7 +276,7 @@ __fi void mVUsetupFlags(mV, microFlagCycles& mFC) {
|
|||
xMOVAPS(ptr128[mVU.macFlag], xmmT1);
|
||||
}
|
||||
|
||||
if (__Clip) {
|
||||
if (doCFlagInsts && __Clip) {
|
||||
if (pf) DevCon.WriteLn("mVU%d - Clip Flag", mVU.index);
|
||||
int bClip[4];
|
||||
sortFlag(mFC.xClip, bClip, mFC.cycles);
|
||||
|
@ -256,7 +314,7 @@ void _mVUflagPass(mV, u32 startPC, u32 sCount, u32 found, vector<u32>& v) {
|
|||
int aBranchAddr = 0;
|
||||
iPC = startPC / 4;
|
||||
mVUbranch = 0;
|
||||
for (int branch = 0; sCount < 4; sCount += found) {
|
||||
for(int branch = 0; sCount < 4; sCount += found) {
|
||||
mVUregs.needExactMatch &= 7;
|
||||
incPC(1);
|
||||
mVUopU(mVU, 3);
|
||||
|
@ -285,27 +343,55 @@ void mVUflagPass(mV, u32 startPC, u32 sCount = 0, u32 found = 0) {
|
|||
_mVUflagPass(mVU, startPC, sCount, found, v);
|
||||
}
|
||||
|
||||
#define branchType1 if (mVUbranch <= 2) // B/BAL
|
||||
#define branchType2 else if (mVUbranch >= 9) // JR/JALR
|
||||
#define branchType3 else // Conditional Branch
|
||||
// Decides whether the full-flag optimization may still be used for the block
// at 'addr'.
//   addr  - target address; the block manager slot used is addr/8
//   ffOpt - in/out: left untouched when it is already 0 or when
//           doFullFlagOpt is 0; otherwise set to 1 if the target's
//           full-list count is <= doFullFlagOpt, else 0
__fi void checkFFblock(mV, u32 addr, int& ffOpt) {
	if (!ffOpt || !doFullFlagOpt) return;

	const u32 blockIdx = addr / 8;
	blockCreate(blockIdx); // Make sure a block manager exists for this slot
	const int fullCount = mVUblocks[blockIdx]->getFullListCount();
	ffOpt = fullCount <= doFullFlagOpt;
}
|
||||
|
||||
// Checks if the first ~4 instructions of a block will read flags
|
||||
__fi void mVUsetFlagInfo(mV) {
|
||||
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); }
|
||||
branchType2 { // This case can possibly be turned off via a hack for a small speedup...
|
||||
if (!mVUlow.constJump.isValid || !doConstProp) { mVUregs.needExactMatch |= 0x7; }
|
||||
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU.microMemSize-8)); }
|
||||
void mVUsetFlagInfo(mV) {
|
||||
if (noFlagOpts) {
|
||||
mVUregs.needExactMatch = 0x7;
|
||||
mVUregs.fullFlags0 = 0x0;
|
||||
mVUregs.fullFlags1 = 0x0;
|
||||
mVUregs.flagInfo = 0x0;
|
||||
return;
|
||||
}
|
||||
branchType3 {
|
||||
int ffOpt = doFullFlagOpt;
|
||||
if (mVUbranch <= 2) { // B/BAL
|
||||
incPC(-1);
|
||||
mVUflagPass(mVU, branchAddr);
|
||||
int backupFlagInfo = mVUregs.needExactMatch;
|
||||
mVUregs.needExactMatch = 0;
|
||||
incPC(4); // Branch Not Taken
|
||||
mVUflagPass(mVU, xPC);
|
||||
incPC(-3);
|
||||
mVUregs.needExactMatch |= backupFlagInfo;
|
||||
mVUflagPass (mVU, branchAddr);
|
||||
checkFFblock(mVU, branchAddr, ffOpt);
|
||||
incPC(1);
|
||||
|
||||
mVUregs.needExactMatch &= 0x7;
|
||||
if (mVUregs.needExactMatch && ffOpt) {
|
||||
mVUregs.flagInfo |= 1;
|
||||
}
|
||||
}
|
||||
elif(mVUbranch <= 8) { // Conditional Branch
|
||||
incPC(-1); // Branch Taken
|
||||
mVUflagPass (mVU, branchAddr);
|
||||
checkFFblock(mVU, branchAddr, ffOpt);
|
||||
int backupFlagInfo = mVUregs.needExactMatch;
|
||||
mVUregs.needExactMatch = 0;
|
||||
|
||||
incPC(4); // Branch Not Taken
|
||||
mVUflagPass (mVU, xPC);
|
||||
checkFFblock(mVU, xPC, ffOpt);
|
||||
incPC(-3);
|
||||
|
||||
mVUregs.needExactMatch |= backupFlagInfo;
|
||||
mVUregs.needExactMatch &= 0x7;
|
||||
if (mVUregs.needExactMatch && ffOpt) {
|
||||
mVUregs.flagInfo |= 1;
|
||||
}
|
||||
}
|
||||
else { // JR/JALR
|
||||
if (!doConstProp || !mVUlow.constJump.isValid) { mVUregs.needExactMatch |= 0x7; }
|
||||
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU.microMemSize-8)); }
|
||||
mVUregs.needExactMatch &= 0x7;
|
||||
}
|
||||
mVUregs.needExactMatch &= 0x7;
|
||||
if (noFlagOpts) mVUregs.needExactMatch |= 0x7;
|
||||
}
|
||||
|
|
|
@ -33,14 +33,12 @@ union regInfo {
|
|||
// needed by the xmm compare.
|
||||
union __aligned16 microRegInfo {
|
||||
struct {
|
||||
u32 vi15; // Constant Prop Info for vi15 (only valid if sign-bit set)
|
||||
|
||||
union {
|
||||
struct {
|
||||
u8 needExactMatch; // If set, block needs an exact match of pipeline state
|
||||
u8 flagInfo; // xC * 2 | xM * 2 | xS * 2 | 0 * 1 | fullFlag Valid * 1
|
||||
u8 q;
|
||||
u8 p;
|
||||
u8 flags; // clip x2 :: status x2
|
||||
u8 xgkick;
|
||||
u8 viBackUp; // VI reg number that was written to on branch-delay slot
|
||||
u8 blockType; // 0 = Normal; 1,2 = Compile one instruction (E-bit/Branch Ending)
|
||||
|
@ -49,6 +47,11 @@ union __aligned16 microRegInfo {
|
|||
u32 quick32[2];
|
||||
};
|
||||
|
||||
u32 fullFlags0; // clip * 6 | mac * 12 | status * 12
|
||||
u8 fullFlags1; // clip * 6
|
||||
u8 vi15v; // 'vi15' constant is valid
|
||||
u16 vi15; // Constant Prop Info for vi15
|
||||
|
||||
struct {
|
||||
u8 VI[16];
|
||||
regInfo VF[32];
|
||||
|
|
|
@ -229,6 +229,10 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
#define elif else if
|
||||
#define varPrint(x) DevCon.WriteLn(#x " = %d", (int)x)
|
||||
|
||||
#define blockCreate(addr) { \
|
||||
if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); \
|
||||
}
|
||||
|
||||
#define branchAddr ( \
|
||||
pxAssumeDev((iPC & 1) == 0, "microVU: Expected Lower Op for valid branch addr."), \
|
||||
((((iPC + 2) + (_Imm11_ * 2)) & mVU.progMemMask) * 4) \
|
||||
|
@ -292,12 +296,22 @@ static const bool noFlagOpts = 0; // Set to 1 to disable all flag setting optimi
|
|||
// flag instances between blocks...
|
||||
|
||||
// Multiple Flag Instances
|
||||
static const bool doFlagInsts = 1; // Set to 1 to enable multiple flag instances
|
||||
static const bool doSFlagInsts = 1; // Set to 1 to enable multiple status flag instances
|
||||
static const bool doMFlagInsts = 1; // Set to 1 to enable multiple mac flag instances
|
||||
static const bool doCFlagInsts = 1; // Set to 1 to enable multiple clip flag instances
|
||||
// This is the correct behavior of the VU's. Due to the pipeline of the VU's
|
||||
// there can be up to 4 different instances of values to keep track of
|
||||
// for the 3 different types of flags: Status, Mac, Clip flags.
|
||||
// Setting this to 0 acts as if there is only 1 instance of each flag,
|
||||
// which may be useful to check for potential flag pipeline bugs.
|
||||
// Setting one of these to 0 acts as if there is only 1 instance of the
|
||||
// corresponding flag, which may be useful when debugging flag pipeline bugs.
|
||||
|
||||
static const int doFullFlagOpt = 0; // Set above 0 to enable full flag optimization
|
||||
// This attempts to eliminate some flag shuffling at the end of blocks, but
|
||||
// can end up creating more recompiled code. The max amount of times this optimization
|
||||
// is performed per block can be set by changing the doFullFlagOpt value to be that limit.
|
||||
// i.e. setting doFullFlagOpt to 2 will recompile the current block at-most 2 times with
|
||||
// the full flag optimization.
|
||||
// Note: This optimization doesn't really seem to be beneficial and is buggy...
|
||||
|
||||
// Branch in Branch Delay Slots
|
||||
static const bool doBranchInDelaySlot = 1; // Set to 1 to enable evil-branches
|
||||
|
@ -334,7 +348,7 @@ static const bool doJumpAsSameProgram = 0; // Set to 1 to treat jumps as same pr
|
|||
//------------------------------------------------------------------
|
||||
|
||||
// Status Flag Speed Hack
|
||||
#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack)
|
||||
#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack)
|
||||
// This hack only updates the Status Flag on blocks that will read it.
|
||||
// Most blocks do not read status flags, so this is a big speedup.
|
||||
|
||||
|
|
Loading…
Reference in New Issue