microVU: fried my brain with some very-complex VU flag-handling logic/algorithms (hopefully they work as expected)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@959 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-04-12 08:29:53 +00:00
parent 97fac9e635
commit 5b0d9b6723
7 changed files with 173 additions and 29 deletions

View File

@ -359,7 +359,7 @@ emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset)
}
/* mov r32 to r32 */
/* mov r16 to r16 */
emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from )
{
if( to == from ) return;

View File

@ -53,10 +53,11 @@ struct microAllocInfo {
microRegInfo regs; // Pipeline info
microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle)
u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR
u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
//u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes)
u32 cycles; // Cycles for current block
u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block)
u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block
u32 info[pSize/8]; // Info for Instructions in current block
u8 stall[pSize/8]; // Info on how much each instruction stalled
};

View File

@ -682,8 +682,7 @@ microVUt(void) mVUallocSFLAGa(int reg, int fInstance) {
// Emits code that writes `reg` into the lower 16 bits of a status-flag register.
//   reg       - x86 register holding the new 16-bit value
//   fInstance - flag instance number on entry; getFlagReg() overwrites it in place
//               (presumably with the x86 register that backs that instance - confirm
//               against the getFlagReg definition)
microVUt(void) mVUallocSFLAGb(int reg, int fInstance) {
getFlagReg(fInstance, fInstance);
// Mask keeps only the upper 16 bits of the flag register...
AND32ItoR(fInstance, 0xffff0000);
// ...and the 16-bit OR then fills in the lower half from reg.
OR16RtoR(fInstance, reg);
// NOTE(review): this 16-bit move targets the same destination/source pair as the
// AND+OR above, so one of the two sequences looks redundant (possibly an old and a
// new version of the same store sitting side by side) - confirm which one is intended.
MOV16RtoR(fInstance, reg);
}
microVUt(void) mVUallocMFLAGa(int reg, int fInstance) {

View File

@ -245,19 +245,42 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) {
// Analysis pass for an opcode that reads the status flag into VI register `It`.
//   It - destination VI register index; 0 means the result is discarded, so the
//        instruction is tagged as a NOP.
// When the result is used, the lower op is scheduled before the upper op (_swapOps)
// and _isSflag is set on the instruction located incPC2(-8) before this one, which is
// the instruction this opcode reads its status flag from.
microVUt(void) mVUanalyzeSflag(int It) {
	microVU* mVU = mVUx;
	if (!It) { mVUinfo |= _isNOP; }
	else { // Sets _isSflag at instruction that FSxxx opcode reads its status flag from
		mVUinfo |= _swapOps;
		// Only step back when the block already holds at least 4 instructions;
		// iPC is moved back, the earlier instruction is tagged, then iPC is restored.
		if (mVUcount >= 4) { incPC2(-8); mVUinfo |= _isSflag; incPC2(8); }
		//else { incPC2((mVUcount*-2)); mVUinfo |= _isSflag; incPC2(mVUcount*-2); }
	}
	analyzeVIreg2(It, 1);
}
microVUt(void) mVUanalyzeFSSET() {
microVU* mVU = mVUx;
int i, curPC = iPC;
for (i = mVUcount; i > 0; i--) {
mVUinfo |= _isFSSSET;
}
//------------------------------------------------------------------
// Mflag - Mac Flag Opcodes
//------------------------------------------------------------------
// Analysis pass for an opcode that reads the mac flag.
//   Is - source VI register index (handed to analyzeVIreg1)
//   It - destination VI register index (handed to analyzeVIreg2); 0 tags the
//        instruction as a NOP
// For blocks with at least 4 instructions it moves iPC back by 8, makes sure some
// earlier instruction is tagged _doMac, and then moves iPC forward by 8 again.
microVUt(void) mVUanalyzeMflag(int Is, int It) {
microVU* mVU = mVUx;
if (!It) { mVUinfo |= _isNOP; }
else if (mVUcount >= 4) {
incPC2(-8);
// If the instruction found there updates the status flag, have it update the mac flag too.
if (doStatus) { mVUinfo |= _doMac; }
else {
// Otherwise keep walking backwards; curPC remembers where to return to.
int curPC = iPC;
// NOTE(review): i starts at the full mVUcount although iPC has already been moved
// back by 8 - confirm the walk cannot run past the start of the block.
int i = mVUcount;
for (; i > 0; i--) {
incPC2(-2);
if (isSflag) break;
// NOTE(review): if doStatus tests the _doStatus bit of mVUinfo, clearing that bit
// here means the check on the following line can never succeed. These lines look
// like the bodies of two different loops merged together - confirm intent.
mVUinfo &= ~_doStatus;
if (doStatus) { mVUinfo |= _doMac; break; }
}
iPC = curPC;
}
incPC2(8);
}
analyzeVIreg1(Is);
analyzeVIreg2(It, 1);
}
//------------------------------------------------------------------

View File

@ -19,6 +19,10 @@
#pragma once
#ifdef PCSX2_MICROVU
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define createBlock(blockEndPtr) { \
block.pipelineState = pipelineState; \
block.x86ptrStart = x86ptrStart; \
@ -46,6 +50,115 @@
// Flip mVU->p / mVU->q between 0 and 1 (the value is incremented and masked to one bit).
// Both macros expect a pointer named `mVU` to be in scope where they are expanded.
#define incP() { mVU->p = (mVU->p+1) & 1; }
#define incQ() { mVU->q = (mVU->q+1) & 1; }
//------------------------------------------------------------------
// Helper Functions
//------------------------------------------------------------------
// Optimizes out unneeded status flag updates
//
// Starting at the instruction iPC points to, finds the closest instruction (this one
// or an earlier one) that has doStatus set and tags it _isSflag. It then keeps walking
// backwards and clears _doStatus on each instruction until it reaches one that is
// already tagged isSflag. iPC is restored before returning.
microVUt(void) mVUstatusFlagOp() {
microVU* mVU = mVUx;
int curPC = iPC; // saved so iPC can be put back at the end
int i = mVUcount; // step budget shared by both loops below
if (doStatus) { mVUinfo |= _isSflag; }
else {
// Search backwards for the nearest instruction that updates the status flag.
// Note that leaving via break skips the i-- of that iteration.
for (; i > 0; i--) {
incPC2(-2);
if (doStatus) { mVUinfo |= _isSflag; break; }
}
}
// Everything between the instruction tagged above and the previous tagged one
// loses its status update.
for (; i > 0; i--) {
incPC2(-2);
if (isSflag) break;
mVUinfo &= ~_doStatus;
}
iPC = curPC;
}
// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch!
//
// Assigns status/mac flag instance numbers to every instruction of the current block.
//   bStatus - out: array of 4 ints receiving the status flag instances in use at the
//             end of the block
//   bMac    - out: array of 4 ints receiving the mac flag instances in use at the end
//             of the block
// Entries of bStatus/bMac are only written when enough flag-setting instructions are
// found while walking back from the end of the block.
// iPC is left wherever the final backwards walk stops; it is not restored here.
microVUt(void) mVUsetFlags(int* bStatus, int* bMac) {
	microVU* mVU = mVUx;

	// Ensure last ~4+ instructions update mac flags
	int endPC  = iPC;
	int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances
	// Walks backwards from the end of the block, so the counter must run down to 0.
	for (int i = mVUcount; i > 0; i--, aCount++) {
		// NOTE(review): with i counting down from mVUcount, `i >= 4` is true at the first
		// status-setting instruction of any block of 4+ instructions - confirm this is
		// the intended "found enough instances" test.
		if (doStatus) { mVUinfo |= _doMac; if (i >= 4) { break; } }
		incPC2(-2);
	}

	// Status/Mac Flags Setup Code
	int xStatus = 0; // Status Instance starts at #0 on every block
	int xMac    = 0; // Mac Instance starts at #0 on every block
	int pStatus = 0;
	int pMac    = 0;
	int xCount  = mVUcount; // Backup count
	mVUcount = 0;
	iPC      = mVUstartPC;
	for (int i = 0; i < xCount; i++) {
		if ((xCount - i) > aCount) mVUstatusFlagOp<vuIndex>(); // Don't Optimize out on the last ~4+ instructions

		if (doStatus||isFSSET) { mVUinfo |= xStatus << 12; } // _fsInstance
		if (doMac)             { mVUinfo |= xMac    << 10; } // _fmInstance

		// Instance visible to flag-reading opcodes: current instance advanced by the
		// stall of this instruction, capped at 3.
		pStatus = (xStatus + ((mVUstall > 3) ? 3 : mVUstall)) & 3;
		pMac    = (xMac    + ((mVUstall > 3) ? 3 : mVUstall)) & 3;
		mVUinfo |= pStatus << 18; // _fvsInstance
		mVUinfo |= pMac    << 16; // _fvmInstance

		if (doStatus||isFSSET) { xStatus = (xStatus+1) & 3; }
		if (doMac)             { xMac    = (xMac+1)    & 3; }
		incPC2(2);
		// Keep the count in step with iPC: mVUstatusFlagOp uses mVUcount as the number
		// of earlier instructions it may look back over, so leaving it at 0 would turn
		// every call above into a no-op.
		mVUcount++;
	}
	mVUcount = xCount; // Restore count

	// Setup Last 4 instances of Status/Mac flags (needed for accurate block linking)
	iPC = endPC;
	// NOTE(review): ii grows by one per instruction visited while jj starts at 3 and is
	// never incremented, and the status test here omits isFSSET although the setup loop
	// above includes it - confirm both asymmetries are intended.
	for (int i = 3, j = 3, ii = 1, jj = 3; aCount > 0; ii++, aCount--) {
		if (doStatus && (i >= 0)) {
			for (; (ii > 0 && i >= 0); ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; i--; }
		}
		if (doMac && (j >= 0)) {
			// Indexed by j: i belongs to the status walk and may already be -1 here.
			for (; (jj > 0 && j >= 0); jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; j--; }
		}
		incPC2(-2);
	}
}
// Maps a flag instance number to its flag register: 3 -> gprF3, 2 -> gprF2, 1 -> gprF1,
// anything else -> gprF0.
#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0)))
// Picks the temporary register that corresponds to position 3/2/1 of bStatus holding
// the value x (gprESP / gprR / gprT2), falling back to gprT1 when none of them match.
// Needs an array named `bStatus` to be in scope at the point of expansion.
#define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? gprT2 : gprT1)))
// Recompiles Code for Proper Flags on Block Linkings
//
// Emits code that rearranges the four flag registers gprF0..gprF3 according to the
// instance orderings in bStatus and bMac (4 entries each): the lower 16 bits of each
// gprFn are taken from the register selected by bStatus[n], the upper 16 bits from the
// register selected by bMac[n].
microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) {
// NOTE(review): if gprESP is the real x86 stack pointer, it is overwritten below while
// its saved copy still sits on the stack, so the POPR(gprESP) at the end would read
// from wherever the clobbered value points - confirm, and consider saving it to memory.
PUSHR(gprR); // Backup gprR
PUSHR(gprESP); // Backup gprESP
// Snapshot the flag registers in bStatus order into four temporaries.
MOV32RtoR(gprT1, getFlagReg1(bStatus[0]));
MOV32RtoR(gprT2, getFlagReg1(bStatus[1]));
MOV32RtoR(gprR, getFlagReg1(bStatus[2]));
MOV32RtoR(gprESP, getFlagReg1(bStatus[3]));
// gprFn now holds the full register chosen by bStatus[n].
MOV32RtoR(gprF0, gprT1);
MOV32RtoR(gprF1, gprT2);
MOV32RtoR(gprF2, gprR);
MOV32RtoR(gprF3, gprESP);
// Temporaries keep only their upper 16 bits...
AND32ItoR(gprT1, 0xffff0000);
AND32ItoR(gprT2, 0xffff0000);
AND32ItoR(gprR, 0xffff0000);
AND32ItoR(gprESP, 0xffff0000);
// ...while the flag registers keep only their lower 16 bits.
AND32ItoR(gprF0, 0x0000ffff);
AND32ItoR(gprF1, 0x0000ffff);
AND32ItoR(gprF2, 0x0000ffff);
AND32ItoR(gprF3, 0x0000ffff);
// Merge in the upper half from the temporary that getFlagReg2 selects for bMac[n].
OR32RtoR(gprF0, getFlagReg2(bMac[0]));
OR32RtoR(gprF1, getFlagReg2(bMac[1]));
OR32RtoR(gprF2, getFlagReg2(bMac[2]));
OR32RtoR(gprF3, getFlagReg2(bMac[3]));
POPR(gprESP); // Restore gprESP
POPR(gprR); // Restore gprR
}
microVUt(void) mVUincCycles(int x) {
microVU* mVU = mVUx;
mVUcycles += x;
@ -130,13 +243,14 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
mVUcount++;
}
// Sets Up Flag instances
int bStatus[4]; int bMac[4];
mVUsetFlags<vuIndex>(bStatus, bMac);
// Second Pass
iPC = mVUstartPC;
setCode();
for (bool x = 1; x; ) {
//
// ToDo: status/mac flag stuff?
//
if (isEOB) { x = 0; }
//if (isBranch2) { mVUopU<vuIndex, 1>(); incPC(2); }
@ -156,9 +270,10 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
case 8: branchCase(JNZ32); // IBNEQ
case 2: branchCase2(); // BAL
case 1:
// search for block
// ToDo: search for block
// (remember about global variables and recursion!)
mVUsetFlagsRec<vuIndex>(bStatus, bMac);
ajmp = JMP32((uptr)0);
break; // B/BAL
case 9: branchCase2(); // JALR
case 10: break; // JR/JALR
@ -167,8 +282,8 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
return thisPtr;
}
}
// Do E-bit end stuff here
// Do E-bit end stuff here
incCycles(55); // Ensures Valid P/Q instances
mVUcycles -= 55;
if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); }
@ -176,10 +291,11 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState,
SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2);
SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ);
MOV32ItoM((uptr)&mVU->p, mVU->p);
MOV32ItoM((uptr)&mVU->q, mVU->q);
//MOV32ItoM((uptr)&mVU->p, mVU->p);
//MOV32ItoM((uptr)&mVU->q, mVU->q);
AND32ItoM((uptr)&microVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it...
AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif
MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC);
JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5));
return thisPtr;

View File

@ -547,10 +547,12 @@ microVUf(void) mVU_FSSET() {
microVU* mVU = mVUx;
if (!recPass) { mVUanalyzeFSSET<vuIndex>(); }
else {
int flagReg;
getFlagReg(flagReg, fsInstance);
AND32ItoR(flagReg, 0x03f);
OR32ItoR(flagReg, (_Imm12_ & 0xfc0));
int flagReg = gprT1;
if (doStatus) { getFlagReg(flagReg, fsInstance); } // Get status result from upper instruction
else { mVUallocSFLAGa<vuIndex>(flagReg, fpsInstance); } // Get status result from last status setting instruction
AND16ItoR(flagReg, 0x03f); // Remember not to modify upper 16 bits because of mac flag
OR16ItoR(flagReg, (_Imm12_ & 0xfc0));
if (!doStatus) { mVUallocSFLAGb<vuIndex>(flagReg, fsInstance); }
}
}

View File

@ -144,12 +144,13 @@ declareAllVariables
// Shorthand accessors for the fields of mVUallocInfo.
// Each name is defined exactly once; mVUinfo and mVUstall are per-instruction and are
// indexed through iPC, so they follow whatever instruction iPC currently points at.
#define mVUbranch    mVUallocInfo.branch
#define mVUcycles    mVUallocInfo.cycles
#define mVUcount     mVUallocInfo.count
#define mVUregs      mVUallocInfo.regs
#define mVUregsTemp  mVUallocInfo.regsTemp
#define iPC          mVUallocInfo.curPC
#define mVUinfo      mVUallocInfo.info[iPC / 2]
#define mVUstall     mVUallocInfo.stall[iPC / 2]  // replaces the former single maxStall value
#define mVUstartPC   mVUallocInfo.startPC
#define xPC          ((iPC / 2) * 8)
#define curI         mVUcurProg.data[iPC]
#define setCode()    { mVU->code = curI; }
@ -183,7 +184,8 @@ declareAllVariables
#define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches)
#define _writesVI (1<<25) // Current Instruction writes to VI
#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction
//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR)
#define _isFSSSET (1<<27) // Cur Instruction is FSSET
//#define _isBranch2 (1<<28) // Cur Instruction is a Branch that writes VI regs (BAL/JALR)
#define isNOP (mVUinfo & (1<<0))
#define isBranch (mVUinfo & (1<<1))
@ -211,7 +213,8 @@ declareAllVariables
#define memReadIt (mVUinfo & (1<<24))
#define writesVI (mVUinfo & (1<<25))
#define swapOps (mVUinfo & (1<<26))
//#define isBranch2 (mVUinfo & (1<<27))
#define isFSSET (mVUinfo & (1<<27))
//#define isBranch2 (mVUinfo & (1<<28))
#define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9)
#define mmVI(_VIreg_) (_VIreg_ - 1)