diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 752414a1b2..eec040ac33 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -359,7 +359,7 @@ emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset) } -/* mov r32 to r32 */ +/* mov r16 to r16 */ emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from ) { if( to == from ) return; diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index cb6bbbbb5f..8de5d0737c 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -53,10 +53,11 @@ struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR - u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) + //u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 info[pSize/8]; // Info for Instructions in current block + u8 stall[pSize/8]; // Info on how much each instruction stalled (one entry per instruction, same size as info[]) }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index dee76e8f92..c591e88082 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -682,8 +682,7 @@ microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { microVUt(void) mVUallocSFLAGb(int reg, int fInstance) { getFlagReg(fInstance, fInstance); - AND32ItoR(fInstance, 0xffff0000); - OR16RtoR(fInstance, reg); + MOV16RtoR(fInstance, reg); // 16-bit move of reg into the register picked by getFlagReg (presumably leaves the upper 16 bits alone, like the AND/OR pair it replaces - confirm) } microVUt(void) mVUallocMFLAGa(int reg, int fInstance) { diff --git a/pcsx2/x86/microVU_Analyze.inl 
b/pcsx2/x86/microVU_Analyze.inl index 224ebbbd09..571435b4af 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -245,19 +245,42 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; if (!It) { mVUinfo |= _isNOP; } - else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time + else { // Sets _isSflag at instruction that FSxxx opcode reads its status flag from + mVUinfo |= _swapOps; + if (mVUcount >= 4) { incPC2(-8); mVUinfo |= _isSflag; incPC2(8); } // Tag the entry 8 PC units back (presumably 4 instructions), then step forward again + //else { incPC2((mVUcount*-2)); mVUinfo |= _isSflag; incPC2(mVUcount*-2); } + } analyzeVIreg2(It, 1); } microVUt(void) mVUanalyzeFSSET() { microVU* mVU = mVUx; - int i, curPC = iPC; - for (i = mVUcount; i > 0; i--) { - incPC2(-2); - if (isSflag) break; - mVUinfo &= ~_doStatus; + mVUinfo |= _isFSSSET; // Only tags the current instruction as FSSET (the flag macro really is spelled _isFSSSET) +} + +//------------------------------------------------------------------ +// Mflag - Mac Flag Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeMflag(int Is, int It) { // Marks with _doMac the status-updating instruction found at, or before, the entry 8 PC units back + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; } + else if (mVUcount >= 4) { + incPC2(-8); + if (doStatus) { mVUinfo |= _doMac; } + else { + int curPC = iPC; + int i = mVUcount - 4; // Already stepped back 4 instructions, so only (mVUcount - 4) remain before the block start + for (; i > 0; i--) { + incPC2(-2); + if (doStatus) { mVUinfo |= _doMac; break; } + } + iPC = curPC; // Undo the backward scan + } + incPC2(8); } - iPC = curPC; + analyzeVIreg1(Is); + analyzeVIreg2(It, 1); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 85c4938aa7..56228309ba 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,6 +19,10 @@ #pragma once #ifdef PCSX2_MICROVU +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ + #define createBlock(blockEndPtr) { \ block.pipelineState = 
pipelineState; \ block.x86ptrStart = x86ptrStart; \ @@ -46,6 +50,115 @@ #define incP() { mVU->p = (mVU->p+1) & 1; } #define incQ() { mVU->q = (mVU->q+1) & 1; } +//------------------------------------------------------------------ +// Helper Functions +//------------------------------------------------------------------ + +// Optimizes out unneeded status flag updates: tags the nearest status-updating instruction (current or earlier) as _isSflag, then clears _doStatus on older instructions until an earlier _isSflag is reached +microVUt(void) mVUstatusFlagOp() { + microVU* mVU = mVUx; + int curPC = iPC; + int i = mVUcount; + if (doStatus) { mVUinfo |= _isSflag; } + else { + for (; i > 0; i--) { + incPC2(-2); + if (doStatus) { mVUinfo |= _isSflag; break; } + } + } + for (; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } + iPC = curPC; // Put the PC back where the caller left it +} + +// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! +microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Assigns status/mac flag instances to every instruction of the block; bStatus[0..3] and bMac[0..3] receive the last four instances + microVU* mVU = mVUx; + + // Ensure last ~4+ instructions update mac flags + int endPC = iPC; + int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances + for (int i = mVUcount, found = 0; i > 0; i--, aCount++) { // Walk backwards from the block end; 'found' counts the status-updating instructions seen + if (doStatus) { mVUinfo |= _doMac; if (++found >= 4) { break; } } + incPC2(-2); + } + + // Status/Mac Flags Setup Code + int xStatus = 0; // Status Instance starts at #0 on every block + int xMac = 0; // Mac Instance starts at #0 on every block + int pStatus = 0; + int pMac = 0; + int xCount = mVUcount; // Backup count + mVUcount = 0; // NOTE(review): stays 0 for the whole loop below, so mVUstatusFlagOp() never scans backwards - confirm whether it should follow i + iPC = mVUstartPC; + for (int i = 0; i < xCount; i++) { + if ((xCount - i) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions + if (doStatus||isFSSET) { mVUinfo |= xStatus << 12; } // _fsInstance + if (doMac) { mVUinfo |= xMac << 10; } // _fmInstance + pStatus = (xStatus + ((mVUstall > 3) ? 
3 : mVUstall)) & 3; + pMac = (xMac + ((mVUstall > 3) ? 3 : mVUstall)) & 3; + mVUinfo |= pStatus << 18; // _fvsInstance + mVUinfo |= pMac << 16; // _fvmInstance + if (doStatus||isFSSET) { xStatus = (xStatus+1) & 3; } + if (doMac) { xMac = (xMac+1) & 3; } + incPC2(2); + } + mVUcount = xCount; // Restore count + + // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) + iPC = endPC; + for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) { // ii/jj = slots to fill at the next status/mac update; i/j = next slot (filled from 3 down to 0) + if (doStatus && (i >= 0)) { + for (; (ii > 0 && i >= 0); ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; i--; } + } + if (doMac && (j >= 0)) { + for (; (jj > 0 && j >= 0); jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; j--; } + } + incPC2(-2); + } +} + +#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0))) +#define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? gprT2 : gprT1))) // Picks the temp that holds the copy of flag instance x; relies on a variable named bStatus being in scope + +// Recompiles Code for Proper Flags on Block Linkings +microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { + + PUSHR(gprR); // Backup gprR + PUSHR(gprESP); // Backup gprESP - NOTE(review): gprESP is overwritten below while its backup sits on the stack; if it is the real stack pointer the POPR calls read the wrong address - confirm + + MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); + MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); + MOV32RtoR(gprR, getFlagReg1(bStatus[2])); + MOV32RtoR(gprESP, getFlagReg1(bStatus[3])); + + MOV32RtoR(gprF0, gprT1); + MOV32RtoR(gprF1, gprT2); + MOV32RtoR(gprF2, gprR); + MOV32RtoR(gprF3, gprESP); + + AND32ItoR(gprT1, 0xffff0000); + AND32ItoR(gprT2, 0xffff0000); + AND32ItoR(gprR, 0xffff0000); + AND32ItoR(gprESP, 0xffff0000); + + AND32ItoR(gprF0, 0x0000ffff); + AND32ItoR(gprF1, 0x0000ffff); + AND32ItoR(gprF2, 0x0000ffff); + AND32ItoR(gprF3, 0x0000ffff); + + OR32RtoR(gprF0, getFlagReg2(bMac[0])); + OR32RtoR(gprF1, getFlagReg2(bMac[1])); + OR32RtoR(gprF2, getFlagReg2(bMac[2])); + OR32RtoR(gprF3, getFlagReg2(bMac[3])); + + POPR(gprESP); // Restore gprESP + POPR(gprR); // Restore gprR +} + microVUt(void) mVUincCycles(int x) { microVU* mVU = mVUx; mVUcycles += x; @@ -130,13 +243,14 @@ microVUx(void*) 
mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, mVUcount++; } + // Sets Up Flag instances + int bStatus[4]; int bMac[4]; + mVUsetFlags(bStatus, bMac); + // Second Pass iPC = mVUstartPC; setCode(); for (bool x = 1; x; ) { - // - // ToDo: status/mac flag stuff? - // if (isEOB) { x = 0; } //if (isBranch2) { mVUopU(); incPC(2); } @@ -156,9 +270,10 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, case 8: branchCase(JNZ32); // IBNEQ case 2: branchCase2(); // BAL case 1: - // search for block + // ToDo: search for block + // (remember about global variables and recursion!) + mVUsetFlagsRec(bStatus, bMac); // Emit the flag fix-up before the jump - NOTE(review): the 'break' that followed the JMP32 is removed, so case 1 now falls through into case 9 - confirm intended ajmp = JMP32((uptr)0); - break; // B/BAL case 9: branchCase2(); // JALR case 10: break; // JR/JALR @@ -167,8 +282,8 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, return thisPtr; } } - // Do E-bit end stuff here + // Do E-bit end stuff here incCycles(55); // Ensures Valid P/Q instances mVUcycles -= 55; if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } @@ -176,10 +291,11 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); - MOV32ItoM((uptr)&mVU->p, mVU->p); - MOV32ItoM((uptr)&mVU->q, mVU->q); + //MOV32ItoM((uptr)&mVU->p, mVU->p); + //MOV32ItoM((uptr)&mVU->q, mVU->q); + AND32ItoM((uptr)&microVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag - AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it... 
+ AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); return thisPtr; diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index a247f00a6c..61f94a8b8c 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -547,10 +547,12 @@ microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFSSET(); } else { - int flagReg; - getFlagReg(flagReg, fsInstance); - AND32ItoR(flagReg, 0x03f); - OR32ItoR(flagReg, (_Imm12_ & 0xfc0)); + int flagReg = gprT1; + if (doStatus) { getFlagReg(flagReg, fsInstance); } // Get status result from upper instruction + else { mVUallocSFLAGa(flagReg, fpsInstance); } // Get status result from last status setting instruction + AND16ItoR(flagReg, 0x03f); // Remember not to modify upper 16 bits because of mac flag + OR16ItoR(flagReg, (_Imm12_ & 0xfc0)); // Bits 0-5 were kept by the AND above; bits 6-11 now come from the immediate + if (!doStatus) { mVUallocSFLAGb(flagReg, fsInstance); } // Result was built in gprT1, so copy it into the flag register selected by fsInstance } } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index aaf45425d4..c5e6c046cc 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -144,12 +144,13 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count -#define mVUstall mVUallocInfo.maxStall +//#define mVUstall mVUallocInfo.maxStall #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp -#define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] -#define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC +#define mVUinfo mVUallocInfo.info[iPC / 2] // info[] entry selected by the current PC +#define mVUstall mVUallocInfo.stall[iPC / 2] // stall[] entry selected by the current PC +#define mVUstartPC mVUallocInfo.startPC #define xPC ((iPC / 2) * 8) #define curI mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; } @@ -183,7 +184,8 @@ declareAllVariables #define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) #define 
_writesVI (1<<25) // Current Instruction writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction -//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) +#define _isFSSSET (1<<27) // Cur Instruction is FSSET (note: this name has three S's; the matching test macro is isFSSET) +//#define _isBranch2 (1<<28) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -211,7 +213,8 @@ declareAllVariables #define memReadIt (mVUinfo & (1<<24)) #define writesVI (mVUinfo & (1<<25)) #define swapOps (mVUinfo & (1<<26)) -//#define isBranch2 (mVUinfo & (1<<27)) +#define isFSSET (mVUinfo & (1<<27)) // Tests the bit set by _isFSSSET +//#define isBranch2 (mVUinfo & (1<<28)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1)