From 912185348e93b2ff1c2143c001b28695fdcd684b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 8 May 2009 02:12:02 +0000 Subject: [PATCH] microVU: - Fixed up some code so blocks are recompiled less often. - Tweaked flag code. - Minor changes... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1151 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 4 ---- pcsx2/x86/microVU.h | 11 ++++++----- pcsx2/x86/microVU_Alloc.h | 10 ++++++++++ pcsx2/x86/microVU_Alloc.inl | 18 +++++++++++------- pcsx2/x86/microVU_Analyze.inl | 13 +++++++++++-- pcsx2/x86/microVU_Compile.inl | 30 +++++++++++++++++++++++------- pcsx2/x86/microVU_Execute.inl | 16 ++++------------ pcsx2/x86/microVU_Lower.inl | 4 ++++ pcsx2/x86/microVU_Misc.h | 3 ++- 9 files changed, 71 insertions(+), 38 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 72e8aa9cf1..bc01d468a4 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -117,10 +117,6 @@ microVUt(void) mVUclear(u32 addr, u32 size) { microVU* mVU = mVUx; memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState)); mVU->prog.cleared = 1; // Next execution searches/creates a new microprogram - // Note: It might be better to copy old recompiled blocks to the new microprogram rec data - // however, if games primarily do big writes, its probably not worth it. - // The cost of invalidating bad blocks is also kind of expensive, which is another reason - // that its probably not worth it... } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index ecf728a65e..9e1c483b62 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -49,16 +49,17 @@ public: } microBlock* search(microRegInfo* pState) { if (listSize < 0) return NULL; - //if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) + if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) for (int i = 0; i <= listSize; i++) { if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo) - 1)) return &blockList[i]; } - /*} + } else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) for (int i = 0; i <= listSize; i++) { - if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)) { return &blockList[i]; } + if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p) + && (blockList[i].pState.clip == pState->clip)) { return &blockList[i]; } } - }*/ + } return NULL; } }; @@ -90,7 +91,7 @@ struct microVU { u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size u32 progSize; // VU Micro Program Size (microSize/4) - static const u32 cacheSize = 0x800000; // VU Cache Size + static const u32 cacheSize = 0xb00000; // VU Cache Size microProgManager<0x4000> prog; // Micro Program Data diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index a16c3f4396..0cd2f4b48e 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -29,6 +29,10 @@ union regInfo { }; }; +#if defined(_MSC_VER) +#pragma pack(push, 1) +#pragma warning(disable:4996) +#endif struct microRegInfo { regInfo VF[32]; u8 VI[32]; @@ -38,7 +42,12 @@ struct microRegInfo { u8 xgkick; u8 clip; u8 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last byte in struct) +#if defined(_MSC_VER) }; +#pragma pack(pop) +#else +} __attribute__((packed)); +#endif struct microTempRegInfo { regInfo VF[2]; // Holds cycle info for Fd, VF[0] = Upper Instruction, VF[1] = Lower Instruction @@ -59,6 +68,7 @@ struct microBlock { template struct microAllocInfo { + microBlock* pBlock; // Pointer to a block in mVUblocks microBlock block; // Block/Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 1d972814ee..6c9c610f19 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -665,13 +665,17 @@ microVUt(void) mVUallocFMAC26b(int& ACCw, int& ACCr) { // Flag Allocators //------------------------------------------------------------------ -#define getFlagReg(regX, fInst) { \ - switch (fInst) { \ - case 0: regX = gprF0; break; \ - case 1: regX = gprF1; break; \ - case 2: regX = gprF2; break; \ - case 3: regX = gprF3; break; \ - } \ +#define getFlagReg(regX, fInst) { \ + switch (fInst) { \ + case 0: regX = gprF0; break; \ + case 1: regX = gprF1; break; \ + case 2: regX = gprF2; break; \ + case 3: regX = gprF3; break; \ + default: \ + Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \ + regX = gprF0; \ + break; \ + } \ } microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 81512ba1f9..ec91ca1fd1 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -257,7 +257,7 @@ microVUt(void) mVUanalyzeSflag(int It) { if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; - if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; } if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } // Note: _isSflag is used for status flag optimizations. // Do to stalls, it can only be set one instruction prior to the status flag read instruction @@ -285,7 +285,7 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { if (!It) { mVUinfo |= _isNOP; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo |= _swapOps; - if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; } int curPC = iPC; for (int i = mVUcount, j = 0; i > 1; i--, j++) { incPC2(-2); @@ -297,6 +297,15 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { analyzeVIreg2(It, 1); } +//------------------------------------------------------------------ +// Cflag - Clip Flag Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeCflag() { + microVU* mVU = mVUx; + if (mVUcount < 4) { mVUpBlock->pState.needExactMatch = 1; } +} + //------------------------------------------------------------------ // XGkick //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 6bf8f65dc4..aeb6660033 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -127,6 +127,12 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { mVUcount = xCount; // Restore count mVUregs.clip = xClip&3; // Note: Clip timing isn't cycle-accurate between block linking; but hopefully doesn't matter + // Temp Hack-fix until flag-algorithm rewrite + for (int i = 0; i < 4; i++) { + bStatus[i] = 0; + bMac[i] = 0; + } + // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) iPC = endPC; for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) { @@ -238,20 +244,17 @@ microVUt(void) mVUdivSet() { if (doDivFlag) { getFlagReg(flagReg1, fsInstance); if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } - //MOV32RtoR(gprT1, flagReg1); - //AND32ItoR(gprT1, 0xffff0fcf); - //OR32MtoR (gprT1, (uptr)&mVU->divFlag); - //MOV32RtoR(flagReg1, gprT1); AND16ItoR(flagReg1, 0x0fcf); OR32MtoR (flagReg1, (uptr)&mVU->divFlag); } } -microVUt(void) mVUendProgram() { +microVUt(void) mVUendProgram(int fStatus, int fMac) { microVU* mVU = mVUx; incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0) mVUcycles -= 100; + // Save P/Q Regs if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); if (vuIndex) { @@ -259,6 +262,15 @@ microVUt(void) mVUendProgram() { SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); } + // Save Flag Instances + getFlagReg(fStatus, fStatus); + getFlagReg(fMac, fMac); + MOV32RtoR(gprT1, fStatus); + AND32ItoR(gprT1, 0xffff); + SHR32ItoR(fMac, 16); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, fMac); + //memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); //MOV32ItoM((uptr)&mVU->prog.lpState, (int)&mVUblock.pState); // Save pipeline state (clipflag instance) //AND32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag @@ -273,7 +285,7 @@ microVUt(void) mVUtestCycles() { iPC = mVUstartPC; CMP32ItoM((uptr)&mVU->cycles, 0); u8* jmp8 = JG8(0); - mVUendProgram(); + mVUendProgram(0, 0); x86SetJ8(jmp8); SUB32ItoM((uptr)&mVU->cycles, mVUcycles); } @@ -304,6 +316,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info mVUblock.x86ptrStart = thisPtr; pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + mVUpBlock = pBlock; for (int branch = 0;; ) { incPC(1); @@ -330,6 +343,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { int bStatus[4]; int bMac[4]; mVUsetFlags(bStatus, bMac); mVUtestCycles(); + //SysPrintf("bS[0] = %08x, bS[1] = %08x, bS[2] = %08x, bS[3] = %08x\n", bStatus[0], bStatus[1], bStatus[2], bStatus[3]); + //SysPrintf("bM[0] = %08x, bM[1] = %08x, bM[2] = %08x, bM[3] = %08x\n", bMac[0], bMac[1], bMac[2], bMac[3]); + //SysPrintf("mVUcount = %d\n", mVUcount); // Second Pass iPC = mVUstartPC; @@ -414,7 +430,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUprint("microVU: Possible infinite compiling loop!"); } // Do E-bit end stuff here - mVUendProgram(); + mVUendProgram(bStatus[3], bMac[3]); return thisPtr; //ToDo: Save pipeline state? } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index a96647fbbd..b89cf2a29b 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -86,20 +86,14 @@ microVUt(void) mVUdispatcherB() { eeMXCSR = g_sseMXCSR; SSE_LDMXCSR((uptr)&eeMXCSR); - // Save Regs - MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances - AND32ItoR(gprT1, 0xffff); - SHR32ItoR(gprF0, 16); - MOV32RtoM((uptr)&mVU->regs->VI[REG_R].UL, gprR); - MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); - MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprF0); - + // Save Regs (Other Regs Saved in mVUcompile) + MOV32RtoM((uptr)&mVU->regs->VI[REG_R].UL, gprR); + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC.UL[0], xmmACC); + for (int i = 1; i < 16; i++) { if (isMMX(i)) { MOVDMMXtoM((uptr)&mVU->regs->VI[i].UL, mmVI(i)); } } - SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC.UL[0], xmmACC); - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } else { CALLFunc((uptr)mVUcleanUpVU1); } @@ -110,8 +104,6 @@ microVUt(void) mVUdispatcherB() { POP32R(EBP); POP32R(EBX); - //write8(0xcc); - if (isMMX(1)) EMMS(); RET(); diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index c37cb37c9c..59cb754961 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -431,6 +431,7 @@ microVUf(void) mVU_ESUM() { microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; + pass1 { mVUanalyzeCflag(); } pass2 { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, _Imm24_); @@ -443,6 +444,7 @@ microVUf(void) mVU_FCAND() { microVUf(void) mVU_FCEQ() { microVU* mVU = mVUx; + pass1 { mVUanalyzeCflag(); } pass2 { mVUallocCFLAGa(gprT1, fvcInstance); XOR32ItoR(gprT1, _Imm24_); @@ -455,6 +457,7 @@ microVUf(void) mVU_FCEQ() { microVUf(void) mVU_FCGET() { microVU* mVU = mVUx; + pass1 { mVUanalyzeCflag(); } pass2 { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, 0xfff); @@ -465,6 +468,7 @@ microVUf(void) mVU_FCGET() { microVUf(void) mVU_FCOR() { microVU* mVU = mVUx; + pass1 { mVUanalyzeCflag(); } pass2 { mVUallocCFLAGa(gprT1, fvcInstance); OR32ItoR(gprT1, _Imm24_); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 07cef16dbc..ac79714c6e 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -152,6 +152,7 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count +#define mVUpBlock mVUallocInfo.pBlock #define mVUblock mVUallocInfo.block #define mVUregs mVUallocInfo.block.pState #define mVUregsTemp mVUallocInfo.regsTemp @@ -263,5 +264,5 @@ declareAllVariables #define mVUcacheCheck(ptr, start, limit) { \ uptr diff = ptr - start; \ - if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ + if (diff >= limit) { Console::Error("microVU Error: Program went over its cache limit. Size = 0x%x", params diff); } \ }