diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index d26587a84b..df3c3eff2b 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -44,8 +44,8 @@ extern SessionOverrideFlags g_Session; ////////////////////////////////////////////////////////////////////////// // Pcsx2 User Configuration Options! -//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs -//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) +#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) #define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs #define PCSX2_EEREC 0x10 #define PCSX2_VU0REC 0x20 diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index 5dc192f0ec..777d954cbb 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -447,6 +447,11 @@ __forceinline bool rcntUpdate_vSync() // Accumulate hsync rounding errors: hsyncCounter.sCycle += vSyncInfo.hSyncError; +#ifdef PCSX2_MICROVU + extern void mVUvsyncUpdate(); + mVUvsyncUpdate(); +#endif + # ifdef VSYNC_DEBUG vblankinc++; if( vblankinc > 1 ) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index bcf478c048..cd8d56207a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -47,6 +47,9 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { memset(&mVU->prog, 0, sizeof(mVU->prog)); mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init"); + mVU->cache = SysMmapEx((vuIndex ? 0x0f240000 : 0x0e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache)); + mVUreset(); } @@ -54,14 +57,17 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { microVUt(void) mVUreset() { microVU* mVU = mVUx; - mVUclose(); // Close - mVUprint((vuIndex) ? "microVU1: reset" : "microVU0: reset"); + // Delete Block Managers + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { + safe_delete( mVU->prog.prog[i].block[j] ); + } + } + // Dynarec Cache - mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); - if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - memset(mVU->cache, 0xcc, mVU->cacheSize); + memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000); // Setup Entrance/Exit Points x86SetPtr(mVU->cache); @@ -72,11 +78,12 @@ microVUt(void) mVUreset() { memset(&mVU->prog, 0, sizeof(mVU->prog)); // Create Block Managers - for (int i = 0; i <= mVU->prog.max; i++) { + // Block managers are now allocated "on-demand" by the recompiler -- air + /*for (int i = 0; i <= mVU->prog.max; i++) { for (u32 j = 0; j < (mVU->progSize / 2); j++) { mVU->prog.prog[i].block[j] = new microBlockManager(); } - } + }*/ // Program Variables mVU->prog.finished = 1; @@ -87,7 +94,7 @@ microVUt(void) mVUreset() { //mVU->prog.lpState = &mVU->prog.prog[15].allocInfo.block.pState; // Blank Pipeline State (ToDo: finish implementation) // Setup Dynarec Cache Limits for Each Program - u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes + u8* z = (mVU->cache + 0x1000); // Dispatcher Code is in first page of cache for (int i = 0; i <= mVU->prog.max; i++) { mVU->prog.prog[i].x86start = z; mVU->prog.prog[i].x86ptr = z; @@ -107,7 +114,9 @@ microVUt(void) mVUclose() { // Delete Block Managers for (int i = 0; i <= mVU->prog.max; i++) { for (u32 j = 0; j < (mVU->progSize / 2); j++) { - if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j]; + if (mVU->prog.prog[i].block[j]) { + safe_delete( mVU->prog.prog[i].block[j] ); + } } } } @@ -131,7 +140,8 @@ microVUt(void) mVUclearProg(int progIndex) { mVU->prog.prog[progIndex].sFlagHack = 0; mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { - mVU->prog.prog[progIndex].block[i]->reset(); + if( mVU->prog.prog[progIndex].block[i] ) + mVU->prog.prog[progIndex].block[i]->reset(); } } @@ -153,30 +163,80 @@ microVUt(int) mVUfindLeastUsedProg() { return mVU->prog.total; } else { - int j = (mVU->prog.cur + 1) & mVU->prog.max; - /*u32 smallest = mVU->prog.prog[j].used; - for (int i = ((j+1)&mVU->prog.max), z = 0; z < mVU->prog.max; i = (i+1)&mVU->prog.max, z++) { - if (smallest > mVU->prog.prog[i].used) { - smallest = mVU->prog.prog[i].used; - j = i; + + int startidx = (mVU->prog.cur + 1) & mVU->prog.max; + int endidx = mVU->prog.cur; + int smallidx = startidx; + u32 smallval = mVU->prog.prog[startidx].used; + + for (int i = startidx; i != endidx; i = (i+1)&mVU->prog.max) + { + u32 used = mVU->prog.prog[i].used; + if (smallval > used) + { + smallval = used; + smallidx = i; } - }*/ - mVUclearProg(j); // Clear old data if overwriting old program - mVUcacheProg(j); // Cache Micro Program - //Console::Notice("microVU%d: MicroProgram Cache Full!", params vuIndex); - return j; + } + + mVUclearProg(smallidx); // Clear old data if overwriting old program + mVUcacheProg(smallidx); // Cache Micro Program + Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval ); + return smallidx; } } +// mVUvsyncUpdate --> +// This should be run at 30fps intervals from Counters.cpp (or 60fps works too, but 30fps is +// probably all we need for accurate results) +// +// To fix the program cache to more efficiently dispose of "obsolete" programs, we need to use a +// frame-based decrementing system in combination with a program-execution-based incrementing +// system. In english: if last_used >= 2 it means the program has been used for the current +// or prev frame. if it's 0, the program hasn't been used for a while. +// +microVUt(void) __mVUvsyncUpdate() { + + microVU* mVU = mVUx; + + if (mVU->prog.total < mVU->prog.max) return; + + for (int i = 0; i <= mVU->prog.total; i++) { + if( mVU->prog.prog[i].last_used != 0 ) + { + if( mVU->prog.prog[i].last_used >= 3 ) + { + // program has been used recently. Give it's program execution counter a + // 'weighted' bonus signifying it's importance: + if( mVU->prog.prog[i].used < 0x4fffffff ) + mVU->prog.prog[i].used += 0x200; + } + mVU->prog.prog[i].last_used--; + } + else + mVU->prog.prog[i].used /= 2; // penalize unused programs. + } +} + +void mVUvsyncUpdate() +{ + __mVUvsyncUpdate<0>(); + __mVUvsyncUpdate<1>(); +} + // Searches for Cached Micro Program and sets prog.cur to it (returns 1 if program found, else returns 0) microVUt(int) mVUsearchProg() { microVU* mVU = mVUx; + if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { mVU->prog.cur = i; mVU->prog.cleared = 0; - mVU->prog.prog[i].used++; + if( mVU->prog.prog[i].used < 0x7fffffff ) // avoid overflows on well-used programs + mVU->prog.prog[i].used++; + + mVU->prog.prog[i].last_used = 3; // add me to the mVU structs return 1; } } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 55c773c780..7dc1f4d804 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -19,6 +19,7 @@ #pragma once //#define mVUdebug // Prints Extra Info to Console //#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt + #include "Common.h" #include "VU.h" #include "GS.h" @@ -64,20 +65,21 @@ public: } }; -template +template // progSize = VU program memory size / 4 struct microProgram { - u32 data[progSize/4]; + u32 data[progSize]; u32 used; // Number of times its been used + u32 last_used; // counters # of frames since last use (starts at 3 and counts backwards to 0 for each 30fps vsync) u32 sFlagHack; // Optimize out Status Flag Updates if Program doesn't use Status Flags u8* x86ptr; // Pointer to program's recompilation code u8* x86start; // Start of program's rec-cache u8* x86end; // Limit of program's rec-cache - microBlockManager* block[progSize/8]; + microBlockManager* block[progSize/2]; microAllocInfo allocInfo; }; -#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...) -template +#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...) +template // pSize = VU program memory size / 4 struct microProgManager { microProgram prog[mMaxProg]; // Store MicroPrograms in memory static const int max = mMaxProg - 1; @@ -95,7 +97,7 @@ struct microVU { u32 progSize; // VU Micro Program Size (microSize/4) u32 cacheSize; // VU Cache Size - microProgManager<0x4000> prog; // Micro Program Data + microProgManager<0x4000/4> prog; // Micro Program Data FILE* logFile; // Log File Pointer VURegs* regs; // VU Regs Struct @@ -112,6 +114,12 @@ struct microVU { u32 totalCycles; // Total Cycles that mVU is expected to run for u32 cycles; // Cycles Counter + // WARNING! MSVC does not reliably guarantee alignment on structure or class member variables, + // failing around 10-20% of the time to align (random depending on various circumstances). + // GCC fails to align the members at all, failing about 50-80% of the time (barring occasional + // random luck). If you want these to be guaranteed aligned, move them to the top of the + // struct, and ensure the struct itself is aligned. :) -- air + PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index dc4e81d867..c9fd35b4a3 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -76,6 +76,6 @@ struct microAllocInfo { u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags - u32 info[pSize/8]; // Info for Instructions in current block - u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) + u32 info[pSize/2]; // Info for Instructions in current block + u8 stall[pSize/2]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) }; diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 4d30137cf5..f87a3e2080 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -27,6 +27,8 @@ mVUsetupBranch(xStatus, xMac, xClip, xCycles); \ CMP16ItoM((uptr)&mVU->branch, 0); \ incPC2(1); \ + if( mVUblocks[iPC/2] == NULL ) \ + mVUblocks[iPC/2] = new microBlockManager(); \ bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ incPC2(-1); \ if (bBlock) { nJMPcc((uptr)bBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \ @@ -160,6 +162,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); } startPC &= (vuIndex ? 0x3ff8 : 0xff8); + if( mVUblocks[startPC/8] == NULL ) + mVUblocks[startPC/8] = new microBlockManager(); + // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); if (pBlock) { return pBlock->x86ptrStart; } @@ -225,6 +230,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { else { microBlock* bBlock = NULL; u32* ajmp = 0; + switch (mVUbranch) { case 3: branchCase(JE32, JNE32); // IBEQ case 4: branchCase(JGE32, JNGE32); // IBGEZ @@ -238,6 +244,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { incPC(-3); // Go back to branch opcode (to get branch imm addr) mVUsetupBranch(xStatus, xMac, xClip, xCycles); + if( mVUblocks[branchAddr/8] == NULL ) + mVUblocks[branchAddr/8] = new microBlockManager(); + // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); } @@ -265,6 +274,10 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUprint("mVUcompile conditional branch"); if (bBlock) { // Branch non-taken has already been compiled incPC(-3); // Go back to branch opcode (to get branch imm addr) + + if( mVUblocks[branchAddr/8] == NULL ) + mVUblocks[branchAddr/8] = new microBlockManager(); + // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }