From 729d90c1cea98eeb800329e01aef35e258214198 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 14 May 2009 19:34:17 +0000 Subject: [PATCH] microVU: * Smartened up the microprogram cache. Programs are (quite accurately) selected based on how often and how recently they have been used. Solves constant recompilation issues in games like FFXII and Tekken 5 [but not the SPS, sorry!]. * Changed memory allocation so that microprogram blocks are allocated *on demand* -- this saves a lot of memory and makes it possible to test 64-program caches (which would previously fail due to exceeding 2gb ram allocations!). Also fixed a few small memory bugs in Init/Alloc, and made it so mVU only allocates memory once instead of on every reset. :) * Tweaked uses of progSize in microVU.h so that it's consistent [all instances of progSize are (vumem_size / 4) now] git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1180 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 4 +- pcsx2/Counters.cpp | 5 ++ pcsx2/x86/microVU.cpp | 104 +++++++++++++++++++++++++++------- pcsx2/x86/microVU.h | 20 +++++-- pcsx2/x86/microVU_Alloc.h | 4 +- pcsx2/x86/microVU_Compile.inl | 13 +++++ 6 files changed, 118 insertions(+), 32 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index d26587a84b..df3c3eff2b 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -44,8 +44,8 @@ extern SessionOverrideFlags g_Session; ////////////////////////////////////////////////////////////////////////// // Pcsx2 User Configuration Options! 
-//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs -//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) +#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) #define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs #define PCSX2_EEREC 0x10 #define PCSX2_VU0REC 0x20 diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index 5dc192f0ec..777d954cbb 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -447,6 +447,11 @@ __forceinline bool rcntUpdate_vSync() // Accumulate hsync rounding errors: hsyncCounter.sCycle += vSyncInfo.hSyncError; +#ifdef PCSX2_MICROVU + extern void mVUvsyncUpdate(); + mVUvsyncUpdate(); +#endif + # ifdef VSYNC_DEBUG vblankinc++; if( vblankinc > 1 ) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index bcf478c048..cd8d56207a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -47,6 +47,9 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { memset(&mVU->prog, 0, sizeof(mVU->prog)); mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init"); + mVU->cache = SysMmapEx((vuIndex ? 0x0f240000 : 0x0e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache)); + mVUreset(); } @@ -54,14 +57,17 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { microVUt(void) mVUreset() { microVU* mVU = mVUx; - mVUclose(); // Close - mVUprint((vuIndex) ? "microVU1: reset" : "microVU0: reset"); + // Delete Block Managers + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { + safe_delete( mVU->prog.prog[i].block[j] ); + } + } + // Dynarec Cache - mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? 
"Micro VU1" : "Micro VU0")); - if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - memset(mVU->cache, 0xcc, mVU->cacheSize); + memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000); // Setup Entrance/Exit Points x86SetPtr(mVU->cache); @@ -72,11 +78,12 @@ microVUt(void) mVUreset() { memset(&mVU->prog, 0, sizeof(mVU->prog)); // Create Block Managers - for (int i = 0; i <= mVU->prog.max; i++) { + // Block managers are now allocated "on-demand" by the recompiler -- air + /*for (int i = 0; i <= mVU->prog.max; i++) { for (u32 j = 0; j < (mVU->progSize / 2); j++) { mVU->prog.prog[i].block[j] = new microBlockManager(); } - } + }*/ // Program Variables mVU->prog.finished = 1; @@ -87,7 +94,7 @@ microVUt(void) mVUreset() { //mVU->prog.lpState = &mVU->prog.prog[15].allocInfo.block.pState; // Blank Pipeline State (ToDo: finish implementation) // Setup Dynarec Cache Limits for Each Program - u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes + u8* z = (mVU->cache + 0x1000); // Dispatcher Code is in first page of cache for (int i = 0; i <= mVU->prog.max; i++) { mVU->prog.prog[i].x86start = z; mVU->prog.prog[i].x86ptr = z; @@ -107,7 +114,9 @@ microVUt(void) mVUclose() { // Delete Block Managers for (int i = 0; i <= mVU->prog.max; i++) { for (u32 j = 0; j < (mVU->progSize / 2); j++) { - if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j]; + if (mVU->prog.prog[i].block[j]) { + safe_delete( mVU->prog.prog[i].block[j] ); + } } } } @@ -131,7 +140,8 @@ microVUt(void) mVUclearProg(int progIndex) { mVU->prog.prog[progIndex].sFlagHack = 0; mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { - mVU->prog.prog[progIndex].block[i]->reset(); + if( mVU->prog.prog[progIndex].block[i] ) + mVU->prog.prog[progIndex].block[i]->reset(); } } @@ -153,30 +163,80 @@ microVUt(int) mVUfindLeastUsedProg() 
{ return mVU->prog.total; } else { - int j = (mVU->prog.cur + 1) & mVU->prog.max; - /*u32 smallest = mVU->prog.prog[j].used; - for (int i = ((j+1)&mVU->prog.max), z = 0; z < mVU->prog.max; i = (i+1)&mVU->prog.max, z++) { - if (smallest > mVU->prog.prog[i].used) { - smallest = mVU->prog.prog[i].used; - j = i; + + int startidx = (mVU->prog.cur + 1) & mVU->prog.max; + int endidx = mVU->prog.cur; + int smallidx = startidx; + u32 smallval = mVU->prog.prog[startidx].used; + + for (int i = startidx; i != endidx; i = (i+1)&mVU->prog.max) + { + u32 used = mVU->prog.prog[i].used; + if (smallval > used) + { + smallval = used; + smallidx = i; } - }*/ - mVUclearProg(j); // Clear old data if overwriting old program - mVUcacheProg(j); // Cache Micro Program - //Console::Notice("microVU%d: MicroProgram Cache Full!", params vuIndex); - return j; + } + + mVUclearProg(smallidx); // Clear old data if overwriting old program + mVUcacheProg(smallidx); // Cache Micro Program + Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval ); + return smallidx; } } +// mVUvsyncUpdate --> +// This should be run at 30fps intervals from Counters.cpp (or 60fps works too, but 30fps is +// probably all we need for accurate results) +// +// To fix the program cache to more efficiently dispose of "obsolete" programs, we need to use a +// frame-based decrementing system in combination with a program-execution-based incrementing +// system. In english: if last_used >= 2 it means the program has been used for the current +// or prev frame. if it's 0, the program hasn't been used for a while. +// +microVUt(void) __mVUvsyncUpdate() { + + microVU* mVU = mVUx; + + if (mVU->prog.total < mVU->prog.max) return; + + for (int i = 0; i <= mVU->prog.total; i++) { + if( mVU->prog.prog[i].last_used != 0 ) + { + if( mVU->prog.prog[i].last_used >= 3 ) + { + // program has been used recently. 
Give its program execution counter a + // 'weighted' bonus signifying its importance: + if( mVU->prog.prog[i].used < 0x4fffffff ) + mVU->prog.prog[i].used += 0x200; + } + mVU->prog.prog[i].last_used--; + } + else + mVU->prog.prog[i].used /= 2; // penalize unused programs. + } +} + +void mVUvsyncUpdate() +{ + __mVUvsyncUpdate<0>(); + __mVUvsyncUpdate<1>(); +} + // Searches for Cached Micro Program and sets prog.cur to it (returns 1 if program found, else returns 0) microVUt(int) mVUsearchProg() { microVU* mVU = mVUx; + if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { mVU->prog.cur = i; mVU->prog.cleared = 0; - mVU->prog.prog[i].used++; + if( mVU->prog.prog[i].used < 0x7fffffff ) // avoid overflows on well-used programs + mVU->prog.prog[i].used++; + + mVU->prog.prog[i].last_used = 3; // add me to the mVU structs return 1; } } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 55c773c780..7dc1f4d804 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -19,6 +19,7 @@ #pragma once //#define mVUdebug // Prints Extra Info to Console //#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt + #include "Common.h" #include "VU.h" #include "GS.h" @@ -64,20 +65,21 @@ public: } }; -template +template // progSize = VU program memory size / 4 struct microProgram { - u32 data[progSize/4]; + u32 data[progSize]; u32 used; // Number of times its been used + u32 last_used; // counts # of frames since last use (starts at 3 and counts backwards to 0 for each 30fps vsync) u32 sFlagHack; // Optimize out Status Flag Updates if Program doesn't use Status Flags u8* x86ptr; // Pointer to program's recompilation code u8* x86start; // Start of program's rec-cache u8* x86end; // Limit of program's rec-cache - microBlockManager* block[progSize/8]; + microBlockManager* block[progSize/2]; microAllocInfo allocInfo; 
}; -#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...) -template +#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...) +template // pSize = VU program memory size / 4 struct microProgManager { microProgram prog[mMaxProg]; // Store MicroPrograms in memory static const int max = mMaxProg - 1; @@ -95,7 +97,7 @@ struct microVU { u32 progSize; // VU Micro Program Size (microSize/4) u32 cacheSize; // VU Cache Size - microProgManager<0x4000> prog; // Micro Program Data + microProgManager<0x4000/4> prog; // Micro Program Data FILE* logFile; // Log File Pointer VURegs* regs; // VU Regs Struct @@ -112,6 +114,12 @@ struct microVU { u32 totalCycles; // Total Cycles that mVU is expected to run for u32 cycles; // Cycles Counter + // WARNING! MSVC does not reliably guarantee alignment on structure or class member variables, + // failing around 10-20% of the time to align (random depending on various circumstances). + // GCC fails to align the members at all, failing about 50-80% of the time (barring occasional + // random luck). If you want these to be guaranteed aligned, move them to the top of the + // struct, and ensure the struct itself is aligned. 
:) -- air + PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution) PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution) PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index dc4e81d867..c9fd35b4a3 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -76,6 +76,6 @@ struct microAllocInfo { u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags - u32 info[pSize/8]; // Info for Instructions in current block - u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) + u32 info[pSize/2]; // Info for Instructions in current block + u8 stall[pSize/2]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) }; diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 4d30137cf5..f87a3e2080 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -27,6 +27,8 @@ mVUsetupBranch(xStatus, xMac, xClip, xCycles); \ CMP16ItoM((uptr)&mVU->branch, 0); \ incPC2(1); \ + if( mVUblocks[iPC/2] == NULL ) \ + mVUblocks[iPC/2] = new microBlockManager(); \ bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ incPC2(-1); \ if (bBlock) { nJMPcc((uptr)bBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \ @@ -160,6 +162,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); } startPC &= (vuIndex ? 
0x3ff8 : 0xff8); + if( mVUblocks[startPC/8] == NULL ) + mVUblocks[startPC/8] = new microBlockManager(); + // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); if (pBlock) { return pBlock->x86ptrStart; } @@ -225,6 +230,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { else { microBlock* bBlock = NULL; u32* ajmp = 0; + switch (mVUbranch) { case 3: branchCase(JE32, JNE32); // IBEQ case 4: branchCase(JGE32, JNGE32); // IBGEZ @@ -238,6 +244,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { incPC(-3); // Go back to branch opcode (to get branch imm addr) mVUsetupBranch(xStatus, xMac, xClip, xCycles); + if( mVUblocks[branchAddr/8] == NULL ) + mVUblocks[branchAddr/8] = new microBlockManager(); + // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); } @@ -265,6 +274,10 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUprint("mVUcompile conditional branch"); if (bBlock) { // Branch non-taken has already been compiled incPC(-3); // Go back to branch opcode (to get branch imm addr) + + if( mVUblocks[branchAddr/8] == NULL ) + mVUblocks[branchAddr/8] = new microBlockManager(); + // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }