* Smartened up the microprogram cache.  Programs to overwrite are now selected (quite accurately) based on how often and how recently they have been used.  Solves constant recompilation issues in games like FFXII and Tekken 5 [but not the SPS, sorry!].

* Changed memory allocation so that microprogram blocks are allocated *on demand* -- this saves a lot of memory and makes it possible to test 64-program caches (which would previously fail because they needed more than 2 GB of RAM allocations!).  Also fixed a few small memory bugs in Init/Alloc, and made it so mVU only allocates memory once instead of on every reset. :)

* Tweaked uses of progSize in microVU.h so that it's consistent [all instances of progSize are (vumem_size / 4) now]

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1180 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-05-14 19:34:17 +00:00
parent 5648d07b96
commit 729d90c1ce
6 changed files with 118 additions and 32 deletions

View File

@ -44,8 +44,8 @@ extern SessionOverrideFlags g_Session;
//////////////////////////////////////////////////////////////////////////
// Pcsx2 User Configuration Options!
//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs
//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now)
#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs
#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now)
#define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs
#define PCSX2_EEREC 0x10
#define PCSX2_VU0REC 0x20

View File

@ -447,6 +447,11 @@ __forceinline bool rcntUpdate_vSync()
// Accumulate hsync rounding errors:
hsyncCounter.sCycle += vSyncInfo.hSyncError;
#ifdef PCSX2_MICROVU
extern void mVUvsyncUpdate();
mVUvsyncUpdate();
#endif
# ifdef VSYNC_DEBUG
vblankinc++;
if( vblankinc > 1 )

View File

@ -47,6 +47,9 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
memset(&mVU->prog, 0, sizeof(mVU->prog));
mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init");
mVU->cache = SysMmapEx((vuIndex ? 0x0f240000 : 0x0e240000), mVU->cacheSize + 0x1000, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", (u32)mVU->cache));
mVUreset<vuIndex>();
}
@ -54,14 +57,17 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) {
microVUt(void) mVUreset() {
microVU* mVU = mVUx;
mVUclose<vuIndex>(); // Close
mVUprint((vuIndex) ? "microVU1: reset" : "microVU0: reset");
// Delete Block Managers
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
safe_delete( mVU->prog.prog[i].block[j] );
}
}
// Dynarec Cache
mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0"));
if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache));
memset(mVU->cache, 0xcc, mVU->cacheSize);
memset(mVU->cache, 0xcc, mVU->cacheSize + 0x1000);
// Setup Entrance/Exit Points
x86SetPtr(mVU->cache);
@ -72,11 +78,12 @@ microVUt(void) mVUreset() {
memset(&mVU->prog, 0, sizeof(mVU->prog));
// Create Block Managers
for (int i = 0; i <= mVU->prog.max; i++) {
// Block managers are now allocated "on-demand" by the recompiler -- air
/*for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}
}*/
// Program Variables
mVU->prog.finished = 1;
@ -87,7 +94,7 @@ microVUt(void) mVUreset() {
//mVU->prog.lpState = &mVU->prog.prog[15].allocInfo.block.pState; // Blank Pipeline State (ToDo: finish implementation)
// Setup Dynarec Cache Limits for Each Program
u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes
u8* z = (mVU->cache + 0x1000); // Dispatcher Code is in first page of cache
for (int i = 0; i <= mVU->prog.max; i++) {
mVU->prog.prog[i].x86start = z;
mVU->prog.prog[i].x86ptr = z;
@ -107,7 +114,9 @@ microVUt(void) mVUclose() {
// Delete Block Managers
for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j];
if (mVU->prog.prog[i].block[j]) {
safe_delete( mVU->prog.prog[i].block[j] );
}
}
}
}
@ -131,7 +140,8 @@ microVUt(void) mVUclearProg(int progIndex) {
mVU->prog.prog[progIndex].sFlagHack = 0;
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
mVU->prog.prog[progIndex].block[i]->reset();
if( mVU->prog.prog[progIndex].block[i] )
mVU->prog.prog[progIndex].block[i]->reset();
}
}
@ -153,30 +163,80 @@ microVUt(int) mVUfindLeastUsedProg() {
return mVU->prog.total;
}
else {
int j = (mVU->prog.cur + 1) & mVU->prog.max;
/*u32 smallest = mVU->prog.prog[j].used;
for (int i = ((j+1)&mVU->prog.max), z = 0; z < mVU->prog.max; i = (i+1)&mVU->prog.max, z++) {
if (smallest > mVU->prog.prog[i].used) {
smallest = mVU->prog.prog[i].used;
j = i;
int startidx = (mVU->prog.cur + 1) & mVU->prog.max;
int endidx = mVU->prog.cur;
int smallidx = startidx;
u32 smallval = mVU->prog.prog[startidx].used;
for (int i = startidx; i != endidx; i = (i+1)&mVU->prog.max)
{
u32 used = mVU->prog.prog[i].used;
if (smallval > used)
{
smallval = used;
smallidx = i;
}
}*/
mVUclearProg<vuIndex>(j); // Clear old data if overwriting old program
mVUcacheProg<vuIndex>(j); // Cache Micro Program
//Console::Notice("microVU%d: MicroProgram Cache Full!", params vuIndex);
return j;
}
mVUclearProg<vuIndex>(smallidx); // Clear old data if overwriting old program
mVUcacheProg<vuIndex>(smallidx); // Cache Micro Program
Console::Notice("microVU%d: Overwriting existing program in slot %d [%d times used]", params vuIndex, smallidx, smallval );
return smallidx;
}
}
// mVUvsyncUpdate -->
// Ages the usage statistics of the cached micro programs.  Meant to be driven from the vsync
// handling in Counters.cpp; per the original author a 30fps cadence is probably all that is
// needed for accurate results, and 60fps works too.
//
// So that "obsolete" programs are disposed of more efficiently, the cache combines two
// per-program counters:
//   * used      -- the program execution counter, adjusted here on every call.
//   * last_used -- a countdown that this function decrements by one per call until it hits 0.
// A program whose last_used is still 3 or more counts as recently used and receives a weighted
// bonus on 'used'.  A program whose last_used has already reached 0 gets its 'used' value halved.
// Nothing happens until prog.total has reached prog.max.
//
microVUt(void) __mVUvsyncUpdate() {
	microVU* mVU = mVUx;

	// No aging is performed while prog.total is still below prog.max.
	if (mVU->prog.total < mVU->prog.max) return;

	for (int slot = 0; slot <= mVU->prog.total; slot++) {
		if (mVU->prog.prog[slot].last_used == 0) {
			// Countdown already expired: penalize the unused program.
			mVU->prog.prog[slot].used /= 2;
			continue;
		}

		// Program has been used recently.  Give its execution counter a 'weighted' bonus
		// signifying its importance; the bonus is skipped once 'used' reaches 0x4fffffff.
		if (mVU->prog.prog[slot].last_used >= 3 && mVU->prog.prog[slot].used < 0x4fffffff) {
			mVU->prog.prog[slot].used += 0x200;
		}

		mVU->prog.prog[slot].last_used--;
	}
}
// Non-template entry point: runs the program-cache aging pass once for each of the two
// microVU instances (template index 0, then index 1).
void mVUvsyncUpdate() {
	__mVUvsyncUpdate<0>();
	__mVUvsyncUpdate<1>();
}
// Searches for Cached Micro Program and sets prog.cur to it (returns 1 if program found, else returns 0)
microVUt(int) mVUsearchProg() {
microVU* mVU = mVUx;
if (mVU->prog.cleared) { // If cleared, we need to search for new program
for (int i = 0; i <= mVU->prog.total; i++) {
if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) {
mVU->prog.cur = i;
mVU->prog.cleared = 0;
mVU->prog.prog[i].used++;
if( mVU->prog.prog[i].used < 0x7fffffff ) // avoid overflows on well-used programs
mVU->prog.prog[i].used++;
mVU->prog.prog[i].last_used = 3; // add me to the mVU structs
return 1;
}
}

View File

@ -19,6 +19,7 @@
#pragma once
//#define mVUdebug // Prints Extra Info to Console
//#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt
#include "Common.h"
#include "VU.h"
#include "GS.h"
@ -64,20 +65,21 @@ public:
}
};
template<u32 progSize>
template<u32 progSize> // progSize = VU program memory size / 4
struct microProgram {
u32 data[progSize/4];
u32 data[progSize];
u32 used; // Number of times its been used
u32 last_used; // counters # of frames since last use (starts at 3 and counts backwards to 0 for each 30fps vsync)
u32 sFlagHack; // Optimize out Status Flag Updates if Program doesn't use Status Flags
u8* x86ptr; // Pointer to program's recompilation code
u8* x86start; // Start of program's rec-cache
u8* x86end; // Limit of program's rec-cache
microBlockManager* block[progSize/8];
microBlockManager* block[progSize/2];
microAllocInfo<progSize> allocInfo;
};
#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
template<u32 pSize>
#define mMaxProg 32 // The amount of Micro Programs Recs will 'remember' (For n = 1, 2, 4, 8, 16, etc...)
template<u32 pSize> // pSize = VU program memory size / 4
struct microProgManager {
microProgram<pSize> prog[mMaxProg]; // Store MicroPrograms in memory
static const int max = mMaxProg - 1;
@ -95,7 +97,7 @@ struct microVU {
u32 progSize; // VU Micro Program Size (microSize/4)
u32 cacheSize; // VU Cache Size
microProgManager<0x4000> prog; // Micro Program Data
microProgManager<0x4000/4> prog; // Micro Program Data
FILE* logFile; // Log File Pointer
VURegs* regs; // VU Regs Struct
@ -112,6 +114,12 @@ struct microVU {
u32 totalCycles; // Total Cycles that mVU is expected to run for
u32 cycles; // Cycles Counter
// WARNING! MSVC does not reliably guarantee alignment on structure or class member variables,
// failing around 10-20% of the time to align (random depending on various circumstances).
// GCC fails to align the members at all, failing about 50-80% of the time (barring occasional
// random luck). If you want these to be guaranteed aligned, move them to the top of the
// struct, and ensure the struct itself is aligned. :) -- air
PCSX2_ALIGNED16(u32 macFlag[4]); // 4 instances of mac flag (used in execution)
PCSX2_ALIGNED16(u32 clipFlag[4]); // 4 instances of clip flag (used in execution)
PCSX2_ALIGNED16(u32 xmmPQb[4]); // Backup for xmmPQ

View File

@ -76,6 +76,6 @@ struct microAllocInfo {
u32 curPC; // Current PC
u32 startPC; // Start PC for Cur Block
u32 sFlagHack; // Optimize out all Status flag updates if microProgram doesn't use Status flags
u32 info[pSize/8]; // Info for Instructions in current block
u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
u32 info[pSize/2]; // Info for Instructions in current block
u8 stall[pSize/2]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes)
};

View File

@ -27,6 +27,8 @@
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles); \
CMP16ItoM((uptr)&mVU->branch, 0); \
incPC2(1); \
if( mVUblocks[iPC/2] == NULL ) \
mVUblocks[iPC/2] = new microBlockManager(); \
bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \
incPC2(-1); \
if (bBlock) { nJMPcc((uptr)bBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \
@ -160,6 +162,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); }
startPC &= (vuIndex ? 0x3ff8 : 0xff8);
if( mVUblocks[startPC/8] == NULL )
mVUblocks[startPC/8] = new microBlockManager();
// Searches for Existing Compiled Block (if found, then returns; else, compile)
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
if (pBlock) { return pBlock->x86ptrStart; }
@ -225,6 +230,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
else {
microBlock* bBlock = NULL;
u32* ajmp = 0;
switch (mVUbranch) {
case 3: branchCase(JE32, JNE32); // IBEQ
case 4: branchCase(JGE32, JNGE32); // IBGEZ
@ -238,6 +244,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
incPC(-3); // Go back to branch opcode (to get branch imm addr)
mVUsetupBranch<vuIndex>(xStatus, xMac, xClip, xCycles);
if( mVUblocks[branchAddr/8] == NULL )
mVUblocks[branchAddr/8] = new microBlockManager();
// Check if branch-block has already been compiled
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }
@ -265,6 +274,10 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
mVUprint("mVUcompile conditional branch");
if (bBlock) { // Branch non-taken has already been compiled
incPC(-3); // Go back to branch opcode (to get branch imm addr)
if( mVUblocks[branchAddr/8] == NULL )
mVUblocks[branchAddr/8] = new microBlockManager();
// Check if branch-block has already been compiled
pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs);
if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); }