- Added partial program searching support (helps to recompile less in games that send a lot of small programs like Tekken 5)
- Optimized logical min/max code some more (knocked off 2 instructions)
- Fixed some cases where 'last_used' should have been set.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1209 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-05-17 08:22:00 +00:00
parent f6ce237b8e
commit ac00db5dd5
5 changed files with 68 additions and 29 deletions

View File

@ -77,15 +77,8 @@ microVUt(void) mVUreset() {
// Clear All Program Data // Clear All Program Data
memset(&mVU->prog, 0, sizeof(mVU->prog)); memset(&mVU->prog, 0, sizeof(mVU->prog));
// Create Block Managers
// Block managers are now allocated "on-demand" by the recompiler -- air
/*for (int i = 0; i <= mVU->prog.max; i++) {
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
mVU->prog.prog[i].block[j] = new microBlockManager();
}
}*/
// Program Variables // Program Variables
mVU->prog.isSame = -1;
mVU->prog.cleared = 1; mVU->prog.cleared = 1;
mVU->prog.cur = -1; mVU->prog.cur = -1;
mVU->prog.total = -1; mVU->prog.total = -1;
@ -99,6 +92,8 @@ microVUt(void) mVUreset() {
mVU->prog.prog[i].x86ptr = z; mVU->prog.prog[i].x86ptr = z;
z += (mVU->cacheSize / (mVU->prog.max + 1)); z += (mVU->cacheSize / (mVU->prog.max + 1));
mVU->prog.prog[i].x86end = z; mVU->prog.prog[i].x86end = z;
mVU->prog.prog[i].range[0] = -1; // Set range to
mVU->prog.prog[i].range[1] = -1; // indeterminable status
} }
} }
@ -138,7 +133,10 @@ microVUt(void) mVUclear(u32 addr, u32 size) {
microVUt(void) mVUclearProg(int progIndex) { microVUt(void) mVUclearProg(int progIndex) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
mVU->prog.prog[progIndex].used = 1; mVU->prog.prog[progIndex].used = 1;
mVU->prog.prog[progIndex].last_used = 3;
mVU->prog.prog[progIndex].sFlagHack = 0; mVU->prog.prog[progIndex].sFlagHack = 0;
mVU->prog.prog[progIndex].range[0] = -1;
mVU->prog.prog[progIndex].range[1] = -1;
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
for (u32 i = 0; i < (mVU->progSize / 2); i++) { for (u32 i = 0; i < (mVU->progSize / 2); i++) {
if( mVU->prog.prog[progIndex].block[i] ) if( mVU->prog.prog[progIndex].block[i] )
@ -160,6 +158,8 @@ microVUt(int) mVUfindLeastUsedProg() {
if (mVU->prog.total < mVU->prog.max) { if (mVU->prog.total < mVU->prog.max) {
mVU->prog.total++; mVU->prog.total++;
mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program
mVU->prog.prog[mVU->prog.total].used = 1;
mVU->prog.prog[mVU->prog.total].last_used = 3;
Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1); Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1);
return mVU->prog.total; return mVU->prog.total;
} }
@ -214,13 +214,16 @@ microVUt(void) __mVUvsyncUpdate() {
} }
} }
microVUt(int) mVUcmpProg(int progIndex, bool progUsed, bool needOverflowCheck) { // Compare Cached microProgram to mVU->regs->Micro
microVUt(int) mVUcmpProg(int progIndex, bool progUsed, bool needOverflowCheck, bool cmpWholeProg) {
microVU* mVU = mVUx; microVU* mVU = mVUx;
if (progUsed) { if (progUsed) {
if (!memcmp_mmx(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize)) { if (cmpWholeProg && (!memcmp_mmx((u8*)mVUprogI.data, mVU->regs->Micro, mVU->microSize)) ||
(!cmpWholeProg && (!memcmp_mmx((u8*)mVUprogI.data + mVUprogI.range[0], (u8*)mVU->regs->Micro + mVUprogI.range[0], ((mVUprogI.range[1] + 8) - mVUprogI.range[0]))))) {
mVU->prog.cur = progIndex; mVU->prog.cur = progIndex;
mVU->prog.cleared = 0; mVU->prog.cleared = 0;
mVU->prog.isSame = cmpWholeProg ? 1 : -1;
mVU->prog.prog[progIndex].last_used = 3; mVU->prog.prog[progIndex].last_used = 3;
if (!needOverflowCheck || mVU->prog.prog[progIndex].used < 0x7fffffff) { if (!needOverflowCheck || mVU->prog.prog[progIndex].used < 0x7fffffff) {
mVU->prog.prog[progIndex].used++; // increment 'used' (avoiding overflows if necessary) mVU->prog.prog[progIndex].used++; // increment 'used' (avoiding overflows if necessary)
@ -237,18 +240,24 @@ microVUt(int) mVUsearchProg() {
if (mVU->prog.cleared) { // If cleared, we need to search for new program if (mVU->prog.cleared) { // If cleared, we need to search for new program
for (int i = 0; i <= mVU->prog.total; i++) { for (int i = 0; i <= mVU->prog.total; i++) {
if (mVUcmpProg<vuIndex>(i, !!mVU->prog.prog[i].used, 1)) if (mVUcmpProg<vuIndex>(i, !!mVU->prog.prog[i].used, 1, 0))
return 1; // Check Recently Used Programs return 1; // Check Recently Used Programs
} }
for (int i = 0; i <= mVU->prog.total; i++) { for (int i = 0; i <= mVU->prog.total; i++) {
if (mVUcmpProg<vuIndex>(i, !mVU->prog.prog[i].used, 0)) if (mVUcmpProg<vuIndex>(i, !mVU->prog.prog[i].used, 0, 0))
return 1; // Check Older Programs return 1; // Check Older Programs
} }
/*for (int i = 0; i <= mVU->prog.total; i++) {
if (mVUcmpProg<vuIndex>(i, 1, 1, 0))
return 1; // Check Partial Program
}*/
mVU->prog.cur = mVUfindLeastUsedProg<vuIndex>(); // If cleared and program not found, make a new program instance mVU->prog.cur = mVUfindLeastUsedProg<vuIndex>(); // If cleared and program not found, make a new program instance
mVU->prog.cleared = 0; mVU->prog.cleared = 0;
mVU->prog.isSame = 1;
return 0; return 0;
} }
mVU->prog.prog[mVU->prog.cur].used++; mVU->prog.prog[mVU->prog.cur].used++;
mVU->prog.prog[mVU->prog.cur].last_used = 3;
return 1; // If !cleared, then we're still on the same program as last-time ;) return 1; // If !cleared, then we're still on the same program as last-time ;)
} }
/* /*

View File

@ -88,7 +88,7 @@ struct microProgManager {
static const int max = mMaxProg - 1; static const int max = mMaxProg - 1;
int cur; // Index to Current MicroProgram thats running (-1 = uncached) int cur; // Index to Current MicroProgram thats running (-1 = uncached)
int total; // Total Number of valid MicroPrograms minus 1 int total; // Total Number of valid MicroPrograms minus 1
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one) int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution) microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
}; };

View File

@ -150,6 +150,34 @@ microVUt(void) mVUtestCycles() {
SUB32ItoM((uptr)&mVU->cycles, mVUcycles); SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
} }
// Determines whether the cached copy of the current microProgram still matches
// the live micro memory (mVU->regs->Micro) and re-caches the program when it
// does not.
// prog.isSame states: -1 = not yet compared, 0 = differs, 1 = identical.
microVUt(void) mVUcheckIsSame() {
	microVU* mVU = mVUx;
	if (mVU->prog.isSame == -1) {
		// memcmp_mmx yields 0 when the buffers match (mVUcmpProg tests it with '!'
		// for the same reason), so a single negation gives 1 = same, 0 = different.
		// The previous '!!' stored the inverse of the documented meaning.
		mVU->prog.isSame = !memcmp_mmx(mVU->prog.prog[mVU->prog.cur].data, mVU->regs->Micro, mVU->microSize);
	}
	if (mVU->prog.isSame == 0) {
		// Cached data is stale: refresh it from the live micro memory.
		// NOTE(review): isSame is left at 0 afterwards, so later calls will
		// re-cache again; confirm whether it should be set to 1 here.
		mVUcacheProg<vuIndex>(mVU->prog.cur);
	}
}
// Grows the [range[0], range[1]] bounds recorded for the current microProgram
// so that they include 'pc'. A range[0] of -1 means no bounds have been
// recorded yet. Whenever an already-recorded bound is widened, the cached
// program is re-validated through mVUcheckIsSame.
microVUt(void) mVUsetupRange(u32 pc) {
	microVU* mVU = mVUx;
	const s32 addr = (s32)pc;

	// First address seen for this program: both bounds collapse onto it.
	if (mVUcurProg.range[0] == -1) {
		mVUcurProg.range[0] = addr;
		mVUcurProg.range[1] = addr;
		return;
	}

	// Address lies below the recorded start: extend the lower bound.
	if (mVUcurProg.range[0] > addr) {
		mVUcurProg.range[0] = addr;
		mVUcheckIsSame<vuIndex>();
		return;
	}

	// Address lies above the recorded end: extend the upper bound.
	if (mVUcurProg.range[1] < addr) {
		mVUcurProg.range[1] = addr;
		mVUcheckIsSame<vuIndex>();
	}
}
//------------------------------------------------------------------ //------------------------------------------------------------------
// Recompiler // Recompiler
//------------------------------------------------------------------ //------------------------------------------------------------------
@ -169,6 +197,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
if (pBlock) { return pBlock->x86ptrStart; } if (pBlock) { return pBlock->x86ptrStart; }
// Setup Program Bounds/Range
mVUsetupRange<vuIndex>(startPC);
// First Pass // First Pass
iPC = startPC / 4; iPC = startPC / 4;
setCode(); setCode();
@ -230,6 +261,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
else { else {
microBlock* bBlock = NULL; microBlock* bBlock = NULL;
u32* ajmp = 0; u32* ajmp = 0;
mVUsetupRange<vuIndex>(xPC);
switch (mVUbranch) { switch (mVUbranch) {
case 3: branchCase(JE32, JNE32); // IBEQ case 3: branchCase(JE32, JNE32); // IBEQ
@ -308,6 +340,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); } if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); }
// Do E-bit end stuff here // Do E-bit end stuff here
mVUsetupRange<vuIndex>(xPC - 8);
mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff)); mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff));
return thisPtr; //ToDo: Save pipeline state? return thisPtr; //ToDo: Save pipeline state?

View File

@ -150,6 +150,7 @@ declareAllVariables
#define pass4 if (recPass == 3) #define pass4 if (recPass == 3)
// Misc Macros... // Misc Macros...
#define mVUprogI mVU->prog.prog[progIndex]
#define mVUcurProg mVU->prog.prog[mVU->prog.cur] #define mVUcurProg mVU->prog.prog[mVU->prog.cur]
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block #define mVUblocks mVU->prog.prog[mVU->prog.cur].block
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
@ -161,13 +162,14 @@ declareAllVariables
#define mVUregs mVUallocInfo.block.pState #define mVUregs mVUallocInfo.block.pState
#define mVUregsTemp mVUallocInfo.regsTemp #define mVUregsTemp mVUallocInfo.regsTemp
#define iPC mVUallocInfo.curPC #define iPC mVUallocInfo.curPC
#define mVUflagInfo mVUregs.needExactMatch
#define mVUsFlagHack mVUallocInfo.sFlagHack #define mVUsFlagHack mVUallocInfo.sFlagHack
#define mVUinfo mVUallocInfo.info[iPC / 2] #define mVUinfo mVUallocInfo.info[iPC / 2]
#define mVUstall mVUallocInfo.stall[iPC / 2] #define mVUstall mVUallocInfo.stall[iPC / 2]
#define mVUstartPC mVUallocInfo.startPC #define mVUstartPC mVUallocInfo.startPC
#define mVUflagInfo mVUregs.needExactMatch
#define mVUflagHack (mVUcurProg.sFlagHack)
#define xPC ((iPC / 2) * 8) #define xPC ((iPC / 2) * 8)
#define curI mVUcurProg.data[iPC] #define curI ((u32*)mVU->regs->Micro)[iPC]//mVUcurProg.data[iPC]
#define setCode() { mVU->code = curI; } #define setCode() { mVU->code = curI; }
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
@ -175,7 +177,11 @@ declareAllVariables
#define bSaveAddr (((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) / 8) #define bSaveAddr (((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) / 8)
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) #define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04)) #define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
#define mVUflagHack (mVUcurProg.sFlagHack)
// Flag Info
#define __Status (mVUflagInfo & (0xf<<0))
#define __Mac (mVUflagInfo & (0xf<<4))
#define __Clip (mVUflagInfo & (0xf<<8))
// Pass 1 uses these to set mVUinfo // Pass 1 uses these to set mVUinfo
#define _isNOP (1<<0) // Skip Lower Instruction #define _isNOP (1<<0) // Skip Lower Instruction
@ -254,13 +260,6 @@ declareAllVariables
#define mVUlogQ() { mVUlog(", Q"); } #define mVUlogQ() { mVUlog(", Q"); }
#define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); } #define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
// Flag Info
//#define __NeedExact (1<<12)
//#define __ExactMatch (mVUregs.needExactMatch & (1<<12))
#define __Status (mVUflagInfo & (0xf<<0))
#define __Mac (mVUflagInfo & (0xf<<4))
#define __Clip (mVUflagInfo & (0xf<<8))
// Store VI regs in mmx regs? // Store VI regs in mmx regs?
#define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8) #define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8)
#define mmVI(_VIreg_) (_VIreg_ - 1) #define mmVI(_VIreg_) (_VIreg_ - 1)

View File

@ -348,12 +348,10 @@ void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
// Warning: Modifies from and to's upper 3 vectors // Warning: Modifies from and to's upper 3 vectors
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) { void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50); SSE_SHUFPS_XMM_to_XMM (to, from, 0);
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1); SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2); SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
SSE2_PSHUFD_XMM_to_XMM(from, from, 0x50); SSE2_PSHUFD_XMM_to_XMM(from, to, 0xee);
SSE2_PAND_M128_to_XMM (from, (uptr)MIN_MAX_MASK1);
SSE2_POR_M128_to_XMM (from, (uptr)MIN_MAX_MASK2);
if (min) SSE2_MINPD_XMM_to_XMM(to, from); if (min) SSE2_MINPD_XMM_to_XMM(to, from);
else SSE2_MAXPD_XMM_to_XMM(to, from); else SSE2_MAXPD_XMM_to_XMM(to, from);
} }