mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Added partial program searching support (helps to recompile less in games that send a lot of small programs, like Tekken 5).
- Optimized the logical min/max code some more (knocked off 2 instructions).
- Fixed some cases where 'last_used' should have been set.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1209 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
f6ce237b8e
commit
ac00db5dd5
|
@ -77,15 +77,8 @@ microVUt(void) mVUreset() {
|
||||||
// Clear All Program Data
|
// Clear All Program Data
|
||||||
memset(&mVU->prog, 0, sizeof(mVU->prog));
|
memset(&mVU->prog, 0, sizeof(mVU->prog));
|
||||||
|
|
||||||
// Create Block Managers
|
|
||||||
// Block managers are now allocated "on-demand" by the recompiler -- air
|
|
||||||
/*for (int i = 0; i <= mVU->prog.max; i++) {
|
|
||||||
for (u32 j = 0; j < (mVU->progSize / 2); j++) {
|
|
||||||
mVU->prog.prog[i].block[j] = new microBlockManager();
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// Program Variables
|
// Program Variables
|
||||||
|
mVU->prog.isSame = -1;
|
||||||
mVU->prog.cleared = 1;
|
mVU->prog.cleared = 1;
|
||||||
mVU->prog.cur = -1;
|
mVU->prog.cur = -1;
|
||||||
mVU->prog.total = -1;
|
mVU->prog.total = -1;
|
||||||
|
@ -99,6 +92,8 @@ microVUt(void) mVUreset() {
|
||||||
mVU->prog.prog[i].x86ptr = z;
|
mVU->prog.prog[i].x86ptr = z;
|
||||||
z += (mVU->cacheSize / (mVU->prog.max + 1));
|
z += (mVU->cacheSize / (mVU->prog.max + 1));
|
||||||
mVU->prog.prog[i].x86end = z;
|
mVU->prog.prog[i].x86end = z;
|
||||||
|
mVU->prog.prog[i].range[0] = -1; // Set range to
|
||||||
|
mVU->prog.prog[i].range[1] = -1; // indeterminable status
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -138,7 +133,10 @@ microVUt(void) mVUclear(u32 addr, u32 size) {
|
||||||
microVUt(void) mVUclearProg(int progIndex) {
|
microVUt(void) mVUclearProg(int progIndex) {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
mVU->prog.prog[progIndex].used = 1;
|
mVU->prog.prog[progIndex].used = 1;
|
||||||
|
mVU->prog.prog[progIndex].last_used = 3;
|
||||||
mVU->prog.prog[progIndex].sFlagHack = 0;
|
mVU->prog.prog[progIndex].sFlagHack = 0;
|
||||||
|
mVU->prog.prog[progIndex].range[0] = -1;
|
||||||
|
mVU->prog.prog[progIndex].range[1] = -1;
|
||||||
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
|
mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start;
|
||||||
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
|
for (u32 i = 0; i < (mVU->progSize / 2); i++) {
|
||||||
if( mVU->prog.prog[progIndex].block[i] )
|
if( mVU->prog.prog[progIndex].block[i] )
|
||||||
|
@ -160,6 +158,8 @@ microVUt(int) mVUfindLeastUsedProg() {
|
||||||
if (mVU->prog.total < mVU->prog.max) {
|
if (mVU->prog.total < mVU->prog.max) {
|
||||||
mVU->prog.total++;
|
mVU->prog.total++;
|
||||||
mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program
|
mVUcacheProg<vuIndex>(mVU->prog.total); // Cache Micro Program
|
||||||
|
mVU->prog.prog[mVU->prog.total].used = 1;
|
||||||
|
mVU->prog.prog[mVU->prog.total].last_used = 3;
|
||||||
Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1);
|
Console::Notice("microVU%d: Cached MicroPrograms = %d", params vuIndex, mVU->prog.total+1);
|
||||||
return mVU->prog.total;
|
return mVU->prog.total;
|
||||||
}
|
}
|
||||||
|
@ -214,13 +214,16 @@ microVUt(void) __mVUvsyncUpdate() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
microVUt(int) mVUcmpProg(int progIndex, bool progUsed, bool needOverflowCheck) {
|
// Compare Cached microProgram to mVU->regs->Micro
|
||||||
|
microVUt(int) mVUcmpProg(int progIndex, bool progUsed, bool needOverflowCheck, bool cmpWholeProg) {
|
||||||
microVU* mVU = mVUx;
|
microVU* mVU = mVUx;
|
||||||
|
|
||||||
if (progUsed) {
|
if (progUsed) {
|
||||||
if (!memcmp_mmx(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize)) {
|
if (cmpWholeProg && (!memcmp_mmx((u8*)mVUprogI.data, mVU->regs->Micro, mVU->microSize)) ||
|
||||||
|
(!cmpWholeProg && (!memcmp_mmx((u8*)mVUprogI.data + mVUprogI.range[0], (u8*)mVU->regs->Micro + mVUprogI.range[0], ((mVUprogI.range[1] + 8) - mVUprogI.range[0]))))) {
|
||||||
mVU->prog.cur = progIndex;
|
mVU->prog.cur = progIndex;
|
||||||
mVU->prog.cleared = 0;
|
mVU->prog.cleared = 0;
|
||||||
|
mVU->prog.isSame = cmpWholeProg ? 1 : -1;
|
||||||
mVU->prog.prog[progIndex].last_used = 3;
|
mVU->prog.prog[progIndex].last_used = 3;
|
||||||
if (!needOverflowCheck || mVU->prog.prog[progIndex].used < 0x7fffffff) {
|
if (!needOverflowCheck || mVU->prog.prog[progIndex].used < 0x7fffffff) {
|
||||||
mVU->prog.prog[progIndex].used++; // increment 'used' (avoiding overflows if necessary)
|
mVU->prog.prog[progIndex].used++; // increment 'used' (avoiding overflows if necessary)
|
||||||
|
@ -237,18 +240,24 @@ microVUt(int) mVUsearchProg() {
|
||||||
|
|
||||||
if (mVU->prog.cleared) { // If cleared, we need to search for new program
|
if (mVU->prog.cleared) { // If cleared, we need to search for new program
|
||||||
for (int i = 0; i <= mVU->prog.total; i++) {
|
for (int i = 0; i <= mVU->prog.total; i++) {
|
||||||
if (mVUcmpProg<vuIndex>(i, !!mVU->prog.prog[i].used, 1))
|
if (mVUcmpProg<vuIndex>(i, !!mVU->prog.prog[i].used, 1, 0))
|
||||||
return 1; // Check Recently Used Programs
|
return 1; // Check Recently Used Programs
|
||||||
}
|
}
|
||||||
for (int i = 0; i <= mVU->prog.total; i++) {
|
for (int i = 0; i <= mVU->prog.total; i++) {
|
||||||
if (mVUcmpProg<vuIndex>(i, !mVU->prog.prog[i].used, 0))
|
if (mVUcmpProg<vuIndex>(i, !mVU->prog.prog[i].used, 0, 0))
|
||||||
return 1; // Check Older Programs
|
return 1; // Check Older Programs
|
||||||
}
|
}
|
||||||
|
/*for (int i = 0; i <= mVU->prog.total; i++) {
|
||||||
|
if (mVUcmpProg<vuIndex>(i, 1, 1, 0))
|
||||||
|
return 1; // Check Partial Program
|
||||||
|
}*/
|
||||||
mVU->prog.cur = mVUfindLeastUsedProg<vuIndex>(); // If cleared and program not found, make a new program instance
|
mVU->prog.cur = mVUfindLeastUsedProg<vuIndex>(); // If cleared and program not found, make a new program instance
|
||||||
mVU->prog.cleared = 0;
|
mVU->prog.cleared = 0;
|
||||||
|
mVU->prog.isSame = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
mVU->prog.prog[mVU->prog.cur].used++;
|
mVU->prog.prog[mVU->prog.cur].used++;
|
||||||
|
mVU->prog.prog[mVU->prog.cur].last_used = 3;
|
||||||
return 1; // If !cleared, then we're still on the same program as last-time ;)
|
return 1; // If !cleared, then we're still on the same program as last-time ;)
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -88,7 +88,7 @@ struct microProgManager {
|
||||||
static const int max = mMaxProg - 1;
|
static const int max = mMaxProg - 1;
|
||||||
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
|
int cur; // Index to Current MicroProgram thats running (-1 = uncached)
|
||||||
int total; // Total Number of valid MicroPrograms minus 1
|
int total; // Total Number of valid MicroPrograms minus 1
|
||||||
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro
|
int isSame; // Current cached microProgram is Exact Same program as mVU->regs->Micro (-1 = unknown, 0 = No, 1 = Yes)
|
||||||
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
|
int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one)
|
||||||
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
|
microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution)
|
||||||
};
|
};
|
||||||
|
|
|
@ -150,6 +150,34 @@ microVUt(void) mVUtestCycles() {
|
||||||
SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
|
SUB32ItoM((uptr)&mVU->cycles, mVUcycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolves whether the cached copy of the current micro program still matches
// mVU->regs->Micro, and caches the program again when it does not.
// mVU->prog.isSame encoding: -1 = unknown, 0 = no, 1 = yes.
microVUt(void) mVUcheckIsSame() {
	microVU* mVU = mVUx;

	if (mVU->prog.isSame == -1) {
		// memcmp_mmx() yields 0 for identical buffers (mVUcmpProg treats
		// "!memcmp_mmx(...)" as a match), so a single '!' gives 1 when the
		// programs are the same. The previous "!!" stored the inverse.
		mVU->prog.isSame = !memcmp_mmx(mVU->prog.prog[mVU->prog.cur].data, mVU->regs->Micro, mVU->microSize);
	}
	if (mVU->prog.isSame == 0) {
		// Cached data no longer matches micro memory: cache the program again
		mVUcacheProg<vuIndex>(mVU->prog.cur);
	}
}
|
||||||
|
|
||||||
|
// Widens the current program's recorded bounds (range[0] = lowest address,
// range[1] = highest address) so that they include 'pc'.
// Whenever an already-initialized bound is moved, mVUcheckIsSame() is run.
microVUt(void) mVUsetupRange(u32 pc) {
	microVU* mVU = mVUx;
	const s32 addr = (s32)pc;

	// -1 marks a range that has not been set yet: both bounds start at 'pc'
	if (mVUcurProg.range[0] == -1) {
		mVUcurProg.range[0] = addr;
		mVUcurProg.range[1] = addr;
		return;
	}

	// 'pc' lies below the lower bound: extend downwards
	if (addr < mVUcurProg.range[0]) {
		mVUcurProg.range[0] = addr;
		mVUcheckIsSame<vuIndex>();
		return;
	}

	// 'pc' lies above the upper bound: extend upwards
	if (addr > mVUcurProg.range[1]) {
		mVUcurProg.range[1] = addr;
		mVUcheckIsSame<vuIndex>();
	}
}
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// Recompiler
|
// Recompiler
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -169,6 +197,9 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
||||||
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
|
microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState);
|
||||||
if (pBlock) { return pBlock->x86ptrStart; }
|
if (pBlock) { return pBlock->x86ptrStart; }
|
||||||
|
|
||||||
|
// Setup Program Bounds/Range
|
||||||
|
mVUsetupRange<vuIndex>(startPC);
|
||||||
|
|
||||||
// First Pass
|
// First Pass
|
||||||
iPC = startPC / 4;
|
iPC = startPC / 4;
|
||||||
setCode();
|
setCode();
|
||||||
|
@ -230,6 +261,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
||||||
else {
|
else {
|
||||||
microBlock* bBlock = NULL;
|
microBlock* bBlock = NULL;
|
||||||
u32* ajmp = 0;
|
u32* ajmp = 0;
|
||||||
|
mVUsetupRange<vuIndex>(xPC);
|
||||||
|
|
||||||
switch (mVUbranch) {
|
switch (mVUbranch) {
|
||||||
case 3: branchCase(JE32, JNE32); // IBEQ
|
case 3: branchCase(JE32, JNE32); // IBEQ
|
||||||
|
@ -308,6 +340,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) {
|
||||||
if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); }
|
if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { Console::Error("microVU%d: Possible infinite compiling loop!", params vuIndex); }
|
||||||
|
|
||||||
// Do E-bit end stuff here
|
// Do E-bit end stuff here
|
||||||
|
mVUsetupRange<vuIndex>(xPC - 8);
|
||||||
mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff));
|
mVUendProgram<vuIndex>(findFlagInst(xStatus, 0x7fffffff), findFlagInst(xMac, 0x7fffffff), findFlagInst(xClip, 0x7fffffff));
|
||||||
|
|
||||||
return thisPtr; //ToDo: Save pipeline state?
|
return thisPtr; //ToDo: Save pipeline state?
|
||||||
|
|
|
@ -150,6 +150,7 @@ declareAllVariables
|
||||||
#define pass4 if (recPass == 3)
|
#define pass4 if (recPass == 3)
|
||||||
|
|
||||||
// Misc Macros...
|
// Misc Macros...
|
||||||
|
#define mVUprogI mVU->prog.prog[progIndex]
|
||||||
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
|
#define mVUcurProg mVU->prog.prog[mVU->prog.cur]
|
||||||
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block
|
#define mVUblocks mVU->prog.prog[mVU->prog.cur].block
|
||||||
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
|
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
|
||||||
|
@ -161,13 +162,14 @@ declareAllVariables
|
||||||
#define mVUregs mVUallocInfo.block.pState
|
#define mVUregs mVUallocInfo.block.pState
|
||||||
#define mVUregsTemp mVUallocInfo.regsTemp
|
#define mVUregsTemp mVUallocInfo.regsTemp
|
||||||
#define iPC mVUallocInfo.curPC
|
#define iPC mVUallocInfo.curPC
|
||||||
#define mVUflagInfo mVUregs.needExactMatch
|
|
||||||
#define mVUsFlagHack mVUallocInfo.sFlagHack
|
#define mVUsFlagHack mVUallocInfo.sFlagHack
|
||||||
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
#define mVUinfo mVUallocInfo.info[iPC / 2]
|
||||||
#define mVUstall mVUallocInfo.stall[iPC / 2]
|
#define mVUstall mVUallocInfo.stall[iPC / 2]
|
||||||
#define mVUstartPC mVUallocInfo.startPC
|
#define mVUstartPC mVUallocInfo.startPC
|
||||||
|
#define mVUflagInfo mVUregs.needExactMatch
|
||||||
|
#define mVUflagHack (mVUcurProg.sFlagHack)
|
||||||
#define xPC ((iPC / 2) * 8)
|
#define xPC ((iPC / 2) * 8)
|
||||||
#define curI mVUcurProg.data[iPC]
|
#define curI ((u32*)mVU->regs->Micro)[iPC]//mVUcurProg.data[iPC]
|
||||||
#define setCode() { mVU->code = curI; }
|
#define setCode() { mVU->code = curI; }
|
||||||
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); }
|
||||||
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
|
#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
|
||||||
|
@ -175,7 +177,11 @@ declareAllVariables
|
||||||
#define bSaveAddr (((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) / 8)
|
#define bSaveAddr (((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) / 8)
|
||||||
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
|
#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
|
||||||
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
|
#define shufflePQ (((mVU->p) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
|
||||||
#define mVUflagHack (mVUcurProg.sFlagHack)
|
|
||||||
|
// Flag Info
|
||||||
|
#define __Status (mVUflagInfo & (0xf<<0))
|
||||||
|
#define __Mac (mVUflagInfo & (0xf<<4))
|
||||||
|
#define __Clip (mVUflagInfo & (0xf<<8))
|
||||||
|
|
||||||
// Pass 1 uses these to set mVUinfo
|
// Pass 1 uses these to set mVUinfo
|
||||||
#define _isNOP (1<<0) // Skip Lower Instruction
|
#define _isNOP (1<<0) // Skip Lower Instruction
|
||||||
|
@ -254,13 +260,6 @@ declareAllVariables
|
||||||
#define mVUlogQ() { mVUlog(", Q"); }
|
#define mVUlogQ() { mVUlog(", Q"); }
|
||||||
#define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
|
#define mVUlogCLIP() { mVUlog("w.xyz vf%02d, vf%02dw", _Fs_, _Ft_); }
|
||||||
|
|
||||||
// Flag Info
|
|
||||||
//#define __NeedExact (1<<12)
|
|
||||||
//#define __ExactMatch (mVUregs.needExactMatch & (1<<12))
|
|
||||||
#define __Status (mVUflagInfo & (0xf<<0))
|
|
||||||
#define __Mac (mVUflagInfo & (0xf<<4))
|
|
||||||
#define __Clip (mVUflagInfo & (0xf<<8))
|
|
||||||
|
|
||||||
// Store VI regs in mmx regs?
|
// Store VI regs in mmx regs?
|
||||||
#define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8)
|
#define isMMX(_VIreg_) 0 //(_VIreg_ >= 1 && _VIreg_ <=8)
|
||||||
#define mmVI(_VIreg_) (_VIreg_ - 1)
|
#define mmVI(_VIreg_) (_VIreg_ - 1)
|
||||||
|
|
|
@ -348,12 +348,10 @@ void MIN_MAX_(x86SSERegType to, x86SSERegType from, bool min) {
|
||||||
|
|
||||||
// Warning: Modifies from and to's upper 3 vectors
|
// Warning: Modifies from and to's upper 3 vectors
|
||||||
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
|
void MIN_MAX_SS(x86SSERegType to, x86SSERegType from, bool min) {
|
||||||
SSE2_PSHUFD_XMM_to_XMM(to, to, 0x50);
|
SSE_SHUFPS_XMM_to_XMM (to, from, 0);
|
||||||
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
SSE2_PAND_M128_to_XMM (to, (uptr)MIN_MAX_MASK1);
|
||||||
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
SSE2_POR_M128_to_XMM (to, (uptr)MIN_MAX_MASK2);
|
||||||
SSE2_PSHUFD_XMM_to_XMM(from, from, 0x50);
|
SSE2_PSHUFD_XMM_to_XMM(from, to, 0xee);
|
||||||
SSE2_PAND_M128_to_XMM (from, (uptr)MIN_MAX_MASK1);
|
|
||||||
SSE2_POR_M128_to_XMM (from, (uptr)MIN_MAX_MASK2);
|
|
||||||
if (min) SSE2_MINPD_XMM_to_XMM(to, from);
|
if (min) SSE2_MINPD_XMM_to_XMM(to, from);
|
||||||
else SSE2_MAXPD_XMM_to_XMM(to, from);
|
else SSE2_MAXPD_XMM_to_XMM(to, from);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue