mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Added a macro option to turn off all of mVU's flag optimizations for debugging purposes.
- Minor fix for program logging (indexing started at -1 instead of 0)
- Commented in detail a lot of the macro options in microVU_Misc.h

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2826 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
a57761c934
commit
f5cc2cf97d
|
@ -122,7 +122,7 @@ _f void mVUreset(mV) {
|
||||||
mVU->prog.cleared = 1;
|
mVU->prog.cleared = 1;
|
||||||
mVU->prog.isSame = -1;
|
mVU->prog.isSame = -1;
|
||||||
mVU->prog.cur = NULL;
|
mVU->prog.cur = NULL;
|
||||||
mVU->prog.total = -1;
|
mVU->prog.total = 0;
|
||||||
mVU->prog.curFrame = 0;
|
mVU->prog.curFrame = 0;
|
||||||
|
|
||||||
// Setup Dynarec Cache Limits for Each Program
|
// Setup Dynarec Cache Limits for Each Program
|
||||||
|
@ -205,7 +205,7 @@ _mVUt _f microProgram* mVUcreateProg(int startPC) {
|
||||||
float cacheUsed =((float)((u32)mVU->prog.x86ptr - (u32)mVU->prog.x86start)) / cacheSize * 100;
|
float cacheUsed =((float)((u32)mVU->prog.x86ptr - (u32)mVU->prog.x86start)) / cacheSize * 100;
|
||||||
ConsoleColors c = vuIndex ? Color_Orange : Color_Magenta;
|
ConsoleColors c = vuIndex ? Color_Orange : Color_Magenta;
|
||||||
Console.WriteLn(c, "microVU%d: Cached MicroPrograms = [%03d] [PC=%04x] [List=%02d] (Cache = %f%%)",
|
Console.WriteLn(c, "microVU%d: Cached MicroPrograms = [%03d] [PC=%04x] [List=%02d] (Cache = %f%%)",
|
||||||
vuIndex, mVU->prog.total, startPC, mVU->prog.prog[startPC].list->size()+1, cacheUsed);
|
vuIndex, prog->idx, startPC, mVU->prog.prog[startPC].list->size()+1, cacheUsed);
|
||||||
return prog;
|
return prog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,7 @@ _f void mVUdivSet(mV) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optimizes out unneeded status flag updates
|
// Optimizes out unneeded status flag updates
|
||||||
|
// This can safely be done when there is an FSSET opcode
|
||||||
_f void mVUstatusFlagOp(mV) {
|
_f void mVUstatusFlagOp(mV) {
|
||||||
int curPC = iPC;
|
int curPC = iPC;
|
||||||
int i = mVUcount;
|
int i = mVUcount;
|
||||||
|
@ -124,7 +125,7 @@ _f void mVUsetFlags(mV, microFlagCycles& mFC) {
|
||||||
u32 xCount = mVUcount; // Backup count
|
u32 xCount = mVUcount; // Backup count
|
||||||
iPC = mVUstartPC;
|
iPC = mVUstartPC;
|
||||||
for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
|
for (mVUcount = 0; mVUcount < xCount; mVUcount++) {
|
||||||
if (mVUlow.isFSSET) {
|
if (mVUlow.isFSSET && !noFlagOpts) {
|
||||||
if (__Status) { // Don't Optimize out on the last ~4+ instructions
|
if (__Status) { // Don't Optimize out on the last ~4+ instructions
|
||||||
if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); }
|
if ((xCount - mVUcount) > aCount) { mVUstatusFlagOp(mVU); }
|
||||||
}
|
}
|
||||||
|
@ -145,6 +146,7 @@ _f void mVUsetFlags(mV, microFlagCycles& mFC) {
|
||||||
cFLAG.lastWrite = (xC-1) & 3;
|
cFLAG.lastWrite = (xC-1) & 3;
|
||||||
|
|
||||||
if (sHackCond) { sFLAG.doFlag = 0; }
|
if (sHackCond) { sFLAG.doFlag = 0; }
|
||||||
|
if (sFLAG.doFlag) { if(noFlagOpts){sFLAG.doNonSticky=1;mFLAG.doFlag=1;}}
|
||||||
if (sFlagCond) { mFC.xStatus[xS] = mFC.cycles + 4; xS = (xS+1) & 3; }
|
if (sFlagCond) { mFC.xStatus[xS] = mFC.cycles + 4; xS = (xS+1) & 3; }
|
||||||
if (mFLAG.doFlag) { mFC.xMac [xM] = mFC.cycles + 4; xM = (xM+1) & 3; }
|
if (mFLAG.doFlag) { mFC.xMac [xM] = mFC.cycles + 4; xM = (xM+1) & 3; }
|
||||||
if (cFLAG.doFlag) { mFC.xClip [xC] = mFC.cycles + 4; xC = (xC+1) & 3; }
|
if (cFLAG.doFlag) { mFC.xClip [xC] = mFC.cycles + 4; xC = (xC+1) & 3; }
|
||||||
|
@ -279,5 +281,6 @@ _f void mVUsetFlagInfo(mV) {
|
||||||
mVUregs.needExactMatch |= backupFlagInfo;
|
mVUregs.needExactMatch |= backupFlagInfo;
|
||||||
}
|
}
|
||||||
mVUregs.needExactMatch &= 0x7;
|
mVUregs.needExactMatch &= 0x7;
|
||||||
|
if (noFlagOpts) mVUregs.needExactMatch |= 0x7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -136,8 +136,9 @@ extern const __aligned(32) mVU_Globals mVUglob;
|
||||||
#define opCase3 if (opCase == 3) // I Opcodes
|
#define opCase3 if (opCase == 3) // I Opcodes
|
||||||
#define opCase4 if (opCase == 4) // Q Opcodes
|
#define opCase4 if (opCase == 4) // Q Opcodes
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
// Define mVUquickSearch
|
// Define mVUquickSearch
|
||||||
|
//------------------------------------------------------------------
|
||||||
// FIXME: I changed the below searchXMM extern from __aligned16 to __pagealigned.
|
// FIXME: I changed the below searchXMM extern from __aligned16 to __pagealigned.
|
||||||
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
|
// This *probably* fixes the crashing bugs in linux when using the optimized memcmp.
|
||||||
// Needs testing... --air
|
// Needs testing... --air
|
||||||
|
@ -153,6 +154,7 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
|
#define mVUquickSearch(dest, src, size) (!memcmp_mmx(dest, src, size))
|
||||||
#define mVUemitSearch()
|
#define mVUemitSearch()
|
||||||
#endif
|
#endif
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Misc Macros...
|
// Misc Macros...
|
||||||
#define __four(val) { val, val, val, val }
|
#define __four(val) { val, val, val, val }
|
||||||
|
@ -197,12 +199,12 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define clampE CHECK_VU_EXTRA_OVERFLOW
|
#define clampE CHECK_VU_EXTRA_OVERFLOW
|
||||||
#define elif else if
|
#define elif else if
|
||||||
|
|
||||||
// Flag Info
|
// Flag Info (Set if next-block's first 4 ops will read current-block's flags)
|
||||||
#define __Status (mVUregs.needExactMatch & 1)
|
#define __Status (mVUregs.needExactMatch & 1)
|
||||||
#define __Mac (mVUregs.needExactMatch & 2)
|
#define __Mac (mVUregs.needExactMatch & 2)
|
||||||
#define __Clip (mVUregs.needExactMatch & 4)
|
#define __Clip (mVUregs.needExactMatch & 4)
|
||||||
|
|
||||||
// Pass 3 Helper Macros
|
// Pass 3 Helper Macros (Used for program logging)
|
||||||
#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x")))
|
#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x")))
|
||||||
#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x")))
|
#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x")))
|
||||||
#define xyzwStr(x,s) (_X_Y_Z_W == x) ? s :
|
#define xyzwStr(x,s) (_X_Y_Z_W == x) ? s :
|
||||||
|
@ -226,24 +228,54 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
||||||
#define mVUdumpProg(...) if (0) {}
|
#define mVUdumpProg(...) if (0) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
// Optimization Options
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Reg Alloc
|
// Reg Alloc
|
||||||
#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction (Turns off regAlloc)
|
#define doRegAlloc 1 // Set to 0 to flush every 64bit Instruction
|
||||||
|
// This turns off reg alloc for the most part, but reg alloc will still
|
||||||
|
// be done between Upper/Lower and within instructions...
|
||||||
|
|
||||||
|
// No Flag Optimizations
|
||||||
|
#define noFlagOpts 0 // Set to 1 to disable all flag setting optimizations
|
||||||
|
// Note: The flag optimizations this disables should all be harmless, so
|
||||||
|
// this option is mainly just for debugging... it effectively forces mVU
|
||||||
|
// to always update Mac and Status Flags (both sticky and non-sticky) whenever
|
||||||
|
// an Upper Instruction updates them. It also always transfers the 4 possible
|
||||||
|
// flag instances between blocks...
|
||||||
|
|
||||||
// Constant Propagation
|
// Constant Propagation
|
||||||
#define CHECK_VU_CONSTPROP 0
|
#define CHECK_VU_CONSTPROP 0 // Set to 1 to turn on vi15 const propagation
|
||||||
// Enables Constant Propagation for Jumps based on vi15
|
// Enables Constant Propagation for Jumps based on vi15 'link-register'
|
||||||
// allowing us to know many indirect jump target addresses.
|
// allowing us to know many indirect jump target addresses.
|
||||||
// Makes GoW a lot slower due to extra recompilation time!
|
// Makes GoW a lot slower due to extra recompilation time and extra code-gen!
|
||||||
|
|
||||||
// Speed Hacks
|
//------------------------------------------------------------------
|
||||||
#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack) // (Can cause Infinite loops, SPS, etc...)
|
// Speed Hacks (can cause infinite loops, SPS, Black Screens, etc...)
|
||||||
#define CHECK_VU_MINMAXHACK (EmuConfig.Speedhacks.vuMinMax) // (Can cause SPS, Black Screens, etc...)
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Status Flag Speed Hack
|
||||||
|
#define CHECK_VU_FLAGHACK (EmuConfig.Speedhacks.vuFlagHack)
|
||||||
|
// This hack only updates the Status Flag on blocks that will read it.
|
||||||
|
// Most blocks do not read status flags, so this is a big speedup.
|
||||||
|
|
||||||
|
// Min/Max Speed Hack
|
||||||
|
#define CHECK_VU_MINMAXHACK (EmuConfig.Speedhacks.vuMinMax)
|
||||||
|
// This hack uses SSE min/max instructions instead of emulated "logical min/max"
|
||||||
|
// The PS2 does not consider denormals as zero on the mini/max opcodes.
|
||||||
|
// This speedup is minor, but on AMD X2 CPUs it can be a 1~3% speedup
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
// Unknown Data
|
// Unknown Data
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
|
// XG Kick Transfer Delay Amount
|
||||||
#define mVU_XGKICK_CYCLES ((CHECK_XGKICKHACK) ? 3 : 1)
|
#define mVU_XGKICK_CYCLES ((CHECK_XGKICKHACK) ? 3 : 1)
|
||||||
// It's unknown at recompile time how long the xgkick transfer will take
|
// It's unknown at recompile time how long the xgkick transfer will take
|
||||||
// so give it a value that makes games happy :) (SO3 is fine at 1 cycle delay)
|
// so give it a value that makes games happy :) (SO3 is fine at 1 cycle delay)
|
||||||
|
|
||||||
|
//------------------------------------------------------------------
|
||||||
|
|
||||||
// Cache Limit Check
|
// Cache Limit Check
|
||||||
#define mVUcacheCheck(ptr, start, limit) { \
|
#define mVUcacheCheck(ptr, start, limit) { \
|
||||||
|
|
Loading…
Reference in New Issue