mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Fixed some logic for flag propagation. It's a 3% speedup on the title screen of GoW (68fps vs 66fps). - Added a new mVU speedhack which should be very safe. It's a 7% speedup over the new code (73fps vs 68fps), so a 10% speedup over the last revision (73fps vs 66fps). What the speedhack does: The VUs have 3 separate flags: the Status, Mac, and Clip flags. Due to the VU's pipeline, there can be up to 4 live instances of these flags during emulation. The tricky part arises when you're recompiling the end of a block (after a branch): you must then predict whether another block will need older flag instances. This can be accurately predicted in all cases except for indirect jumps, because the destination block is unknown at recompile time. In that case mVU assumes the worst-case scenario, that all flag instances will be needed, so it performs some flag shuffling to prep the flag instances in such a way that the next block can read them nicely. What the new "Block Hack" does is assume that the old flag instances won't be needed, which eliminates a lot of flag shuffling, causing a speedup. Currently the Block Hack plays it very safe: in the case of the current block ending with an indirect jump, it still assumes the flag instances will be needed for the subsequent block, so it does nothing different there. The speedhack only actually does something when your current block ends with a branch, and then the following block ends with an indirect jump (or similar cases). In these cases it would be very odd for future blocks to care about old flag instances from 2 blocks ago, which is why this speedhack should be very safe. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3298 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
8d25bb8a69
commit
25e63c243d
|
@ -485,8 +485,9 @@ struct Pcsx2Config
|
|||
IopCycleRate_X2 :1, // enables the x2 multiplier of the IOP cyclerate
|
||||
IntcStat :1, // tells Pcsx2 to fast-forward through intc_stat waits.
|
||||
WaitLoop :1, // enables constant loop detection and fast-forwarding
|
||||
vuFlagHack :1, // microVU specific flag hack; Can cause Infinite loops, SPS, etc...
|
||||
vuMinMax :1; // microVU specific MinMax hack; Can cause SPS, Black Screens, etc...
|
||||
vuFlagHack :1, // microVU specific flag hack
|
||||
vuBlockHack :1, // microVU specific block flag no-propagation hack
|
||||
vuMinMax :1; // microVU specific MinMax hack
|
||||
BITFIELD_END
|
||||
|
||||
u8 EECycleRate; // EE cycle rate selector (1.0, 1.5, 2.0)
|
||||
|
|
|
@ -74,6 +74,7 @@ void Pcsx2Config::SpeedhackOptions::LoadSave( IniInterface& ini )
|
|||
IniBitBool( IntcStat );
|
||||
IniBitBool( WaitLoop );
|
||||
IniBitBool( vuFlagHack );
|
||||
IniBitBool( vuBlockHack );
|
||||
IniBitBool( vuMinMax );
|
||||
}
|
||||
|
||||
|
|
|
@ -300,6 +300,7 @@ namespace Panels
|
|||
pxCheckBox* m_check_waitloop;
|
||||
pxCheckBox* m_check_IOPx2;
|
||||
pxCheckBox* m_check_vuFlagHack;
|
||||
pxCheckBox* m_check_vuBlockHack;
|
||||
pxCheckBox* m_check_vuMinMax;
|
||||
|
||||
public:
|
||||
|
|
|
@ -163,6 +163,9 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
m_check_vuFlagHack = new pxCheckBox( vuHacksPanel, _("mVU Flag Hack"),
|
||||
_("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc... [Recommended]") );
|
||||
|
||||
m_check_vuBlockHack = new pxCheckBox( vuHacksPanel, _("mVU Block Hack"),
|
||||
_("Good Speedup and High Compatibility; may cause garbage graphics, SPS, etc... [Recommended]") );
|
||||
|
||||
m_check_vuMinMax = new pxCheckBox( vuHacksPanel, _("mVU Min/Max Hack"),
|
||||
_("Small Speedup; may cause black screens, garbage graphics, SPS, etc... [Not Recommended]") );
|
||||
|
||||
|
@ -171,6 +174,11 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
L"This is safe most of the time, and Super VU does something similar by default."
|
||||
) );
|
||||
|
||||
m_check_vuBlockHack->SetToolTip( pxE( ".Tooltip:Speedhacks:vuBlockHack",
|
||||
L"Assumes that very far into future blocks will not need old flag instance data. "
|
||||
L"This should be pretty safe. It is unknown if this breaks any game..."
|
||||
) );
|
||||
|
||||
m_check_vuMinMax->SetToolTip( pxE( ".Tooltip:Speedhacks:vuMinMax",
|
||||
L"Uses SSE's Min/Max Floating Point Operations instead of custom logical Min/Max routines. "
|
||||
L"Known to break Gran Turismo 4, Tekken 5."
|
||||
|
@ -226,6 +234,7 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
*vuSliderPanel += m_msg_vustealer | sliderFlags;
|
||||
|
||||
*vuHacksPanel += m_check_vuFlagHack;
|
||||
*vuHacksPanel += m_check_vuBlockHack;
|
||||
*vuHacksPanel += m_check_vuMinMax;
|
||||
|
||||
*miscHacksPanel += m_check_intc;
|
||||
|
@ -298,6 +307,7 @@ void Panels::SpeedHacksPanel::AppStatusEvent_OnSettingsApplied( const Pcsx2Confi
|
|||
SetVUcycleSliderMsg();
|
||||
|
||||
m_check_vuFlagHack ->SetValue(opts.vuFlagHack);
|
||||
m_check_vuBlockHack ->SetValue(opts.vuBlockHack);
|
||||
m_check_vuMinMax ->SetValue(opts.vuMinMax);
|
||||
m_check_intc ->SetValue(opts.IntcStat);
|
||||
m_check_waitloop ->SetValue(opts.WaitLoop);
|
||||
|
@ -322,6 +332,7 @@ void Panels::SpeedHacksPanel::Apply()
|
|||
opts.IopCycleRate_X2 = m_check_IOPx2->GetValue();
|
||||
opts.IntcStat = m_check_intc->GetValue();
|
||||
opts.vuFlagHack = m_check_vuFlagHack->GetValue();
|
||||
opts.vuBlockHack = m_check_vuBlockHack->GetValue();
|
||||
opts.vuMinMax = m_check_vuMinMax->GetValue();
|
||||
|
||||
// If the user has a command line override specified, we need to disable it
|
||||
|
|
|
@ -206,7 +206,7 @@ _f void mVUsetupFlags(mV, microFlagCycles& mFC) {
|
|||
MOV32RtoR(gprF2, gprT3);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (__Mac) {
|
||||
int bMac[4];
|
||||
sortFlag(mFC.xMac, bMac, mFC.cycles);
|
||||
|
@ -224,26 +224,32 @@ _f void mVUsetupFlags(mV, microFlagCycles& mFC) {
|
|||
}
|
||||
}
|
||||
|
||||
#define shortBranch() \
|
||||
if ((branch == 3) || (branch == 4)) { \
|
||||
mVUflagPass(mVU, aBranchAddr, (xCount - (mVUcount+1))); \
|
||||
if (branch == 3) { mVUcount = 4; break; } \
|
||||
} \
|
||||
else break
|
||||
#define shortBranch() { \
|
||||
if ((branch == 3) || (branch == 4)) { /*Branches*/ \
|
||||
mVUflagPass(mVU, aBranchAddr, sCount+1, found); \
|
||||
if (branch == 3) break; /*Non-conditional Branch*/ \
|
||||
} \
|
||||
else if (branch == 5) { /*JR/JARL*/ \
|
||||
if(!CHECK_VU_BLOCKHACK && (!found||(sCount+1<4))) { \
|
||||
mVUregs.needExactMatch |= 7; \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
else break; /*E-Bit End*/ \
|
||||
}
|
||||
|
||||
// Scan through instructions and check if flags are read (FSxxx, FMxxx, FCxxx opcodes)
|
||||
void mVUflagPass(mV, u32 startPC, u32 xCount) {
|
||||
void mVUflagPass(mV, u32 startPC, u32 sCount = 0, bool found = 0) {
|
||||
|
||||
int oldPC = iPC;
|
||||
int oldCount = mVUcount;
|
||||
int oldBranch = mVUbranch;
|
||||
int aBranchAddr;
|
||||
int oldPC = iPC;
|
||||
int oldBranch = mVUbranch;
|
||||
int aBranchAddr = 0;
|
||||
iPC = startPC / 4;
|
||||
mVUcount = 0;
|
||||
mVUbranch = 0;
|
||||
for (int branch = 0; mVUcount < xCount; mVUcount=(mVUregs.needExactMatch&8)?(mVUcount+1):mVUcount) {
|
||||
for (int branch = 0; (sCount < 4) || !found; sCount++) {
|
||||
incPC(1);
|
||||
mVUopU(mVU, 3);
|
||||
if (mVUregs.needExactMatch&8) found = 1;
|
||||
if ( curI & _Ebit_ ) { branch = 1; }
|
||||
if ( curI & _DTbit_ ) { branch = 6; }
|
||||
if (!(curI & _Ibit_) ) { incPC(-1); mVUopL(mVU, 3); incPC(1); }
|
||||
|
@ -251,10 +257,9 @@ void mVUflagPass(mV, u32 startPC, u32 xCount) {
|
|||
else if (branch == 1) { branch = 2; }
|
||||
if (mVUbranch) { branch = ((mVUbranch>8)?(5):((mVUbranch<3)?3:4)); aBranchAddr = branchAddr; mVUbranch = 0; }
|
||||
incPC(1);
|
||||
if ((mVUregs.needExactMatch&7)==7) break;
|
||||
}
|
||||
if (mVUcount < 4) { mVUregs.needExactMatch |= 0x7; }
|
||||
iPC = oldPC;
|
||||
mVUcount = oldCount;
|
||||
mVUbranch = oldBranch;
|
||||
setCode();
|
||||
}
|
||||
|
@ -265,22 +270,21 @@ void mVUflagPass(mV, u32 startPC, u32 xCount) {
|
|||
|
||||
// Checks if the first 4 instructions of a block will read flags
|
||||
_f void mVUsetFlagInfo(mV) {
|
||||
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr, 4); incPC(1); }
|
||||
branchType2 {
|
||||
branchType1 { incPC(-1); mVUflagPass(mVU, branchAddr); incPC(1); }
|
||||
branchType2 { // This case can possibly be turned off via a hack for a small speedup...
|
||||
if (!mVUlow.constJump.isValid || !CHECK_VU_CONSTPROP) { mVUregs.needExactMatch |= 0x7; }
|
||||
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8), 4); }
|
||||
else { mVUflagPass(mVU, (mVUlow.constJump.regValue*8)&(mVU->microMemSize-8)); }
|
||||
}
|
||||
branchType3 {
|
||||
incPC(-1);
|
||||
mVUflagPass(mVU, branchAddr, 4);
|
||||
mVUflagPass(mVU, branchAddr);
|
||||
int backupFlagInfo = mVUregs.needExactMatch;
|
||||
mVUregs.needExactMatch = 0;
|
||||
incPC(4); // Branch Not Taken
|
||||
mVUflagPass(mVU, xPC, 4);
|
||||
mVUflagPass(mVU, xPC);
|
||||
incPC(-3);
|
||||
mVUregs.needExactMatch |= backupFlagInfo;
|
||||
}
|
||||
mVUregs.needExactMatch &= 0x7;
|
||||
if (noFlagOpts) mVUregs.needExactMatch |= 0x7;
|
||||
}
|
||||
|
||||
|
|
|
@ -260,6 +260,13 @@ typedef u32 (__fastcall *mVUCall)(void*, void*);
|
|||
// This hack only updates the Status Flag on blocks that will read it.
|
||||
// Most blocks do not read status flags, so this is a big speedup.
|
||||
|
||||
// Block Flag Instance No-Propagation Hack
|
||||
#define CHECK_VU_BLOCKHACK (EmuConfig.Speedhacks.vuBlockHack)
|
||||
// There are times when it is unknown if future blocks will need old
|
||||
// flag instance data (due to indirect jumps). This hack assumes
|
||||
// that they won't need old flag data. This effectively removes a lot
|
||||
// of end-of-block flag instance shuffling, causing nice speedups.
|
||||
|
||||
// Min/Max Speed Hack
|
||||
#define CHECK_VU_MINMAXHACK (EmuConfig.Speedhacks.vuMinMax)
|
||||
// This hack uses SSE min/max instructions instead of emulated "logical min/max"
|
||||
|
|
|
@ -272,7 +272,7 @@ void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0) {
|
|||
|
||||
// Transforms the Address in gprReg to valid VU0/VU1 Address
|
||||
_f void mVUaddrFix(mV, int gprReg) {
|
||||
if (mVU == &microVU1) {
|
||||
if (isVU1) {
|
||||
AND32ItoR(gprReg, 0x3ff); // wrap around
|
||||
SHL32ItoR(gprReg, 4);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue