mirror of https://github.com/PCSX2/pcsx2.git
R5900: Replaced the 0x81FC0 address check with constant-loop detection logic and renamed the hack accordingly. This is what I originally intended back before the INTC_STAT and 81FC0 hacks existed, but it seems to offer fairly minimal gains over them. I don't think I've broken anything, though; it might help some games, and it could perhaps be extended later to handle more complicated loops, with inlining or multiple-branch logic.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2814 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ea04f34136
commit
8da2dc7df9
|
@ -456,7 +456,7 @@ struct Pcsx2Config
|
|||
bool
|
||||
IopCycleRate_X2 :1, // enables the x2 multiplier of the IOP cyclerate
|
||||
IntcStat :1, // tells Pcsx2 to fast-forward through intc_stat waits.
|
||||
BIFC0 :1, // enables BIFC0 detection and fast-forwarding
|
||||
WaitLoop :1, // enables constant loop detection and fast-forwarding
|
||||
vuFlagHack :1, // microVU specific flag hack; Can cause Infinite loops, SPS, etc...
|
||||
vuMinMax :1; // microVU specific MinMax hack; Can cause SPS, Black Screens, etc...
|
||||
BITFIELD_END
|
||||
|
|
|
@ -72,7 +72,7 @@ void Pcsx2Config::SpeedhackOptions::LoadSave( IniInterface& ini )
|
|||
IniBitfield( VUCycleSteal );
|
||||
IniBitBool( IopCycleRate_X2 );
|
||||
IniBitBool( IntcStat );
|
||||
IniBitBool( BIFC0 );
|
||||
IniBitBool( WaitLoop );
|
||||
IniBitBool( vuFlagHack );
|
||||
IniBitBool( vuMinMax );
|
||||
}
|
||||
|
|
|
@ -293,7 +293,7 @@ namespace Panels
|
|||
pxStaticText* m_msg_vustealer;
|
||||
|
||||
pxCheckBox* m_check_intc;
|
||||
pxCheckBox* m_check_b1fc0;
|
||||
pxCheckBox* m_check_waitloop;
|
||||
pxCheckBox* m_check_IOPx2;
|
||||
pxCheckBox* m_check_vuFlagHack;
|
||||
pxCheckBox* m_check_vuMinMax;
|
||||
|
|
|
@ -192,8 +192,8 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
m_check_intc = new pxCheckBox( miscHacksPanel, _("Enable INTC Spin Detection"),
|
||||
_("Huge speedup for some games, with almost no compatibility side effects. [Recommended]") );
|
||||
|
||||
m_check_b1fc0 = new pxCheckBox( miscHacksPanel, _("Enable BIFC0 Spin Detection"),
|
||||
_("Moderate speedup for some games, with no known side effects. [Recommended]" ) );
|
||||
m_check_waitloop = new pxCheckBox( miscHacksPanel, _("Enable Wait Loop Detection"),
|
||||
_("Moderate speedup for some games, with no known side effects. [Recommended???]" ) );
|
||||
|
||||
m_check_IOPx2 = new pxCheckBox( miscHacksPanel, _("IOP x2 cycle rate hack"),
|
||||
_("Small Speedup and works well with most games; may cause some games to hang during startup.") );
|
||||
|
@ -204,10 +204,11 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
L"RPG titles. Games that do not use this method of vsync will see little or no speedup from this hack."
|
||||
) );
|
||||
|
||||
m_check_b1fc0->SetToolTip( pxE( ".Tooltips:Speedhacks:BIFC0",
|
||||
L"This hack works especially well for Final Fantasy X and Kingdom Hearts. BIFC0 is the address of a specific block of "
|
||||
L"code in the EE kernel that's run repeatedly when the EE is waiting for the IOP to complete a task. This hack detects "
|
||||
L"that and responds by fast-forwarding the EE until the IOP signals that the task is complete."
|
||||
m_check_waitloop->SetToolTip( pxE( ".Tooltips:Speedhacks:BIFC0",
|
||||
L"Primarily targetting the EE idle loop at address 0x81FC0 in the kernel, this hack attempts to "
|
||||
L"detect loops whose bodies are guaranteed to result in the same machine state for every iteration "
|
||||
L"until a scheduled event triggers emulation of another unit. After a single iteration of such loops, "
|
||||
L"we advance to the time of the next event or the end of the processor's timeslice, whichever comes first."
|
||||
) );
|
||||
|
||||
m_check_IOPx2->SetToolTip( pxE( ".Tooltips:Speedhacks:IOPx2",
|
||||
|
@ -233,7 +234,7 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent )
|
|||
*vuHacksPanel += m_check_vuMinMax;
|
||||
|
||||
*miscHacksPanel += m_check_intc;
|
||||
*miscHacksPanel += m_check_b1fc0;
|
||||
*miscHacksPanel += m_check_waitloop;
|
||||
*miscHacksPanel += m_check_IOPx2;
|
||||
|
||||
*left += eeSliderPanel | StdExpand();
|
||||
|
@ -303,7 +304,7 @@ void Panels::SpeedHacksPanel::AppStatusEvent_OnSettingsApplied( const Pcsx2Confi
|
|||
m_check_vuFlagHack ->SetValue(opts.vuFlagHack);
|
||||
m_check_vuMinMax ->SetValue(opts.vuMinMax);
|
||||
m_check_intc ->SetValue(opts.IntcStat);
|
||||
m_check_b1fc0 ->SetValue(opts.BIFC0);
|
||||
m_check_waitloop ->SetValue(opts.WaitLoop);
|
||||
m_check_IOPx2 ->SetValue(opts.IopCycleRate_X2);
|
||||
|
||||
EnableStuff();
|
||||
|
@ -321,7 +322,7 @@ void Panels::SpeedHacksPanel::Apply()
|
|||
opts.EECycleRate = m_slider_eecycle->GetValue()-1;
|
||||
opts.VUCycleSteal = m_slider_vustealer->GetValue();
|
||||
|
||||
opts.BIFC0 = m_check_b1fc0->GetValue();
|
||||
opts.WaitLoop = m_check_waitloop->GetValue();
|
||||
opts.IopCycleRate_X2 = m_check_IOPx2->GetValue();
|
||||
opts.IntcStat = m_check_intc->GetValue();
|
||||
opts.vuFlagHack = m_check_vuFlagHack->GetValue();
|
||||
|
|
|
@ -75,6 +75,7 @@ static BASEBLOCK* s_pCurBlock = NULL;
|
|||
static BASEBLOCKEX* s_pCurBlockEx = NULL;
|
||||
|
||||
static u32 s_nEndBlock = 0; // what psxpc the current block ends
|
||||
static u32 s_branchTo;
|
||||
static bool s_nBlockFF;
|
||||
|
||||
static u32 s_saveConstRegs[32];
|
||||
|
@ -1007,7 +1008,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
|
|||
{
|
||||
u32 blockCycles = psxScaleBlockCycles();
|
||||
|
||||
if (EmuConfig.Speedhacks.BIFC0 && s_nBlockFF)
|
||||
if (EmuConfig.Speedhacks.WaitLoop && s_nBlockFF && newpc == s_branchTo)
|
||||
{
|
||||
xMOV(eax, ptr32[&psxRegs.cycle]);
|
||||
xMOV(ecx, eax);
|
||||
|
@ -1176,7 +1177,6 @@ static void printfn()
|
|||
static void __fastcall iopRecRecompile( const u32 startpc )
|
||||
{
|
||||
u32 i;
|
||||
u32 branchTo = -1;
|
||||
u32 willbranch3 = 0;
|
||||
|
||||
if( IsDebugBuild && (psxdump & 4) )
|
||||
|
@ -1224,6 +1224,7 @@ static void __fastcall iopRecRecompile( const u32 startpc )
|
|||
// go until the next branch
|
||||
i = startpc;
|
||||
s_nEndBlock = 0xffffffff;
|
||||
s_branchTo = -1;
|
||||
|
||||
while(1) {
|
||||
BASEBLOCK* pblock = PSX_GETBLOCK(i);
|
||||
|
@ -1251,8 +1252,8 @@ static void __fastcall iopRecRecompile( const u32 startpc )
|
|||
|
||||
if( _Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17 ) {
|
||||
|
||||
branchTo = _Imm_ * 4 + i + 4;
|
||||
if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo;
|
||||
else s_nEndBlock = i+8;
|
||||
|
||||
goto StartRecomp;
|
||||
|
@ -1262,15 +1263,15 @@ static void __fastcall iopRecRecompile( const u32 startpc )
|
|||
|
||||
case 2: // J
|
||||
case 3: // JAL
|
||||
branchTo = _Target_ << 2 | (i + 4) & 0xf0000000;
|
||||
s_branchTo = _Target_ << 2 | (i + 4) & 0xf0000000;
|
||||
s_nEndBlock = i + 8;
|
||||
goto StartRecomp;
|
||||
|
||||
// branches
|
||||
case 4: case 5: case 6: case 7:
|
||||
|
||||
branchTo = _Imm_ * 4 + i + 4;
|
||||
if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo;
|
||||
else s_nEndBlock = i+8;
|
||||
|
||||
goto StartRecomp;
|
||||
|
@ -1282,7 +1283,7 @@ static void __fastcall iopRecRecompile( const u32 startpc )
|
|||
StartRecomp:
|
||||
|
||||
s_nBlockFF = false;
|
||||
if (branchTo == startpc) {
|
||||
if (s_branchTo == startpc) {
|
||||
s_nBlockFF = true;
|
||||
for (i = startpc; i < s_nEndBlock; i += 4) {
|
||||
if (i != s_nEndBlock - 8) {
|
||||
|
|
|
@ -73,6 +73,7 @@ static u32 s_nInstCacheSize = 0;
|
|||
static BASEBLOCK* s_pCurBlock = NULL;
|
||||
static BASEBLOCKEX* s_pCurBlockEx = NULL;
|
||||
u32 s_nEndBlock = 0; // what pc the current block ends
|
||||
u32 s_branchTo;
|
||||
static bool s_nBlockFF;
|
||||
|
||||
// save states for branches
|
||||
|
@ -973,6 +974,7 @@ void SetBranchImm( u32 imm )
|
|||
|
||||
// end the current block
|
||||
iFlushCall(FLUSH_EVERYTHING);
|
||||
xMOV(ptr32[&cpuRegs.pc], imm);
|
||||
iBranchTest(imm);
|
||||
}
|
||||
|
||||
|
@ -1132,26 +1134,18 @@ static void iBranchTest(u32 newpc)
|
|||
// cpuRegs.cycle += blockcycles;
|
||||
// if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); }
|
||||
|
||||
if (EmuConfig.Speedhacks.BIFC0 && s_nBlockFF)
|
||||
if (EmuConfig.Speedhacks.WaitLoop && s_nBlockFF && newpc == s_branchTo)
|
||||
{
|
||||
xMOV(eax, ptr32[&g_nextBranchCycle]);
|
||||
xADD(ptr32[&cpuRegs.cycle], eeScaleBlockCycles());
|
||||
xCMP(eax, ptr32[&cpuRegs.cycle]);
|
||||
xCMOVL(eax, ptr32[&cpuRegs.cycle]);
|
||||
xCMOVS(eax, ptr32[&cpuRegs.cycle]);
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax);
|
||||
|
||||
xJMP( DispatcherEvent );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Optimization -- we need to load cpuRegs.pc on static block links, but doing it inside
|
||||
// the if() block below (it would be paired with recBlocks.Link) breaks the sub/jcc
|
||||
// pairing that modern CPUs optimize (applies to all P4+ and AMD X2+ CPUs). So let's do
|
||||
// it up here instead. :D
|
||||
|
||||
if( newpc != 0xffffffff )
|
||||
xMOV( ptr32[&cpuRegs.pc], newpc );
|
||||
|
||||
xMOV(eax, &cpuRegs.cycle);
|
||||
xADD(eax, eeScaleBlockCycles());
|
||||
xMOV(&cpuRegs.cycle, eax); // update cycles
|
||||
|
@ -1367,7 +1361,6 @@ void __fastcall dyna_page_reset(u32 start,u32 sz)
|
|||
static void __fastcall recRecompile( const u32 startpc )
|
||||
{
|
||||
u32 i = 0;
|
||||
u32 branchTo;
|
||||
u32 willbranch3 = 0;
|
||||
u32 usecop2;
|
||||
|
||||
|
@ -1389,10 +1382,6 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
xSetPtr( recPtr );
|
||||
recPtr = xGetAlignedCallTarget();
|
||||
|
||||
s_nBlockFF = false;
|
||||
if (HWADDR(startpc) == 0x81fc0)
|
||||
s_nBlockFF = true;
|
||||
|
||||
s_pCurBlock = PC_GETBLOCK(startpc);
|
||||
|
||||
pxAssert(s_pCurBlock->GetFnptr() == (uptr)JITCompile
|
||||
|
@ -1432,6 +1421,7 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
// go until the next branch
|
||||
i = startpc;
|
||||
s_nEndBlock = 0xffffffff;
|
||||
s_branchTo = -1;
|
||||
|
||||
while(1) {
|
||||
BASEBLOCK* pblock = PC_GETBLOCK(i);
|
||||
|
@ -1470,8 +1460,8 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
|
||||
if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) {
|
||||
// branches
|
||||
branchTo = _Imm_ * 4 + i + 4;
|
||||
if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo;
|
||||
else s_nEndBlock = i+8;
|
||||
|
||||
goto StartRecomp;
|
||||
|
@ -1480,14 +1470,15 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
|
||||
case 2: // J
|
||||
case 3: // JAL
|
||||
s_branchTo = _Target_ << 2 | (i + 4) & 0xf0000000;
|
||||
s_nEndBlock = i + 8;
|
||||
goto StartRecomp;
|
||||
|
||||
// branches
|
||||
case 4: case 5: case 6: case 7:
|
||||
case 20: case 21: case 22: case 23:
|
||||
branchTo = _Imm_ * 4 + i + 4;
|
||||
if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo;
|
||||
else s_nEndBlock = i+8;
|
||||
|
||||
goto StartRecomp;
|
||||
|
@ -1507,8 +1498,8 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
if( _Rs_ == 8 ) {
|
||||
// BC1F, BC1T, BC1FL, BC1TL
|
||||
// BC2F, BC2T, BC2FL, BC2TL
|
||||
branchTo = _Imm_ * 4 + i + 4;
|
||||
if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo;
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if( s_branchTo > startpc && s_branchTo < i ) s_nEndBlock = s_branchTo;
|
||||
else s_nEndBlock = i+8;
|
||||
|
||||
goto StartRecomp;
|
||||
|
@ -1521,6 +1512,86 @@ static void __fastcall recRecompile( const u32 startpc )
|
|||
|
||||
StartRecomp:
|
||||
|
||||
// The idea here is that as long as a loop doesn't write to a register it's already read
|
||||
// (excepting registers initialised with constants or memory loads) or use any instructions
|
||||
// which alter the machine state apart from registers, it will do the same thing on every
|
||||
// iteration.
|
||||
// TODO: special handling for counting loops. God of war wastes time in a loop which just
|
||||
// counts to some large number and does nothing else, many other games use a counter as a
|
||||
// timeout on a register read. AFAICS the only way to optimise this for non-const cases
|
||||
// without a significant loss in cycle accuracy is with a division, but games would probably
|
||||
// be happy with time wasting loops completing in 0 cycles and timeouts waiting forever.
|
||||
s_nBlockFF = false;
|
||||
if (s_branchTo == startpc) {
|
||||
s_nBlockFF = true;
|
||||
|
||||
u32 reads = 0, loads = 1;
|
||||
|
||||
for (i = startpc; i < s_nEndBlock; i += 4) {
|
||||
if (i == s_nEndBlock - 8)
|
||||
continue;
|
||||
cpuRegs.code = *(u32*)PSM(i);
|
||||
// nop
|
||||
if (cpuRegs.code == 0)
|
||||
continue;
|
||||
// cache, sync
|
||||
else if (_Opcode_ == 057 || _Opcode_ == 0 && _Funct_ == 013)
|
||||
continue;
|
||||
// imm arithmetic
|
||||
else if ((_Opcode_ & 070) == 010 || (_Opcode_ & 076) == 030)
|
||||
{
|
||||
if (loads & 1 << _Rs_) {
|
||||
loads |= 1 << _Rt_;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
reads |= 1 << _Rs_;
|
||||
if (reads & 1 << _Rt_) {
|
||||
s_nBlockFF = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// common register arithmetic instructions
|
||||
else if (_Opcode_ == 0 && (_Funct_ & 060) == 040 && (_Funct_ & 076) != 050)
|
||||
{
|
||||
if (loads & 1 << _Rs_ && loads & 1 << _Rt_) {
|
||||
loads |= 1 << _Rd_;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
reads |= 1 << _Rs_ | 1 << _Rt_;
|
||||
if (reads & 1 << _Rd_) {
|
||||
s_nBlockFF = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// loads
|
||||
else if ((_Opcode_ & 070) == 040 || (_Opcode_ & 076) == 032 || _Opcode_ == 067)
|
||||
{
|
||||
if (loads & 1 << _Rs_) {
|
||||
loads |= 1 << _Rt_;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
reads |= 1 << _Rs_;
|
||||
if (reads & 1 << _Rt_) {
|
||||
s_nBlockFF = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// mfc*, cfc*
|
||||
else if ((_Opcode_ & 074) == 020 && _Rs_ < 4)
|
||||
{
|
||||
loads |= 1 << _Rt_;
|
||||
}
|
||||
else
|
||||
{
|
||||
s_nBlockFF = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rec info //
|
||||
{
|
||||
EEINST* pcur;
|
||||
|
@ -1753,7 +1824,7 @@ StartRecomp:
|
|||
|
||||
int numinsts = (pc - startpc) / 4;
|
||||
if( numinsts > 6 )
|
||||
iBranchTest(pc);
|
||||
SetBranchImm(pc);
|
||||
else
|
||||
{
|
||||
xMOV( ptr32[&cpuRegs.pc], pc );
|
||||
|
|
Loading…
Reference in New Issue