Merge pull request #3601 from phire/AccurateEventScheduling

Adjust cycle counts so they are accurate to the JIT block level (Fixes OoT virtual console and other games)
This commit is contained in:
Pierre Bourdon 2016-03-25 17:02:30 +01:00
commit 8f74f1f4e9
5 changed files with 105 additions and 61 deletions

View File

@ -50,17 +50,21 @@ static Common::FifoQueue<BaseEvent, false> tsQueue;
// event pools
static Event *eventPool = nullptr;
// CPU overclock factor sampled from SConfig in Init() and Advance(); 1.0f when
// overclocking is disabled.
static float lastOCFactor;
// Length, in cycles, of the slice currently being executed.
int slicelength;
// Upper bound applied to the slice length when Advance() picks the next slice.
static int maxSliceLength = MAX_SLICE_LENGTH;
static float s_lastOCFactor;
// Reciprocal of the overclock factor (1.0f / s_lastOCFactor), refreshed whenever
// the factor itself is updated so conversions can multiply instead of divide.
float g_lastOCFactor_inverted;
int g_slicelength;
static int maxslicelength = MAX_SLICE_LENGTH;
static s64 idledCycles;
static u32 fakeDecStartValue;
static u64 fakeDecStartTicks;
s64 globalTimer;
u64 fakeTBStartValue;
u64 fakeTBStartTicks;
// Are we in a function that has been called from Advance()
// When this is false, GetTicks() adds the cycles already consumed from the
// current slice on top of the global timer.
static bool globalTimerIsSane;
s64 g_globalTimer;
u64 g_fakeTBStartValue;
u64 g_fakeTBStartTicks;
// Event type id returned by RegisterEvent("_lost_event", ...) in Init().
static int ev_lost;
@ -91,12 +95,12 @@ static void EmptyTimedCallback(u64 userdata, int cyclesLate) {}
// but the effect is largely the same.
static int DowncountToCycles(int downcount)
{
// Undo the overclock scaling applied to downcount: divide by the overclock
// factor (equivalently, multiply by its cached reciprocal). The float result
// is truncated toward zero by the int cast.
return (int)(downcount / lastOCFactor);
return (int)(downcount * g_lastOCFactor_inverted);
}
// Converts a cycle count into downcount units by scaling it with the overclock
// factor; the float product is truncated toward zero by the int cast.
static int CyclesToDowncount(int cycles)
{
return (int)(cycles * lastOCFactor);
return (int)(cycles * s_lastOCFactor);
}
int RegisterEvent(const std::string& name, TimedCallback callback)
@ -132,11 +136,13 @@ void UnregisterAllEvents()
// Resets the scheduler's timing state: samples the overclock factor (1.0f when
// overclocking is disabled) and its reciprocal, starts the first slice at the
// maximum slice length, zeroes the global timer and the idle-cycle counter, and
// registers the "_lost_event" event type with a no-op callback.
void Init()
{
lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
PowerPC::ppcState.downcount = CyclesToDowncount(maxSliceLength);
slicelength = maxSliceLength;
globalTimer = 0;
s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
PowerPC::ppcState.downcount = CyclesToDowncount(maxslicelength);
g_slicelength = maxslicelength;
g_globalTimer = 0;
idledCycles = 0;
// With the flag set, GetTicks() returns the global timer without any
// slice-progress adjustment.
globalTimerIsSane = true;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
}
@ -193,14 +199,17 @@ static void EventDoState(PointerWrap &p, BaseEvent* ev)
void DoState(PointerWrap &p)
{
std::lock_guard<std::mutex> lk(tsWriteLock);
p.Do(slicelength);
p.Do(globalTimer);
p.Do(g_slicelength);
p.Do(g_globalTimer);
p.Do(idledCycles);
p.Do(fakeDecStartValue);
p.Do(fakeDecStartTicks);
p.Do(fakeTBStartValue);
p.Do(fakeTBStartTicks);
p.Do(lastOCFactor);
p.Do(g_fakeTBStartValue);
p.Do(g_fakeTBStartTicks);
p.Do(s_lastOCFactor);
if (p.GetMode() == PointerWrap::MODE_READ)
g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
p.DoMarker("CoreTimingData");
MoveEvents();
@ -209,9 +218,16 @@ void DoState(PointerWrap &p)
p.DoMarker("CoreTimingEvents");
}
// This should only be called from the CPU thread; if you are calling it from any other thread, you are doing something evil.
// Returns the current tick count. While globalTimerIsSane is false, the cycles
// already consumed from the current slice (slice length minus the remaining
// downcount converted to cycles) are added on top of the global timer; this is
// the same quantity Advance() folds into the global timer when it next runs.
u64 GetTicks()
{
return (u64)globalTimer;
u64 ticks = (u64)g_globalTimer;
if (!globalTimerIsSane)
{
int downcount = DowncountToCycles(PowerPC::ppcState.downcount);
ticks += g_slicelength - downcount;
}
return ticks;
}
u64 GetIdleTicks()
@ -221,7 +237,7 @@ u64 GetIdleTicks()
// This is to be called when outside threads, such as the graphics thread, want to
// schedule things to be executed on the main thread.
void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata)
void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata)
{
_assert_msg_(POWERPC, !Core::IsCPUThread(), "ScheduleEvent_Threadsafe from wrong thread");
if (Core::g_want_determinism)
@ -232,7 +248,7 @@ void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata
}
std::lock_guard<std::mutex> lk(tsWriteLock);
Event ne;
ne.time = globalTimer + cyclesIntoFuture;
ne.time = g_globalTimer + cyclesIntoFuture;
ne.type = event_type;
ne.userdata = userdata;
tsQueue.Push(ne);
@ -260,7 +276,7 @@ void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata)
}
// To be used from any thread, including the CPU thread
void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata)
void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata)
{
if (Core::IsCPUThread())
ScheduleEvent(cyclesIntoFuture, event_type, userdata);
@ -299,14 +315,21 @@ static void AddEventToQueue(Event* ne)
// This must be run ONLY from within the CPU thread
// cyclesIntoFuture may be VERY inaccurate if called from anything else
// than Advance
//
// Creates an event of type event_type carrying userdata, due cyclesIntoFuture
// cycles after the current time, and hands it to AddEventToQueue().
void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata)
void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata)
{
// Permitted on the CPU thread, or from any thread while the core is paused.
_assert_msg_(POWERPC, Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE,
"ScheduleEvent from wrong thread");
Event *ne = GetNewEvent();
ne->userdata = userdata;
ne->type = event_type;
ne->time = globalTimer + cyclesIntoFuture;
ne->time = GetTicks() + cyclesIntoFuture;
// If this event needs to be scheduled before the next advance(), force one early
if (!globalTimerIsSane)
ForceExceptionCheck(cyclesIntoFuture);
AddEventToQueue(ne);
}
@ -346,12 +369,13 @@ void RemoveAllEvents(int event_type)
RemoveEvent(event_type);
}
// If more than `cycles` cycles remain in the current slice, shortens the slice
// so that only `cycles` remain: the slice length is reduced by the difference
// and downcount is reset to the downcount equivalent of `cycles`. Does nothing
// when the remaining downcount already corresponds to `cycles` or fewer.
void ForceExceptionCheck(int cycles)
void ForceExceptionCheck(s64 cycles)
{
if (DowncountToCycles(PowerPC::ppcState.downcount) > cycles)
if (s64(DowncountToCycles(PowerPC::ppcState.downcount)) > cycles)
{
slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - cycles); // Account for cycles already executed by adjusting the slicelength
PowerPC::ppcState.downcount = CyclesToDowncount(cycles);
// downcount is always (much) smaller than INT_MAX so we can safely cast cycles to an int here.
g_slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength
PowerPC::ppcState.downcount = CyclesToDowncount((int)cycles);
}
}
@ -366,11 +390,11 @@ void ProcessFifoWaitEvents()
while (first)
{
if (first->time <= globalTimer)
if (first->time <= g_globalTimer)
{
Event* evt = first;
first = first->next;
event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time));
event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time));
FreeEvent(evt);
}
else
@ -397,21 +421,26 @@ void Advance()
{
MoveEvents();
int cyclesExecuted = slicelength - DowncountToCycles(PowerPC::ppcState.downcount);
globalTimer += cyclesExecuted;
lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
PowerPC::ppcState.downcount = CyclesToDowncount(slicelength);
int cyclesExecuted = g_slicelength - DowncountToCycles(PowerPC::ppcState.downcount);
g_globalTimer += cyclesExecuted;
s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength);
while (first && first->time <= globalTimer)
globalTimerIsSane = true;
while (first && first->time <= g_globalTimer)
{
//LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ",
// event_types[first->type].name ? event_types[first->type].name : "?", (u64)globalTimer, (u64)first->time);
// event_types[first->type].name ? event_types[first->type].name : "?", (u64)g_globalTimer, (u64)first->time);
Event* evt = first;
first = first->next;
event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time));
event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time));
FreeEvent(evt);
}
globalTimerIsSane = false;
if (!first)
{
WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000");
@ -419,10 +448,10 @@ void Advance()
}
else
{
slicelength = (int)(first->time - globalTimer);
if (slicelength > maxSliceLength)
slicelength = maxSliceLength;
PowerPC::ppcState.downcount = CyclesToDowncount(slicelength);
g_slicelength = (int)(first->time - g_globalTimer);
if (g_slicelength > maxslicelength)
g_slicelength = maxslicelength;
PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength);
}
}
@ -431,7 +460,7 @@ void LogPendingEvents()
Event *ptr = first;
while (ptr)
{
INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", globalTimer, ptr->time, ptr->type);
INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", g_globalTimer, ptr->time, ptr->type);
ptr = ptr->next;
}
}
@ -494,22 +523,22 @@ void SetFakeDecStartTicks(u64 val)
// Accessor: returns the stored fake time-base start value.
u64 GetFakeTBStartValue()
{
return fakeTBStartValue;
return g_fakeTBStartValue;
}
// Mutator: overwrites the stored fake time-base start value with val.
void SetFakeTBStartValue(u64 val)
{
fakeTBStartValue = val;
g_fakeTBStartValue = val;
}
// Accessor: returns the stored fake time-base start tick count.
u64 GetFakeTBStartTicks()
{
return fakeTBStartTicks;
return g_fakeTBStartTicks;
}
// Mutator: overwrites the stored fake time-base start tick count with val.
void SetFakeTBStartTicks(u64 val)
{
fakeTBStartTicks = val;
g_fakeTBStartTicks = val;
}
} // namespace

View File

@ -25,15 +25,19 @@ class PointerWrap;
namespace CoreTiming
{
extern s64 globalTimer;
extern u64 fakeTBStartValue;
extern u64 fakeTBStartTicks;
// These really shouldn't be global, but jit64 accesses them directly
extern s64 g_globalTimer;
extern u64 g_fakeTBStartValue;
extern u64 g_fakeTBStartTicks;
extern int g_slicelength;
extern float g_lastOCFactor_inverted;
void Init();
void Shutdown();
typedef void (*TimedCallback)(u64 userdata, int cyclesLate);
// This should only be called from the CPU thread; if you are calling it from any other thread, you are doing something evil.
u64 GetTicks();
u64 GetIdleTicks();
@ -44,11 +48,11 @@ int RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents();
// userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from savestates.
void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_Immediate(int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0);
void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
// We only permit one event of each type in the queue at a time.
void RemoveEvent(int event_type);
@ -76,8 +80,8 @@ void SetFakeTBStartValue(u64 val);
u64 GetFakeTBStartTicks();
void SetFakeTBStartTicks(u64 val);
void ForceExceptionCheck(int cycles);
void ForceExceptionCheck(s64 cycles);
extern int slicelength;
} // end of namespace

View File

@ -178,7 +178,7 @@ void Interpreter::SingleStep()
{
SingleStepInner();
CoreTiming::slicelength = 1;
CoreTiming::g_slicelength = 1;
PowerPC::ppcState.downcount = 0;
CoreTiming::Advance();

View File

@ -283,8 +283,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
MOV(64, R(RAX), M(&CoreTiming::globalTimer));
SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks));
// Scale downcount by the CPU overclocking factor.
CVTSI2SS(XMM0, PPCSTATE(downcount));
MULSS(XMM0, M(&CoreTiming::g_lastOCFactor_inverted));
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, R(RAX), M(&CoreTiming::g_slicelength));
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount)
ADD(64, R(RAX), M(&CoreTiming::g_globalTimer));
SUB(64, R(RAX), M(&CoreTiming::g_fakeTBStartTicks));
// It might seem convenient to correct the timer for the block position here for even more accurate
// timing, but currently this can break games. If we end up reading a time *after* the time
// at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only
@ -292,10 +298,11 @@ void Jit64::mfspr(UGeckoInstruction inst)
// which won't get past the loading screen.
//if (js.downcountAmount)
// ADD(64, R(RAX), Imm32(js.downcountAmount));
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX));
MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue));
MOV(64, R(RAX), M(&CoreTiming::g_fakeTBStartValue));
SHR(64, R(RDX), Imm8(3));
ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));

View File

@ -224,6 +224,10 @@ void JitArm64::mfspr(UGeckoInstruction inst)
case SPR_TL:
case SPR_TU:
{
// The inline implementation here is inaccurate and out of date as of PR3601
FALLBACK_IF(true); // Fallback to interpreted version.
/*
ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
@ -231,9 +235,9 @@ void JitArm64::mfspr(UGeckoInstruction inst)
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant.
MOVI2R(XA, (u64)&CoreTiming::globalTimer);
MOVI2R(XA, (u64)&CoreTiming::g_globalTimer);
LDR(INDEX_UNSIGNED, XA, XA, 0);
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks);
MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartTicks);
LDR(INDEX_UNSIGNED, XB, XB, 0);
SUB(XA, XA, XB);
@ -247,7 +251,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
ADD(XB, XB, 1);
UMULH(XA, XA, XB);
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue);
MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartValue);
LDR(INDEX_UNSIGNED, XB, XB, 0);
ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL]));
@ -285,7 +289,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
else
MOV(gpr.R(d), WA);
gpr.Unlock(WA, WB);
gpr.Unlock(WA, WB);*/
}
break;
case SPR_XER: