Merge pull request #3601 from phire/AccurateEventScheduling

Adjust cycle counts so they are accurate to the JIT block level (Fixes OoT virtual console and other games)
This commit is contained in:
Pierre Bourdon 2016-03-25 17:02:30 +01:00
commit 8f74f1f4e9
5 changed files with 105 additions and 61 deletions

View File

@ -50,17 +50,21 @@ static Common::FifoQueue<BaseEvent, false> tsQueue;
// event pools // event pools
static Event *eventPool = nullptr; static Event *eventPool = nullptr;
static float lastOCFactor; static float s_lastOCFactor;
int slicelength; float g_lastOCFactor_inverted;
static int maxSliceLength = MAX_SLICE_LENGTH; int g_slicelength;
static int maxslicelength = MAX_SLICE_LENGTH;
static s64 idledCycles; static s64 idledCycles;
static u32 fakeDecStartValue; static u32 fakeDecStartValue;
static u64 fakeDecStartTicks; static u64 fakeDecStartTicks;
s64 globalTimer; // Are we in a function that has been called from Advance()
u64 fakeTBStartValue; static bool globalTimerIsSane;
u64 fakeTBStartTicks;
s64 g_globalTimer;
u64 g_fakeTBStartValue;
u64 g_fakeTBStartTicks;
static int ev_lost; static int ev_lost;
@ -91,12 +95,12 @@ static void EmptyTimedCallback(u64 userdata, int cyclesLate) {}
// but the effect is largely the same. // but the effect is largely the same.
static int DowncountToCycles(int downcount) static int DowncountToCycles(int downcount)
{ {
return (int)(downcount / lastOCFactor); return (int)(downcount * g_lastOCFactor_inverted);
} }
static int CyclesToDowncount(int cycles) static int CyclesToDowncount(int cycles)
{ {
return (int)(cycles * lastOCFactor); return (int)(cycles * s_lastOCFactor);
} }
int RegisterEvent(const std::string& name, TimedCallback callback) int RegisterEvent(const std::string& name, TimedCallback callback)
@ -132,11 +136,13 @@ void UnregisterAllEvents()
void Init() void Init()
{ {
lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
PowerPC::ppcState.downcount = CyclesToDowncount(maxSliceLength); g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
slicelength = maxSliceLength; PowerPC::ppcState.downcount = CyclesToDowncount(maxslicelength);
globalTimer = 0; g_slicelength = maxslicelength;
g_globalTimer = 0;
idledCycles = 0; idledCycles = 0;
globalTimerIsSane = true;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
} }
@ -193,14 +199,17 @@ static void EventDoState(PointerWrap &p, BaseEvent* ev)
void DoState(PointerWrap &p) void DoState(PointerWrap &p)
{ {
std::lock_guard<std::mutex> lk(tsWriteLock); std::lock_guard<std::mutex> lk(tsWriteLock);
p.Do(slicelength); p.Do(g_slicelength);
p.Do(globalTimer); p.Do(g_globalTimer);
p.Do(idledCycles); p.Do(idledCycles);
p.Do(fakeDecStartValue); p.Do(fakeDecStartValue);
p.Do(fakeDecStartTicks); p.Do(fakeDecStartTicks);
p.Do(fakeTBStartValue); p.Do(g_fakeTBStartValue);
p.Do(fakeTBStartTicks); p.Do(g_fakeTBStartTicks);
p.Do(lastOCFactor); p.Do(s_lastOCFactor);
if (p.GetMode() == PointerWrap::MODE_READ)
g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
p.DoMarker("CoreTimingData"); p.DoMarker("CoreTimingData");
MoveEvents(); MoveEvents();
@ -209,9 +218,16 @@ void DoState(PointerWrap &p)
p.DoMarker("CoreTimingEvents"); p.DoMarker("CoreTimingEvents");
} }
// This should only be called from the CPU thread; if you are calling it from any other thread, you are doing something evil
u64 GetTicks() u64 GetTicks()
{ {
return (u64)globalTimer; u64 ticks = (u64)g_globalTimer;
if (!globalTimerIsSane)
{
int downcount = DowncountToCycles(PowerPC::ppcState.downcount);
ticks += g_slicelength - downcount;
}
return ticks;
} }
u64 GetIdleTicks() u64 GetIdleTicks()
@ -221,7 +237,7 @@ u64 GetIdleTicks()
// This is to be called when outside threads, such as the graphics thread, wants to // This is to be called when outside threads, such as the graphics thread, wants to
// schedule things to be executed on the main thread. // schedule things to be executed on the main thread.
void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata) void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata)
{ {
_assert_msg_(POWERPC, !Core::IsCPUThread(), "ScheduleEvent_Threadsafe from wrong thread"); _assert_msg_(POWERPC, !Core::IsCPUThread(), "ScheduleEvent_Threadsafe from wrong thread");
if (Core::g_want_determinism) if (Core::g_want_determinism)
@ -232,7 +248,7 @@ void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata
} }
std::lock_guard<std::mutex> lk(tsWriteLock); std::lock_guard<std::mutex> lk(tsWriteLock);
Event ne; Event ne;
ne.time = globalTimer + cyclesIntoFuture; ne.time = g_globalTimer + cyclesIntoFuture;
ne.type = event_type; ne.type = event_type;
ne.userdata = userdata; ne.userdata = userdata;
tsQueue.Push(ne); tsQueue.Push(ne);
@ -260,7 +276,7 @@ void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata)
} }
// To be used from any thread, including the CPU thread // To be used from any thread, including the CPU thread
void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata) void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata)
{ {
if (Core::IsCPUThread()) if (Core::IsCPUThread())
ScheduleEvent(cyclesIntoFuture, event_type, userdata); ScheduleEvent(cyclesIntoFuture, event_type, userdata);
@ -299,14 +315,21 @@ static void AddEventToQueue(Event* ne)
// This must be run ONLY from within the CPU thread // This must be run ONLY from within the CPU thread
// cyclesIntoFuture may be VERY inaccurate if called from anything else // cyclesIntoFuture may be VERY inaccurate if called from anything else
// than Advance // than Advance
void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata) void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata)
{ {
_assert_msg_(POWERPC, Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE, _assert_msg_(POWERPC, Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE,
"ScheduleEvent from wrong thread"); "ScheduleEvent from wrong thread");
Event *ne = GetNewEvent(); Event *ne = GetNewEvent();
ne->userdata = userdata; ne->userdata = userdata;
ne->type = event_type; ne->type = event_type;
ne->time = globalTimer + cyclesIntoFuture; ne->time = GetTicks() + cyclesIntoFuture;
// If this event needs to be scheduled before the next advance(), force one early
if (!globalTimerIsSane)
ForceExceptionCheck(cyclesIntoFuture);
AddEventToQueue(ne); AddEventToQueue(ne);
} }
@ -346,12 +369,13 @@ void RemoveAllEvents(int event_type)
RemoveEvent(event_type); RemoveEvent(event_type);
} }
void ForceExceptionCheck(int cycles) void ForceExceptionCheck(s64 cycles)
{ {
if (DowncountToCycles(PowerPC::ppcState.downcount) > cycles) if (s64(DowncountToCycles(PowerPC::ppcState.downcount)) > cycles)
{ {
slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - cycles); // Account for cycles already executed by adjusting the slicelength // downcount is always (much) smaller than INT_MAX so we can safely cast cycles to an int here.
PowerPC::ppcState.downcount = CyclesToDowncount(cycles); g_slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength
PowerPC::ppcState.downcount = CyclesToDowncount((int)cycles);
} }
} }
@ -366,11 +390,11 @@ void ProcessFifoWaitEvents()
while (first) while (first)
{ {
if (first->time <= globalTimer) if (first->time <= g_globalTimer)
{ {
Event* evt = first; Event* evt = first;
first = first->next; first = first->next;
event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time));
FreeEvent(evt); FreeEvent(evt);
} }
else else
@ -397,21 +421,26 @@ void Advance()
{ {
MoveEvents(); MoveEvents();
int cyclesExecuted = slicelength - DowncountToCycles(PowerPC::ppcState.downcount); int cyclesExecuted = g_slicelength - DowncountToCycles(PowerPC::ppcState.downcount);
globalTimer += cyclesExecuted; g_globalTimer += cyclesExecuted;
lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f;
PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); g_lastOCFactor_inverted = 1.0f / s_lastOCFactor;
PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength);
while (first && first->time <= globalTimer) globalTimerIsSane = true;
while (first && first->time <= g_globalTimer)
{ {
//LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ", //LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ",
// event_types[first->type].name ? event_types[first->type].name : "?", (u64)globalTimer, (u64)first->time); // event_types[first->type].name ? event_types[first->type].name : "?", (u64)g_globalTimer, (u64)first->time);
Event* evt = first; Event* evt = first;
first = first->next; first = first->next;
event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time));
FreeEvent(evt); FreeEvent(evt);
} }
globalTimerIsSane = false;
if (!first) if (!first)
{ {
WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000"); WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000");
@ -419,10 +448,10 @@ void Advance()
} }
else else
{ {
slicelength = (int)(first->time - globalTimer); g_slicelength = (int)(first->time - g_globalTimer);
if (slicelength > maxSliceLength) if (g_slicelength > maxslicelength)
slicelength = maxSliceLength; g_slicelength = maxslicelength;
PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength);
} }
} }
@ -431,7 +460,7 @@ void LogPendingEvents()
Event *ptr = first; Event *ptr = first;
while (ptr) while (ptr)
{ {
INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", globalTimer, ptr->time, ptr->type); INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", g_globalTimer, ptr->time, ptr->type);
ptr = ptr->next; ptr = ptr->next;
} }
} }
@ -494,22 +523,22 @@ void SetFakeDecStartTicks(u64 val)
u64 GetFakeTBStartValue() u64 GetFakeTBStartValue()
{ {
return fakeTBStartValue; return g_fakeTBStartValue;
} }
void SetFakeTBStartValue(u64 val) void SetFakeTBStartValue(u64 val)
{ {
fakeTBStartValue = val; g_fakeTBStartValue = val;
} }
u64 GetFakeTBStartTicks() u64 GetFakeTBStartTicks()
{ {
return fakeTBStartTicks; return g_fakeTBStartTicks;
} }
void SetFakeTBStartTicks(u64 val) void SetFakeTBStartTicks(u64 val)
{ {
fakeTBStartTicks = val; g_fakeTBStartTicks = val;
} }
} // namespace } // namespace

View File

@ -25,15 +25,19 @@ class PointerWrap;
namespace CoreTiming namespace CoreTiming
{ {
extern s64 globalTimer; // These really shouldn't be global, but jit64 accesses them directly
extern u64 fakeTBStartValue; extern s64 g_globalTimer;
extern u64 fakeTBStartTicks; extern u64 g_fakeTBStartValue;
extern u64 g_fakeTBStartTicks;
extern int g_slicelength;
extern float g_lastOCFactor_inverted;
void Init(); void Init();
void Shutdown(); void Shutdown();
typedef void (*TimedCallback)(u64 userdata, int cyclesLate); typedef void (*TimedCallback)(u64 userdata, int cyclesLate);
// This should only be called from the CPU thread; if you are calling it from any other thread, you are doing something evil
u64 GetTicks(); u64 GetTicks();
u64 GetIdleTicks(); u64 GetIdleTicks();
@ -44,11 +48,11 @@ int RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents(); void UnregisterAllEvents();
// userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from savestates. // userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from savestates.
void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_Immediate(int event_type, u64 userdata = 0); void ScheduleEvent_Immediate(int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0); void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0);
void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata = 0);
// We only permit one event of each type in the queue at a time. // We only permit one event of each type in the queue at a time.
void RemoveEvent(int event_type); void RemoveEvent(int event_type);
@ -76,8 +80,8 @@ void SetFakeTBStartValue(u64 val);
u64 GetFakeTBStartTicks(); u64 GetFakeTBStartTicks();
void SetFakeTBStartTicks(u64 val); void SetFakeTBStartTicks(u64 val);
void ForceExceptionCheck(int cycles); void ForceExceptionCheck(s64 cycles);
extern int slicelength;
} // end of namespace } // end of namespace

View File

@ -178,7 +178,7 @@ void Interpreter::SingleStep()
{ {
SingleStepInner(); SingleStepInner();
CoreTiming::slicelength = 1; CoreTiming::g_slicelength = 1;
PowerPC::ppcState.downcount = 0; PowerPC::ppcState.downcount = 0;
CoreTiming::Advance(); CoreTiming::Advance();

View File

@ -283,8 +283,14 @@ void Jit64::mfspr(UGeckoInstruction inst)
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant. // cost of calling out to C for this is actually significant.
MOV(64, R(RAX), M(&CoreTiming::globalTimer)); // Scale downcount by the CPU overclocking factor.
SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks)); CVTSI2SS(XMM0, PPCSTATE(downcount));
MULSS(XMM0, M(&CoreTiming::g_lastOCFactor_inverted));
CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor
MOV(32, R(RAX), M(&CoreTiming::g_slicelength));
SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount)
ADD(64, R(RAX), M(&CoreTiming::g_globalTimer));
SUB(64, R(RAX), M(&CoreTiming::g_fakeTBStartTicks));
// It might seem convenient to correct the timer for the block position here for even more accurate // It might seem convenient to correct the timer for the block position here for even more accurate
// timing, but as of currently, this can break games. If we end up reading a time *after* the time // timing, but as of currently, this can break games. If we end up reading a time *after* the time
// at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only // at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only
@ -292,10 +298,11 @@ void Jit64::mfspr(UGeckoInstruction inst)
// which won't get past the loading screen. // which won't get past the loading screen.
//if (js.downcountAmount) //if (js.downcountAmount)
// ADD(64, R(RAX), Imm32(js.downcountAmount)); // ADD(64, R(RAX), Imm32(js.downcountAmount));
// a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67
MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL)); MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL));
MUL(64, R(RDX)); MUL(64, R(RDX));
MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue)); MOV(64, R(RAX), M(&CoreTiming::g_fakeTBStartValue));
SHR(64, R(RDX), Imm8(3)); SHR(64, R(RDX), Imm8(3));
ADD(64, R(RAX), R(RDX)); ADD(64, R(RAX), R(RDX));
MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX));

View File

@ -224,6 +224,10 @@ void JitArm64::mfspr(UGeckoInstruction inst)
case SPR_TL: case SPR_TL:
case SPR_TU: case SPR_TU:
{ {
// The inline implementation here is inaccurate and out of date as of PR3601
FALLBACK_IF(true); // Fallback to interpreted version.
/*
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg WB = gpr.GetReg(); ARM64Reg WB = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
@ -231,9 +235,9 @@ void JitArm64::mfspr(UGeckoInstruction inst)
// An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the
// cost of calling out to C for this is actually significant. // cost of calling out to C for this is actually significant.
MOVI2R(XA, (u64)&CoreTiming::globalTimer); MOVI2R(XA, (u64)&CoreTiming::g_globalTimer);
LDR(INDEX_UNSIGNED, XA, XA, 0); LDR(INDEX_UNSIGNED, XA, XA, 0);
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks); MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartTicks);
LDR(INDEX_UNSIGNED, XB, XB, 0); LDR(INDEX_UNSIGNED, XB, XB, 0);
SUB(XA, XA, XB); SUB(XA, XA, XB);
@ -247,7 +251,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
ADD(XB, XB, 1); ADD(XB, XB, 1);
UMULH(XA, XA, XB); UMULH(XA, XA, XB);
MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartValue);
LDR(INDEX_UNSIGNED, XB, XB, 0); LDR(INDEX_UNSIGNED, XB, XB, 0);
ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL])); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL]));
@ -285,7 +289,7 @@ void JitArm64::mfspr(UGeckoInstruction inst)
ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32));
else else
MOV(gpr.R(d), WA); MOV(gpr.R(d), WA);
gpr.Unlock(WA, WB); gpr.Unlock(WA, WB);*/
} }
break; break;
case SPR_XER: case SPR_XER: