From 2ebbfd6f856d7fad39676568529bc48f9369e103 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 7 Feb 2016 05:59:45 +1300 Subject: [PATCH 1/5] Adjust cycle counts so they are accurate to the jit block level Previously GlobalTimer was only updated at the end of each slice when CoreTiming::Advance() was called, so it could be up to 20,000 cycles off. This was causing huge problems with games which made heavy use of the time base register, such as OoT (virtual console) and Pokemon puzzle. I've also made it so event scheduling will be accurate to the jit block level, instead of accurate to the slice. --- Source/Core/Core/CoreTiming.cpp | 27 ++++++++++++++++--- Source/Core/Core/CoreTiming.h | 2 ++ .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 8 +++++- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 59d40727ae..7479d03922 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -50,7 +50,7 @@ static Common::FifoQueue tsQueue; // event pools static Event *eventPool = nullptr; -static float lastOCFactor; +float lastOCFactor; int slicelength; static int maxSliceLength = MAX_SLICE_LENGTH; @@ -58,6 +58,9 @@ static s64 idledCycles; static u32 fakeDecStartValue; static u64 fakeDecStartTicks; +// Are we in a function that has been called from Advance() +static bool GlobalTimerIsSane; + s64 globalTimer; u64 fakeTBStartValue; u64 fakeTBStartTicks; @@ -137,6 +140,7 @@ void Init() slicelength = maxSliceLength; globalTimer = 0; idledCycles = 0; + GlobalTimerIsSane = true; ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); } @@ -209,9 +213,16 @@ void DoState(PointerWrap &p) p.DoMarker("CoreTimingEvents"); } +// This should only be called from the CPU thread, if you are calling it any other thread, you are doing something evil u64 GetTicks() { - return (u64)globalTimer; + u64 ticks = (u64)globalTimer; + if (!GlobalTimerIsSane) + { + int downcount = 
DowncountToCycles(PowerPC::ppcState.downcount); + ticks += slicelength - downcount; + } + return ticks; } u64 GetIdleTicks() @@ -303,10 +314,16 @@ void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata) { _assert_msg_(POWERPC, Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE, "ScheduleEvent from wrong thread"); + Event *ne = GetNewEvent(); ne->userdata = userdata; ne->type = event_type; - ne->time = globalTimer + cyclesIntoFuture; + ne->time = GetTicks() + cyclesIntoFuture; + + // If this event needs to be scheduled before the next advance(), force one early + if (!GlobalTimerIsSane) + ForceExceptionCheck(cyclesIntoFuture); + AddEventToQueue(ne); } @@ -402,6 +419,8 @@ void Advance() lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); + GlobalTimerIsSane = true; + while (first && first->time <= globalTimer) { //LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ", @@ -412,6 +431,8 @@ void Advance() FreeEvent(evt); } + GlobalTimerIsSane = false; + if (!first) { WARN_LOG(POWERPC, "WARNING - no events in queue. 
Setting downcount to 10000"); diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index c87cbd55f5..287ea38ccc 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -34,6 +34,7 @@ void Shutdown(); typedef void (*TimedCallback)(u64 userdata, int cyclesLate); +// This should only be called from the CPU thread, if you are calling it any other thread, you are doing something evil u64 GetTicks(); u64 GetIdleTicks(); @@ -79,5 +80,6 @@ void SetFakeTBStartTicks(u64 val); void ForceExceptionCheck(int cycles); extern int slicelength; +extern float lastOCFactor; } // end of namespace diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 8d14610f4b..6669436885 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -283,7 +283,13 @@ void Jit64::mfspr(UGeckoInstruction inst) // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. - MOV(64, R(RAX), M(&CoreTiming::globalTimer)); + // Scale downcount by the CPU overclocking factor. + CVTSI2SS(XMM0, PPCSTATE(downcount)); + DIVSS(XMM0, M(&CoreTiming::lastOCFactor)); + CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor + MOV(32, R(RAX), M(&CoreTiming::slicelength)); + SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount) + ADD(64, R(RAX), M(&CoreTiming::globalTimer)); SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks)); // It might seem convenient to correct the timer for the block position here for even more accurate // timing, but as of currently, this can break games. 
If we end up reading a time *after* the time From 67dc26cf1d872d61261fff24fd39f5866457dd0b Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Wed, 23 Mar 2016 12:23:17 +1300 Subject: [PATCH 2/5] CoreTiming: Fix 31bit overflow for events scheduling. Events scheduled more than 4.12 seconds in the future (2.96 seconds for Wii games) would overflow the sign bit and get scheduled in the past instead, causing them to fire instantly. --- Source/Core/Core/CoreTiming.cpp | 15 ++++++++------- Source/Core/Core/CoreTiming.h | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 7479d03922..a7b390c4d0 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -232,7 +232,7 @@ u64 GetIdleTicks() // This is to be called when outside threads, such as the graphics thread, wants to // schedule things to be executed on the main thread. -void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata) { _assert_msg_(POWERPC, !Core::IsCPUThread(), "ScheduleEvent_Threadsafe from wrong thread"); if (Core::g_want_determinism) @@ -271,7 +271,7 @@ void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata) } // To be used from any thread, including the CPU thread -void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata) { if (Core::IsCPUThread()) ScheduleEvent(cyclesIntoFuture, event_type, userdata); @@ -310,7 +310,7 @@ static void AddEventToQueue(Event* ne) // This must be run ONLY from within the CPU thread // cyclesIntoFuture may be VERY inaccurate if called from anything else // than Advance -void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata) { _assert_msg_(POWERPC, 
Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE, "ScheduleEvent from wrong thread"); @@ -363,12 +363,13 @@ void RemoveAllEvents(int event_type) RemoveEvent(event_type); } -void ForceExceptionCheck(int cycles) +void ForceExceptionCheck(s64 cycles) { - if (DowncountToCycles(PowerPC::ppcState.downcount) > cycles) + if (s64(DowncountToCycles(PowerPC::ppcState.downcount)) > cycles) { - slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - cycles); // Account for cycles already executed by adjusting the slicelength - PowerPC::ppcState.downcount = CyclesToDowncount(cycles); + // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int here. + slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength + PowerPC::ppcState.downcount = CyclesToDowncount((int)cycles); } } diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index 287ea38ccc..066ef6ae29 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -45,11 +45,11 @@ int RegisterEvent(const std::string& name, TimedCallback callback); void UnregisterAllEvents(); // userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from savestates. 
-void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_Immediate(int event_type, u64 userdata = 0); -void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0); -void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); // We only permit one event of each type in the queue at a time. void RemoveEvent(int event_type); @@ -77,7 +77,7 @@ void SetFakeTBStartValue(u64 val); u64 GetFakeTBStartTicks(); void SetFakeTBStartTicks(u64 val); -void ForceExceptionCheck(int cycles); +void ForceExceptionCheck(s64 cycles); extern int slicelength; extern float lastOCFactor; From 407f86e01a297947523cdd92c536868bda8bc939 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Thu, 24 Mar 2016 04:04:18 +1300 Subject: [PATCH 3/5] Mark global variables with g_ prefix --- Source/Core/Core/CoreTiming.cpp | 91 ++++++++++--------- Source/Core/Core/CoreTiming.h | 12 ++- .../Core/PowerPC/Interpreter/Interpreter.cpp | 2 +- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 11 ++- 4 files changed, 60 insertions(+), 56 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index a7b390c4d0..9bc4b1d97f 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -50,20 +50,20 @@ static Common::FifoQueue tsQueue; // event pools static Event *eventPool = nullptr; -float lastOCFactor; -int slicelength; -static int maxSliceLength = MAX_SLICE_LENGTH; +float g_lastOCFactor; +int g_slicelength; +static int maxslicelength = MAX_SLICE_LENGTH; static s64 idledCycles; static u32 fakeDecStartValue; static u64 fakeDecStartTicks; // Are we in a 
function that has been called from Advance() -static bool GlobalTimerIsSane; +static bool globalTimerIsSane; -s64 globalTimer; -u64 fakeTBStartValue; -u64 fakeTBStartTicks; +s64 g_globalTimer; +u64 g_fakeTBStartValue; +u64 g_fakeTBStartTicks; static int ev_lost; @@ -94,12 +94,12 @@ static void EmptyTimedCallback(u64 userdata, int cyclesLate) {} // but the effect is largely the same. static int DowncountToCycles(int downcount) { - return (int)(downcount / lastOCFactor); + return (int)(downcount / g_lastOCFactor); } static int CyclesToDowncount(int cycles) { - return (int)(cycles * lastOCFactor); + return (int)(cycles * g_lastOCFactor); } int RegisterEvent(const std::string& name, TimedCallback callback) @@ -135,12 +135,12 @@ void UnregisterAllEvents() void Init() { - lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; - PowerPC::ppcState.downcount = CyclesToDowncount(maxSliceLength); - slicelength = maxSliceLength; - globalTimer = 0; + g_lastOCFactor = SConfig::GetInstance().m_OCEnable ? 
SConfig::GetInstance().m_OCFactor : 1.0f; + PowerPC::ppcState.downcount = CyclesToDowncount(maxslicelength); + g_slicelength = maxslicelength; + g_globalTimer = 0; idledCycles = 0; - GlobalTimerIsSane = true; + globalTimerIsSane = true; ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); } @@ -197,14 +197,14 @@ static void EventDoState(PointerWrap &p, BaseEvent* ev) void DoState(PointerWrap &p) { std::lock_guard lk(tsWriteLock); - p.Do(slicelength); - p.Do(globalTimer); + p.Do(g_slicelength); + p.Do(g_globalTimer); p.Do(idledCycles); p.Do(fakeDecStartValue); p.Do(fakeDecStartTicks); - p.Do(fakeTBStartValue); - p.Do(fakeTBStartTicks); - p.Do(lastOCFactor); + p.Do(g_fakeTBStartValue); + p.Do(g_fakeTBStartTicks); + p.Do(g_lastOCFactor); p.DoMarker("CoreTimingData"); MoveEvents(); @@ -216,11 +216,11 @@ void DoState(PointerWrap &p) // This should only be called from the CPU thread, if you are calling it any other thread, you are doing something evil u64 GetTicks() { - u64 ticks = (u64)globalTimer; - if (!GlobalTimerIsSane) + u64 ticks = (u64)g_globalTimer; + if (!globalTimerIsSane) { int downcount = DowncountToCycles(PowerPC::ppcState.downcount); - ticks += slicelength - downcount; + ticks += g_slicelength - downcount; } return ticks; } @@ -243,7 +243,7 @@ void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata } std::lock_guard lk(tsWriteLock); Event ne; - ne.time = globalTimer + cyclesIntoFuture; + ne.time = g_globalTimer + cyclesIntoFuture; ne.type = event_type; ne.userdata = userdata; tsQueue.Push(ne); @@ -321,9 +321,10 @@ void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata) ne->time = GetTicks() + cyclesIntoFuture; // If this event needs to be scheduled before the next advance(), force one early - if (!GlobalTimerIsSane) + if (!globalTimerIsSane) ForceExceptionCheck(cyclesIntoFuture); + AddEventToQueue(ne); } @@ -368,7 +369,7 @@ void ForceExceptionCheck(s64 cycles) if 
(s64(DowncountToCycles(PowerPC::ppcState.downcount)) > cycles) { // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int here. - slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength + g_slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength PowerPC::ppcState.downcount = CyclesToDowncount((int)cycles); } } @@ -384,11 +385,11 @@ void ProcessFifoWaitEvents() while (first) { - if (first->time <= globalTimer) + if (first->time <= g_globalTimer) { Event* evt = first; first = first->next; - event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); + event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time)); FreeEvent(evt); } else @@ -415,24 +416,24 @@ void Advance() { MoveEvents(); - int cyclesExecuted = slicelength - DowncountToCycles(PowerPC::ppcState.downcount); - globalTimer += cyclesExecuted; - lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; - PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); + int cyclesExecuted = g_slicelength - DowncountToCycles(PowerPC::ppcState.downcount); + g_globalTimer += cyclesExecuted; + g_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength); - GlobalTimerIsSane = true; + globalTimerIsSane = true; - while (first && first->time <= globalTimer) + while (first && first->time <= g_globalTimer) { //LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ", - // event_types[first->type].name ? event_types[first->type].name : "?", (u64)globalTimer, (u64)first->time); + // event_types[first->type].name ? 
event_types[first->type].name : "?", (u64)g_globalTimer, (u64)first->time); Event* evt = first; first = first->next; - event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); + event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time)); FreeEvent(evt); } - GlobalTimerIsSane = false; + globalTimerIsSane = false; if (!first) { @@ -441,10 +442,10 @@ void Advance() } else { - slicelength = (int)(first->time - globalTimer); - if (slicelength > maxSliceLength) - slicelength = maxSliceLength; - PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); + g_slicelength = (int)(first->time - g_globalTimer); + if (g_slicelength > maxslicelength) + g_slicelength = maxslicelength; + PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength); } } @@ -453,7 +454,7 @@ void LogPendingEvents() Event *ptr = first; while (ptr) { - INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", globalTimer, ptr->time, ptr->type); + INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", g_globalTimer, ptr->time, ptr->type); ptr = ptr->next; } } @@ -516,22 +517,22 @@ void SetFakeDecStartTicks(u64 val) u64 GetFakeTBStartValue() { - return fakeTBStartValue; + return g_fakeTBStartValue; } void SetFakeTBStartValue(u64 val) { - fakeTBStartValue = val; + g_fakeTBStartValue = val; } u64 GetFakeTBStartTicks() { - return fakeTBStartTicks; + return g_fakeTBStartTicks; } void SetFakeTBStartTicks(u64 val) { - fakeTBStartTicks = val; + g_fakeTBStartTicks = val; } } // namespace diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index 066ef6ae29..566e0ee757 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -25,9 +25,12 @@ class PointerWrap; namespace CoreTiming { -extern s64 globalTimer; -extern u64 fakeTBStartValue; -extern u64 fakeTBStartTicks; +// These really shouldn't be global, but jit64 accesses them directly +extern s64 g_globalTimer; +extern u64 
g_fakeTBStartValue; +extern u64 g_fakeTBStartTicks; +extern int g_slicelength; +extern float g_lastOCFactor; void Init(); void Shutdown(); @@ -79,7 +82,6 @@ void SetFakeTBStartTicks(u64 val); void ForceExceptionCheck(s64 cycles); -extern int slicelength; -extern float lastOCFactor; + } // end of namespace diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index a018183c8c..f6312db207 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -178,7 +178,7 @@ void Interpreter::SingleStep() { SingleStepInner(); - CoreTiming::slicelength = 1; + CoreTiming::g_slicelength = 1; PowerPC::ppcState.downcount = 0; CoreTiming::Advance(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 6669436885..acbb5f7e26 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -285,12 +285,12 @@ void Jit64::mfspr(UGeckoInstruction inst) // cost of calling out to C for this is actually significant. // Scale downcount by the CPU overclocking factor. 
CVTSI2SS(XMM0, PPCSTATE(downcount)); - DIVSS(XMM0, M(&CoreTiming::lastOCFactor)); + DIVSS(XMM0, M(&CoreTiming::g_lastOCFactor)); CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor - MOV(32, R(RAX), M(&CoreTiming::slicelength)); + MOV(32, R(RAX), M(&CoreTiming::g_slicelength)); SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount) - ADD(64, R(RAX), M(&CoreTiming::globalTimer)); - SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks)); + ADD(64, R(RAX), M(&CoreTiming::g_globalTimer)); + SUB(64, R(RAX), M(&CoreTiming::g_fakeTBStartTicks)); // It might seem convenient to correct the timer for the block position here for even more accurate // timing, but as of currently, this can break games. If we end up reading a time *after* the time // at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only @@ -298,10 +298,11 @@ void Jit64::mfspr(UGeckoInstruction inst) // which won't get past the loading screen. //if (js.downcountAmount) // ADD(64, R(RAX), Imm32(js.downcountAmount)); + // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL)); MUL(64, R(RDX)); - MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue)); + MOV(64, R(RAX), M(&CoreTiming::g_fakeTBStartValue)); SHR(64, R(RDX), Imm8(3)); ADD(64, R(RAX), R(RDX)); MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); From 27beef1ff483020b6afb5fd8ea8fb0de3ed66c77 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Thu, 24 Mar 2016 04:42:13 +1300 Subject: [PATCH 4/5] Store an inverted copy of lastOCfactor. The inverse operation is more common, especially when games check the timer rapidly. So we do the division once and store the inverted copy. 
--- Source/Core/Core/CoreTiming.cpp | 18 ++++++++++++------ Source/Core/Core/CoreTiming.h | 2 +- .../Core/PowerPC/Jit64/Jit_SystemRegisters.cpp | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 9bc4b1d97f..c2fb7918b8 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -50,7 +50,8 @@ static Common::FifoQueue tsQueue; // event pools static Event *eventPool = nullptr; -float g_lastOCFactor; +static float s_lastOCFactor; +float g_lastOCFactor_inverted; int g_slicelength; static int maxslicelength = MAX_SLICE_LENGTH; @@ -94,12 +95,12 @@ static void EmptyTimedCallback(u64 userdata, int cyclesLate) {} // but the effect is largely the same. static int DowncountToCycles(int downcount) { - return (int)(downcount / g_lastOCFactor); + return (int)(downcount * g_lastOCFactor_inverted); } static int CyclesToDowncount(int cycles) { - return (int)(cycles * g_lastOCFactor); + return (int)(cycles * s_lastOCFactor); } int RegisterEvent(const std::string& name, TimedCallback callback) @@ -135,7 +136,8 @@ void UnregisterAllEvents() void Init() { - g_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? 
SConfig::GetInstance().m_OCFactor : 1.0f; + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; PowerPC::ppcState.downcount = CyclesToDowncount(maxslicelength); g_slicelength = maxslicelength; g_globalTimer = 0; @@ -204,7 +206,10 @@ void DoState(PointerWrap &p) p.Do(fakeDecStartTicks); p.Do(g_fakeTBStartValue); p.Do(g_fakeTBStartTicks); - p.Do(g_lastOCFactor); + p.Do(s_lastOCFactor); + if (p.GetMode() == PointerWrap::MODE_READ) + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; + p.DoMarker("CoreTimingData"); MoveEvents(); @@ -418,7 +423,8 @@ void Advance() int cyclesExecuted = g_slicelength - DowncountToCycles(PowerPC::ppcState.downcount); g_globalTimer += cyclesExecuted; - g_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength); globalTimerIsSane = true; diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index 566e0ee757..ab369bc2b3 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -30,7 +30,7 @@ extern s64 g_globalTimer; extern u64 g_fakeTBStartValue; extern u64 g_fakeTBStartTicks; extern int g_slicelength; -extern float g_lastOCFactor; +extern float g_lastOCFactor_inverted; void Init(); void Shutdown(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index acbb5f7e26..224b4501ad 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -285,7 +285,7 @@ void Jit64::mfspr(UGeckoInstruction inst) // cost of calling out to C for this is actually significant. // Scale downcount by the CPU overclocking factor. 
CVTSI2SS(XMM0, PPCSTATE(downcount)); - DIVSS(XMM0, M(&CoreTiming::g_lastOCFactor)); + MULSS(XMM0, M(&CoreTiming::g_lastOCFactor_inverted)); CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor MOV(32, R(RAX), M(&CoreTiming::g_slicelength)); SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount) From d61baef2f67c4104e35491396e3990f45861bc8c Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Thu, 24 Mar 2016 05:11:54 +1300 Subject: [PATCH 5/5] Disable JitArm64's inline timebase implementation, as it's incorrect. The interpreted version is correct. --- .../PowerPC/JitArm64/JitArm64_SystemRegisters.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index d275bdab3b..b0764012cf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -224,6 +224,10 @@ void JitArm64::mfspr(UGeckoInstruction inst) case SPR_TL: case SPR_TU: { + // The inline implementation here is inaccurate and out of date as of PR3601 + FALLBACK_IF(true); // Fallback to interpreted version. + + /* ARM64Reg WA = gpr.GetReg(); ARM64Reg WB = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -231,9 +235,9 @@ void JitArm64::mfspr(UGeckoInstruction inst) // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. 
- MOVI2R(XA, (u64)&CoreTiming::globalTimer); + MOVI2R(XA, (u64)&CoreTiming::g_globalTimer); LDR(INDEX_UNSIGNED, XA, XA, 0); - MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks); + MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartTicks); LDR(INDEX_UNSIGNED, XB, XB, 0); SUB(XA, XA, XB); @@ -247,7 +251,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) ADD(XB, XB, 1); UMULH(XA, XA, XB); - MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); + MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartValue); LDR(INDEX_UNSIGNED, XB, XB, 0); ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL])); @@ -285,7 +289,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); else MOV(gpr.R(d), WA); - gpr.Unlock(WA, WB); + gpr.Unlock(WA, WB);*/ } break; case SPR_XER: