diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 59d40727ae..c2fb7918b8 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -50,17 +50,21 @@ static Common::FifoQueue tsQueue; // event pools static Event *eventPool = nullptr; -static float lastOCFactor; -int slicelength; -static int maxSliceLength = MAX_SLICE_LENGTH; +static float s_lastOCFactor; +float g_lastOCFactor_inverted; +int g_slicelength; +static int maxslicelength = MAX_SLICE_LENGTH; static s64 idledCycles; static u32 fakeDecStartValue; static u64 fakeDecStartTicks; -s64 globalTimer; -u64 fakeTBStartValue; -u64 fakeTBStartTicks; +// Are we in a function that has been called from Advance() +static bool globalTimerIsSane; + +s64 g_globalTimer; +u64 g_fakeTBStartValue; +u64 g_fakeTBStartTicks; static int ev_lost; @@ -91,12 +95,12 @@ static void EmptyTimedCallback(u64 userdata, int cyclesLate) {} // but the effect is largely the same. static int DowncountToCycles(int downcount) { - return (int)(downcount / lastOCFactor); + return (int)(downcount * g_lastOCFactor_inverted); } static int CyclesToDowncount(int cycles) { - return (int)(cycles * lastOCFactor); + return (int)(cycles * s_lastOCFactor); } int RegisterEvent(const std::string& name, TimedCallback callback) @@ -132,11 +136,13 @@ void UnregisterAllEvents() void Init() { - lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; - PowerPC::ppcState.downcount = CyclesToDowncount(maxSliceLength); - slicelength = maxSliceLength; - globalTimer = 0; + s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; + PowerPC::ppcState.downcount = CyclesToDowncount(maxslicelength); + g_slicelength = maxslicelength; + g_globalTimer = 0; idledCycles = 0; + globalTimerIsSane = true; ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback); } @@ -193,14 +199,17 @@ static void EventDoState(PointerWrap &p, BaseEvent* ev) void DoState(PointerWrap &p) { std::lock_guard lk(tsWriteLock); - p.Do(slicelength); - p.Do(globalTimer); + p.Do(g_slicelength); + p.Do(g_globalTimer); p.Do(idledCycles); p.Do(fakeDecStartValue); p.Do(fakeDecStartTicks); - p.Do(fakeTBStartValue); - p.Do(fakeTBStartTicks); - p.Do(lastOCFactor); + p.Do(g_fakeTBStartValue); + p.Do(g_fakeTBStartTicks); + p.Do(s_lastOCFactor); + if (p.GetMode() == PointerWrap::MODE_READ) + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; + p.DoMarker("CoreTimingData"); MoveEvents(); @@ -209,9 +218,16 @@ void DoState(PointerWrap &p) p.DoMarker("CoreTimingEvents"); } +// This should only be called from the CPU thread, if you are calling it any other thread, you are doing something evil u64 GetTicks() { - return (u64)globalTimer; + u64 ticks = (u64)g_globalTimer; + if (!globalTimerIsSane) + { + int downcount = DowncountToCycles(PowerPC::ppcState.downcount); + ticks += g_slicelength - downcount; + } + return ticks; } u64 GetIdleTicks() @@ -221,7 +237,7 @@ u64 GetIdleTicks() // This is to be called when outside threads, such as the graphics thread, wants to // schedule things to be executed on the main thread. -void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata) { _assert_msg_(POWERPC, !Core::IsCPUThread(), "ScheduleEvent_Threadsafe from wrong thread"); if (Core::g_want_determinism) @@ -232,7 +248,7 @@ void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata } std::lock_guard lk(tsWriteLock); Event ne; - ne.time = globalTimer + cyclesIntoFuture; + ne.time = g_globalTimer + cyclesIntoFuture; ne.type = event_type; ne.userdata = userdata; tsQueue.Push(ne); @@ -260,7 +276,7 @@ void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata) } // To be used from any thread, including the CPU thread -void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata) { if (Core::IsCPUThread()) ScheduleEvent(cyclesIntoFuture, event_type, userdata); @@ -299,14 +315,21 @@ static void AddEventToQueue(Event* ne) // This must be run ONLY from within the CPU thread // cyclesIntoFuture may be VERY inaccurate if called from anything else // than Advance -void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata) +void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata) { _assert_msg_(POWERPC, Core::IsCPUThread() || Core::GetState() == Core::CORE_PAUSE, "ScheduleEvent from wrong thread"); + Event *ne = GetNewEvent(); ne->userdata = userdata; ne->type = event_type; - ne->time = globalTimer + cyclesIntoFuture; + ne->time = GetTicks() + cyclesIntoFuture; + + // If this event needs to be scheduled before the next advance(), force one early + if (!globalTimerIsSane) + ForceExceptionCheck(cyclesIntoFuture); + + AddEventToQueue(ne); } @@ -346,12 +369,13 @@ void RemoveAllEvents(int event_type) RemoveEvent(event_type); } -void ForceExceptionCheck(int cycles) +void ForceExceptionCheck(s64 cycles) { - if (DowncountToCycles(PowerPC::ppcState.downcount) > cycles) + if (s64(DowncountToCycles(PowerPC::ppcState.downcount)) > cycles) { - slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - cycles); // Account for cycles already executed by adjusting the slicelength - PowerPC::ppcState.downcount = CyclesToDowncount(cycles); + // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int here. + g_slicelength -= (DowncountToCycles(PowerPC::ppcState.downcount) - (int)cycles); // Account for cycles already executed by adjusting the g_slicelength + PowerPC::ppcState.downcount = CyclesToDowncount((int)cycles); } } @@ -366,11 +390,11 @@ void ProcessFifoWaitEvents() while (first) { - if (first->time <= globalTimer) + if (first->time <= g_globalTimer) { Event* evt = first; first = first->next; - event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); + event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time)); FreeEvent(evt); } else @@ -397,21 +421,26 @@ void Advance() { MoveEvents(); - int cyclesExecuted = slicelength - DowncountToCycles(PowerPC::ppcState.downcount); - globalTimer += cyclesExecuted; - lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; - PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); + int cyclesExecuted = g_slicelength - DowncountToCycles(PowerPC::ppcState.downcount); + g_globalTimer += cyclesExecuted; + s_lastOCFactor = SConfig::GetInstance().m_OCEnable ? SConfig::GetInstance().m_OCFactor : 1.0f; + g_lastOCFactor_inverted = 1.0f / s_lastOCFactor; + PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength); - while (first && first->time <= globalTimer) + globalTimerIsSane = true; + + while (first && first->time <= g_globalTimer) { //LOG(POWERPC, "[Scheduler] %s (%lld, %lld) ", - // event_types[first->type].name ? event_types[first->type].name : "?", (u64)globalTimer, (u64)first->time); + // event_types[first->type].name ? event_types[first->type].name : "?", (u64)g_globalTimer, (u64)first->time); Event* evt = first; first = first->next; - event_types[evt->type].callback(evt->userdata, (int)(globalTimer - evt->time)); + event_types[evt->type].callback(evt->userdata, (int)(g_globalTimer - evt->time)); FreeEvent(evt); } + globalTimerIsSane = false; + if (!first) { WARN_LOG(POWERPC, "WARNING - no events in queue. Setting downcount to 10000"); @@ -419,10 +448,10 @@ void Advance() } else { - slicelength = (int)(first->time - globalTimer); - if (slicelength > maxSliceLength) - slicelength = maxSliceLength; - PowerPC::ppcState.downcount = CyclesToDowncount(slicelength); + g_slicelength = (int)(first->time - g_globalTimer); + if (g_slicelength > maxslicelength) + g_slicelength = maxslicelength; + PowerPC::ppcState.downcount = CyclesToDowncount(g_slicelength); } } @@ -431,7 +460,7 @@ void LogPendingEvents() Event *ptr = first; while (ptr) { - INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", globalTimer, ptr->time, ptr->type); + INFO_LOG(POWERPC, "PENDING: Now: %" PRId64 " Pending: %" PRId64 " Type: %d", g_globalTimer, ptr->time, ptr->type); ptr = ptr->next; } } @@ -494,22 +523,22 @@ void SetFakeDecStartTicks(u64 val) u64 GetFakeTBStartValue() { - return fakeTBStartValue; + return g_fakeTBStartValue; } void SetFakeTBStartValue(u64 val) { - fakeTBStartValue = val; + g_fakeTBStartValue = val; } u64 GetFakeTBStartTicks() { - return fakeTBStartTicks; + return g_fakeTBStartTicks; } void SetFakeTBStartTicks(u64 val) { - fakeTBStartTicks = val; + g_fakeTBStartTicks = val; } } // namespace diff --git a/Source/Core/Core/CoreTiming.h b/Source/Core/Core/CoreTiming.h index c87cbd55f5..ab369bc2b3 100644 --- a/Source/Core/Core/CoreTiming.h +++ b/Source/Core/Core/CoreTiming.h @@ -25,15 +25,19 @@ class PointerWrap; namespace CoreTiming { -extern s64 globalTimer; -extern u64 fakeTBStartValue; -extern u64 fakeTBStartTicks; +// These really shouldn't be global, but jit64 accesses them directly +extern s64 g_globalTimer; +extern u64 g_fakeTBStartValue; +extern u64 g_fakeTBStartTicks; +extern int g_slicelength; +extern float g_lastOCFactor_inverted; void Init(); void Shutdown(); typedef void (*TimedCallback)(u64 userdata, int cyclesLate); +// This should only be called from the CPU thread, if you are calling it any other thread, you are doing something evil u64 GetTicks(); u64 GetIdleTicks(); @@ -44,11 +48,11 @@ int RegisterEvent(const std::string& name, TimedCallback callback); void UnregisterAllEvents(); // userdata MAY NOT CONTAIN POINTERS. userdata might get written and reloaded from savestates. -void ScheduleEvent(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_Immediate(int event_type, u64 userdata = 0); -void ScheduleEvent_Threadsafe(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent_Threadsafe(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); void ScheduleEvent_Threadsafe_Immediate(int event_type, u64 userdata = 0); -void ScheduleEvent_AnyThread(int cyclesIntoFuture, int event_type, u64 userdata = 0); +void ScheduleEvent_AnyThread(s64 cyclesIntoFuture, int event_type, u64 userdata = 0); // We only permit one event of each type in the queue at a time. void RemoveEvent(int event_type); @@ -76,8 +80,8 @@ void SetFakeTBStartValue(u64 val); u64 GetFakeTBStartTicks(); void SetFakeTBStartTicks(u64 val); -void ForceExceptionCheck(int cycles); +void ForceExceptionCheck(s64 cycles); + -extern int slicelength; } // end of namespace diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp index a018183c8c..f6312db207 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.cpp @@ -178,7 +178,7 @@ void Interpreter::SingleStep() { SingleStepInner(); - CoreTiming::slicelength = 1; + CoreTiming::g_slicelength = 1; PowerPC::ppcState.downcount = 0; CoreTiming::Advance(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 8d14610f4b..224b4501ad 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -283,8 +283,14 @@ void Jit64::mfspr(UGeckoInstruction inst) // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. - MOV(64, R(RAX), M(&CoreTiming::globalTimer)); - SUB(64, R(RAX), M(&CoreTiming::fakeTBStartTicks)); + // Scale downcount by the CPU overclocking factor. + CVTSI2SS(XMM0, PPCSTATE(downcount)); + MULSS(XMM0, M(&CoreTiming::g_lastOCFactor_inverted)); + CVTSS2SI(RDX, R(XMM0)); // RDX is downcount scaled by the overclocking factor + MOV(32, R(RAX), M(&CoreTiming::g_slicelength)); + SUB(64, R(RAX), R(RDX)); // cycles since the last CoreTiming::Advance() event is (slicelength - Scaled_downcount) + ADD(64, R(RAX), M(&CoreTiming::g_globalTimer)); + SUB(64, R(RAX), M(&CoreTiming::g_fakeTBStartTicks)); // It might seem convenient to correct the timer for the block position here for even more accurate // timing, but as of currently, this can break games. If we end up reading a time *after* the time // at which an interrupt was supposed to occur, e.g. because we're 100 cycles into a block with only @@ -292,10 +298,11 @@ void Jit64::mfspr(UGeckoInstruction inst) // which won't get past the loading screen. //if (js.downcountAmount) // ADD(64, R(RAX), Imm32(js.downcountAmount)); + // a / 12 = (a * 0xAAAAAAAAAAAAAAAB) >> 67 MOV(64, R(RDX), Imm64(0xAAAAAAAAAAAAAAABULL)); MUL(64, R(RDX)); - MOV(64, R(RAX), M(&CoreTiming::fakeTBStartValue)); + MOV(64, R(RAX), M(&CoreTiming::g_fakeTBStartValue)); SHR(64, R(RDX), Imm8(3)); ADD(64, R(RAX), R(RDX)); MOV(64, PPCSTATE(spr[SPR_TL]), R(RAX)); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index d275bdab3b..b0764012cf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -224,6 +224,10 @@ void JitArm64::mfspr(UGeckoInstruction inst) case SPR_TL: case SPR_TU: { + // The inline implementation here is inaccurate and out of date as of PR3601 + FALLBACK_IF(true); // Fallback to interpreted version. + + /* ARM64Reg WA = gpr.GetReg(); ARM64Reg WB = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -231,9 +235,9 @@ void JitArm64::mfspr(UGeckoInstruction inst) // An inline implementation of CoreTiming::GetFakeTimeBase, since in timer-heavy games the // cost of calling out to C for this is actually significant. - MOVI2R(XA, (u64)&CoreTiming::globalTimer); + MOVI2R(XA, (u64)&CoreTiming::g_globalTimer); LDR(INDEX_UNSIGNED, XA, XA, 0); - MOVI2R(XB, (u64)&CoreTiming::fakeTBStartTicks); + MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartTicks); LDR(INDEX_UNSIGNED, XB, XB, 0); SUB(XA, XA, XB); @@ -247,7 +251,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) ADD(XB, XB, 1); UMULH(XA, XA, XB); - MOVI2R(XB, (u64)&CoreTiming::fakeTBStartValue); + MOVI2R(XB, (u64)&CoreTiming::g_fakeTBStartValue); LDR(INDEX_UNSIGNED, XB, XB, 0); ADD(XA, XB, XA, ArithOption(XA, ST_LSR, 3)); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(spr[SPR_TL])); @@ -285,7 +289,7 @@ void JitArm64::mfspr(UGeckoInstruction inst) ORR(EncodeRegTo64(gpr.R(d)), SP, XA, ArithOption(XA, ST_LSR, 32)); else MOV(gpr.R(d), WA); - gpr.Unlock(WA, WB); + gpr.Unlock(WA, WB);*/ } break; case SPR_XER: