diff --git a/Source/Core/Common/BlockingLoop.h b/Source/Core/Common/BlockingLoop.h new file mode 100644 index 0000000000..d67e07d90e --- /dev/null +++ b/Source/Core/Common/BlockingLoop.h @@ -0,0 +1,164 @@ +// Copyright 2015 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "Common/Event.h" +#include "Common/Flag.h" + +namespace Common +{ + +// This class provides a synchronized loop. +// It's a thread-safe way to trigger a new iteration without busy loops. +// It's optimized for heavily used loops which are usually already running when they get triggered again. +class BlockingLoop +{ +public: + BlockingLoop() + { + m_stopped.Set(); // A new loop starts in the stopped state until Prepare() or Run() is called. + } + + ~BlockingLoop() + { + Stop(); // Blocking variant: returns at once if already stopped, otherwise goes through Wait(). + } + + // Requests at least one more run of the payload passed to Run(). + // This function will never block and is designed to finish as fast as possible. + void Wakeup() + { + // Already running, so there is no need for a wakeup. + if (m_is_running.IsSet()) + return; + + m_is_running.Set(); + m_is_pending.Set(); + m_new_work_event.Set(); + } + + // Waits for a complete payload run after the last Wakeup() call. + // If stopped, this returns immediately. + void Wait() + { + // We have to give the loop a chance to exit. + m_may_sleep.Set(); + + if (m_stopped.IsSet() || (!m_is_running.IsSet() && !m_is_pending.IsSet())) + return; + + // Notifying this event will only wake up one thread, so use a mutex here to + // allow only one waiting thread at a time. This way, every waiting thread + // but the first one gets its wakeup for free, without needing the event. + std::lock_guard lk(m_wait_lock); + + while (!m_stopped.IsSet() && (m_is_running.IsSet() || m_is_pending.IsSet())) + { + m_may_sleep.Set(); + m_done_event.Wait(); + } + } + + // Half-starts the worker. + // This puts the object into the running state, so Wait() will block until the worker calls Run(). 
+ // This may be called from any thread and is supposed to be called at least once before Wait() is used. + void Prepare() + { + // There is a race condition if other threads call this function while + // the loop thread is initializing. Using this lock ensures a valid state. + std::lock_guard lk(m_prepare_lock); + + if (!m_stopped.TestAndClear()) + return; + m_is_pending.Set(); + m_shutdown.Clear(); + m_may_sleep.Set(); + } + + // Mainloop of this object. + // The payload callback is called at least as often as needed to satisfy the Wakeup() requirements. + template void Run(F payload) + { + Prepare(); + + while (!m_shutdown.IsSet()) + { + payload(); + + m_is_pending.Clear(); + m_done_event.Set(); + + if (m_is_running.IsSet()) + { + if (m_may_sleep.IsSet()) + { + m_is_pending.Set(); + m_is_running.Clear(); + + // We'll sleep after the next iteration now, + // so clear this flag now and we won't sleep another time. + m_may_sleep.Clear(); + } + } + else + { + m_new_work_event.WaitFor(std::chrono::milliseconds(100)); + } + + } + + m_is_running.Clear(); + m_is_pending.Clear(); + m_stopped.Set(); + + m_done_event.Set(); + } + + // Quits the mainloop. + // By default, it will wait until the mainloop has quit. + // Be careful not to use the blocking variant from within the payload of the Run() method. + void Stop(bool block = true) + { + if (m_stopped.IsSet()) + return; + + m_shutdown.Set(); + Wakeup(); + + if (block) + Wait(); + } + + bool IsRunning() const + { + return !m_stopped.IsSet() && !m_shutdown.IsSet(); // True only while neither stopped nor asked to shut down. + } + + void AllowSleep() + { + m_may_sleep.Set(); // Permits Run() to leave the busy loop and wait on m_new_work_event instead. + } + +private: + std::mutex m_wait_lock; + std::mutex m_prepare_lock; + + Flag m_stopped; // If this one is set, Wait() shall not block. + Flag m_shutdown; // If this one is set, the loop shall quit. + + Event m_new_work_event; + Flag m_is_running; // If this one is set, the loop will be run at least once again. + + Event m_done_event; + Flag m_is_pending; // If this one is set, there might still be work to do. 
+ + Flag m_may_sleep; // If this one is set, we fall back from the busy loop to an event-based synchronization. +}; + +} diff --git a/Source/Core/Common/Common.vcxproj b/Source/Core/Common/Common.vcxproj index 7b2c278cd1..fe3a5f22dc 100644 --- a/Source/Core/Common/Common.vcxproj +++ b/Source/Core/Common/Common.vcxproj @@ -40,6 +40,7 @@ + diff --git a/Source/Core/Common/Common.vcxproj.filters b/Source/Core/Common/Common.vcxproj.filters index ffaf7d6be8..712122b3d1 100644 --- a/Source/Core/Common/Common.vcxproj.filters +++ b/Source/Core/Common/Common.vcxproj.filters @@ -14,6 +14,7 @@ + @@ -126,4 +127,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 8b92f72f35..fa106aa4b8 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -475,7 +475,7 @@ void Idle() { //DEBUG_LOG(POWERPC, "Idle"); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU) { //When the FIFO is processing data we must not advance because in this way //the VI will be desynchronized. So, We are waiting until the FIFO finish and diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index 5c84966489..a44614d9e0 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -62,6 +62,7 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule: #include "Core/PowerPC/PowerPC.h" #include "VideoCommon/CommandProcessor.h" +#include "VideoCommon/Fifo.h" #include "VideoCommon/VideoBackendBase.h" @@ -189,7 +190,7 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate) static void ThrottleCallback(u64 last_time, int cyclesLate) { // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. 
- CommandProcessor::s_gpuMaySleep.Set(); + GpuMaySleep(); u32 time = Common::Timer::GetTimeMs(); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 449e99982f..88b16afe7e 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -49,8 +49,6 @@ static std::atomic s_interrupt_finish_waiting; static std::atomic s_vi_ticks(CommandProcessor::m_cpClockOrigin); -Common::Flag s_gpuMaySleep; - static bool IsOnThread() { return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread; diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 0b33150ce4..ee130b82b6 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -17,7 +17,6 @@ namespace CommandProcessor { extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. -extern Common::Flag s_gpuMaySleep; // internal hardware addresses enum diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 289a62d8e7..5e482f3670 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -5,6 +5,7 @@ #include #include "Common/Atomic.h" +#include "Common/BlockingLoop.h" #include "Common/ChunkFile.h" #include "Common/CPUDetect.h" #include "Common/Event.h" @@ -26,11 +27,13 @@ #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoConfig.h" bool g_bSkipCurrentFrame = false; -static std::atomic s_gpu_running_state; +static Common::BlockingLoop s_gpu_mainloop; + static std::atomic s_emu_running_state; // Most of this array is unlikely to be faulted in... 
@@ -41,8 +44,6 @@ static u8* s_fifo_aux_read_ptr; bool g_use_deterministic_gpu_thread; // STATE_TO_SAVE -static std::mutex s_video_buffer_lock; -static std::condition_variable s_video_buffer_cond; static u8* s_video_buffer; static u8* s_video_buffer_read_ptr; static std::atomic s_video_buffer_write_ptr; @@ -60,12 +61,6 @@ static u8* s_video_buffer_pp_read_ptr; // polls, it's just atomic. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. -static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again -static Common::Event s_gpu_new_work_event; - -static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do -static Common::Event s_gpu_done_event; - void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -102,13 +97,14 @@ void Fifo_Init() // Padded so that SIMD overreads in the vertex loader are safe s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE + 4); ResetVideoBuffer(); - s_gpu_running_state.store(false); + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) + s_gpu_mainloop.Prepare(); CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin); } void Fifo_Shutdown() { - if (s_gpu_running_state.load()) + if (s_gpu_mainloop.IsRunning()) PanicAlert("Fifo shutting down while active"); FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4); @@ -135,27 +131,22 @@ void ExitGpuLoop() FlushGpu(); // Terminate GPU thread loop - s_gpu_running_state.store(false); s_emu_running_state.store(true); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Stop(false); } void EmulatorState(bool running) { s_emu_running_state.store(running); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Wakeup(); } void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) { if (g_use_deterministic_gpu_thread) { - std::unique_lock lk(s_video_buffer_lock); - u8* write_ptr = s_video_buffer_write_ptr; - s_video_buffer_cond.wait(lk, [&]() { - return !s_gpu_running_state.load() 
|| s_video_buffer_seen_ptr == write_ptr; - }); - if (!s_gpu_running_state.load()) + s_gpu_mainloop.Wait(); + if (!s_gpu_mainloop.IsRunning()) return; // Opportunistically reset FIFOs so we don't wrap around. @@ -168,6 +159,8 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) if (may_move_read_ptr) { + u8* write_ptr = s_video_buffer_write_ptr; + // what's left over in the buffer size_t size = write_ptr - s_video_buffer_pp_read_ptr; @@ -188,7 +181,7 @@ void PushFifoAuxBuffer(void* ptr, size_t size) if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr)) { SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false); - if (!s_gpu_running_state.load()) + if (!s_gpu_mainloop.IsRunning()) { // GPU is shutting down return; @@ -243,9 +236,9 @@ static void ReadDataFromFifoOnCPU(u32 readPtr) // We can't wrap around while the GPU is working on the data. // This should be very rare due to the reset in SyncGPU. SyncGPU(SYNC_GPU_WRAPAROUND); - if (!s_gpu_running_state.load()) + if (!s_gpu_mainloop.IsRunning()) { - // GPU is shutting down + // GPU is shutting down, so the next asserts may fail return; } @@ -283,18 +276,19 @@ void ResetVideoBuffer() // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { - s_gpu_running_state.store(true); - SCPFifoStruct &fifo = CommandProcessor::fifo; - u32 cyclesExecuted = 0; AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); - while (s_gpu_running_state.load()) - { + s_gpu_mainloop.Run( + [] { g_video_backend->PeekMessages(); - if (g_use_deterministic_gpu_thread && s_emu_running_state.load()) + // Do nothing while paused + if (!s_emu_running_state.load()) + return; + + if (g_use_deterministic_gpu_thread) { AsyncRequests::GetInstance()->PullEvents(); @@ -305,16 +299,13 @@ void RunGpuLoop() if (write_ptr > seen_ptr) { s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false); - - { - 
std::lock_guard vblk(s_video_buffer_lock); - s_video_buffer_seen_ptr = write_ptr; - s_video_buffer_cond.notify_all(); - } + s_video_buffer_seen_ptr = write_ptr; } } - else if (s_emu_running_state.load()) + else { + SCPFifoStruct &fifo = CommandProcessor::fifo; + AsyncRequests::GetInstance()->PullEvents(); CommandProcessor::SetCPStatusFromGPU(); @@ -333,6 +324,7 @@ void RunGpuLoop() if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin) { + u32 cyclesExecuted = 0; u32 readPtr = fifo.CPReadPointer; ReadDataFromFifo(readPtr); @@ -369,31 +361,15 @@ void RunGpuLoop() // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); } + // The fifo is empty and it's unlikely we will get any more work in the near future. + // Make sure VertexManager finishes drawing any primitives it has stored in its buffer. + VertexManager::Flush(); // don't release the GPU running state on sync GPU waits fifo.isGpuReadingData = !run_loop; } + }); - s_gpu_is_pending.Clear(); - s_gpu_done_event.Set(); - - if (s_gpu_is_running.IsSet()) - { - if (CommandProcessor::s_gpuMaySleep.IsSet()) - { - // Reset the atomic flag. 
But as the CPU thread might have pushed some new data, we have to rerun the GPU loop - s_gpu_is_pending.Set(); - s_gpu_is_running.Clear(); - CommandProcessor::s_gpuMaySleep.Clear(); - } - } - else - { - s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); - } - } - // wake up SyncGPU if we were interrupted - s_video_buffer_cond.notify_all(); AsyncRequests::GetInstance()->SetEnable(false); AsyncRequests::GetInstance()->SetPassthrough(true); } @@ -403,11 +379,12 @@ void FlushGpu() if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) return; - while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) - { - CommandProcessor::s_gpuMaySleep.Set(); - s_gpu_done_event.Wait(); - } + s_gpu_mainloop.Wait(); +} + +void GpuMaySleep() +{ + s_gpu_mainloop.AllowSleep(); // Lets the GPU loop fall back from busy looping to waiting for new work. } bool AtBreakpoint() @@ -429,6 +406,7 @@ void RunGpu() if (g_use_deterministic_gpu_thread) { ReadDataFromFifoOnCPU(fifo.CPReadPointer); + s_gpu_mainloop.Wakeup(); } else { @@ -460,11 +438,9 @@ void RunGpu() } // wake up GPU thread - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) { - s_gpu_is_pending.Set(); - s_gpu_is_running.Set(); - s_gpu_new_work_event.Set(); + s_gpu_mainloop.Wakeup(); } } diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index b59004aa03..8a8a954fe0 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -43,6 +43,7 @@ void* PopFifoAuxBuffer(size_t size); void FlushGpu(); void RunGpu(); +void GpuMaySleep(); void RunGpuLoop(); void ExitGpuLoop(); void EmulatorState(bool running);