From 279c657cda7772a39f318b5df68fb279bb0400b4 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 5 Mar 2015 17:12:24 +0100 Subject: [PATCH 1/6] Fifo: Replace busy loop with condition variable --- Source/Core/VideoCommon/AsyncRequests.cpp | 2 + Source/Core/VideoCommon/CommandProcessor.cpp | 8 +- Source/Core/VideoCommon/Fifo.cpp | 82 ++++++++++++-------- 3 files changed, 54 insertions(+), 38 deletions(-) diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index a6cc5a4b9c..d2945a3dbc 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -1,4 +1,5 @@ #include "VideoCommon/AsyncRequests.h" +#include "VideoCommon/Fifo.h" #include "VideoCommon/RenderBase.h" AsyncRequests AsyncRequests::s_singleton; @@ -49,6 +50,7 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking) m_queue.push(event); + RunGpu(); if (blocking) { m_cond.wait(lock, [this]{return m_queue.empty();}); diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index b53c50fc2e..21ef0be2a9 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -322,10 +322,7 @@ void GatherPipeBursted() ProcessFifoAllDistance(); } } - else - { - RunGpu(); - } + RunGpu(); return; } @@ -375,6 +372,7 @@ void UpdateInterrupts(u64 userdata) } CoreTiming::ForceExceptionCheck(0); interruptWaiting = false; + RunGpu(); } void UpdateInterruptsFromVideoBackend(u64 userdata) @@ -551,5 +549,7 @@ void Update() if (fifo.isGpuReadingData) Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000); + + RunGpu(); } } // end of namespace CommandProcessor diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 8c7dbce49b..4ca7a3de58 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -5,6 +5,7 @@ #include "Common/Atomic.h" #include 
"Common/ChunkFile.h" #include "Common/CPUDetect.h" +#include "Common/Event.h" #include "Common/FPURoundMode.h" #include "Common/MemoryUtil.h" #include "Common/Thread.h" @@ -58,6 +59,9 @@ static u8* s_video_buffer_pp_read_ptr; // polls, it's just atomic. // - The pp_read_ptr is the CPU preprocessing version of the read_ptr. +static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again +static Common::Event s_gpu_new_work_event; + void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -133,11 +137,13 @@ void ExitGpuLoop() // Terminate GPU thread loop GpuRunningState = false; EmuRunningState = true; + s_gpu_new_work_event.Set(); } void EmulatorState(bool running) { EmuRunningState = running; + s_gpu_new_work_event.Set(); } void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr) @@ -271,10 +277,6 @@ void RunGpuLoop() SCPFifoStruct &fifo = CommandProcessor::fifo; u32 cyclesExecuted = 0; - // If the host CPU has only two cores, idle loop instead of busy loop - // This allows a system that we are maxing out in dual core mode to do other things - bool yield_cpu = cpu_info.num_cores <= 2; - AsyncRequests::GetInstance()->SetEnable(true); AsyncRequests::GetInstance()->SetPassthrough(false); @@ -353,11 +355,15 @@ void RunGpuLoop() if (EmuRunningState) { - // NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler. - // See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0 - // for benchmark details. - if (yield_cpu) - Common::YieldCPU(); + if (s_gpu_is_running.IsSet()) + { + // reset the atomic flag. 
But as the CPU thread might have pushed some new data, we have to rerun the GPU loop + s_gpu_is_running.Clear(); + } + else + { + s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); + } } else { @@ -386,36 +392,44 @@ bool AtBreakpoint() void RunGpu() { - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && - !g_use_deterministic_gpu_thread) - return; - SCPFifoStruct &fifo = CommandProcessor::fifo; - while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) + + // execute GPU + if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) { - if (g_use_deterministic_gpu_thread) + while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) { - ReadDataFromFifoOnCPU(fifo.CPReadPointer); + if (g_use_deterministic_gpu_thread) + { + ReadDataFromFifoOnCPU(fifo.CPReadPointer); + } + else + { + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); + ReadDataFromFifo(fifo.CPReadPointer); + s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); + FPURoundMode::LoadSIMDState(); + } + + //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); + + if (fifo.CPReadPointer == fifo.CPEnd) + fifo.CPReadPointer = fifo.CPBase; + else + fifo.CPReadPointer += 32; + + fifo.CPReadWriteDistance -= 32; } - else - { - FPURoundMode::SaveSIMDState(); - FPURoundMode::LoadDefaultSIMDState(); - ReadDataFromFifo(fifo.CPReadPointer); - s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); - FPURoundMode::LoadSIMDState(); - } - - //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); - - if (fifo.CPReadPointer == fifo.CPEnd) - fifo.CPReadPointer = fifo.CPBase; - else - fifo.CPReadPointer += 32; - - fifo.CPReadWriteDistance -= 32; + CommandProcessor::SetCPStatusFromGPU(); + } + + // wake up GPU thread + if 
(SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) + { + s_gpu_is_running.Set(); + s_gpu_new_work_event.Set(); } - CommandProcessor::SetCPStatusFromGPU(); } void Fifo_UpdateWantDeterminism(bool want) From 9bdaa00e2dc18c06dcb9882d7f0caf372ed5990d Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 29 Mar 2015 12:20:24 +0200 Subject: [PATCH 2/6] Fifo: use the outer loop on sync GPU --- Source/Core/VideoCommon/Fifo.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 4ca7a3de58..5f75deb672 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -306,10 +306,15 @@ void RunGpuLoop() { CommandProcessor::SetCPStatusFromGPU(); - Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); + if (!fifo.isGpuReadingData) + { + Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); + } + + bool run_loop = true; // check if we are able to run this buffer - while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) + while (GpuRunningState && EmuRunningState && run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { fifo.isGpuReadingData = true; CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? 
true : false; @@ -340,6 +345,10 @@ void RunGpuLoop() if ((write_ptr - s_video_buffer_read_ptr) == 0) Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer); } + else + { + run_loop = false; + } CommandProcessor::SetCPStatusFromGPU(); @@ -350,7 +359,8 @@ void RunGpuLoop() CommandProcessor::isPossibleWaitingSetDrawDone = false; } - fifo.isGpuReadingData = false; + // don't release the GPU running state on sync GPU waits + fifo.isGpuReadingData = !run_loop; } if (EmuRunningState) From b020ae1c5db4e2e198d1beea91c75219a251a167 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 5 Mar 2015 21:14:46 +0100 Subject: [PATCH 3/6] Fifo: rewrite sync on idle skipping hack Now it's done without a busy loop --- Source/Core/Core/CoreTiming.cpp | 7 ++--- Source/Core/VideoBackends/Software/SWmain.cpp | 5 ---- .../VideoBackends/Software/VideoBackend.h | 2 +- Source/Core/VideoCommon/CommandProcessor.cpp | 17 ++---------- Source/Core/VideoCommon/CommandProcessor.h | 2 -- Source/Core/VideoCommon/Fifo.cpp | 26 ++++++++++++++----- Source/Core/VideoCommon/Fifo.h | 1 + Source/Core/VideoCommon/MainBase.cpp | 4 +-- Source/Core/VideoCommon/PixelEngine.cpp | 1 - Source/Core/VideoCommon/VideoBackendBase.h | 4 +-- 10 files changed, 30 insertions(+), 39 deletions(-) diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp index 5e75c27b05..d9872dcc3b 100644 --- a/Source/Core/Core/CoreTiming.cpp +++ b/Source/Core/Core/CoreTiming.cpp @@ -480,11 +480,8 @@ void Idle() //When the FIFO is processing data we must not advance because in this way //the VI will be desynchronized. So, We are waiting until the FIFO finish and //while we process only the events required by the FIFO. 
- while (g_video_backend->Video_IsPossibleWaitingSetDrawDone()) - { - ProcessFifoWaitEvents(); - Common::YieldCPU(); - } + ProcessFifoWaitEvents(); + g_video_backend->Video_Sync(); } idledCycles += DowncountToCycles(PowerPC::ppcState.downcount); diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 497d26be00..784fe6d07b 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -360,11 +360,6 @@ void VideoSoftware::Video_GatherPipeBursted() SWCommandProcessor::GatherPipeBursted(); } -bool VideoSoftware::Video_IsPossibleWaitingSetDrawDone() -{ - return false; -} - void VideoSoftware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) { SWCommandProcessor::RegisterMMIO(mmio, base); diff --git a/Source/Core/VideoBackends/Software/VideoBackend.h b/Source/Core/VideoBackends/Software/VideoBackend.h index 0edb3bd4df..8697e27c98 100644 --- a/Source/Core/VideoBackends/Software/VideoBackend.h +++ b/Source/Core/VideoBackends/Software/VideoBackend.h @@ -45,7 +45,7 @@ class VideoSoftware : public VideoBackend void Video_SetRendering(bool bEnabled) override; void Video_GatherPipeBursted() override; - bool Video_IsPossibleWaitingSetDrawDone() override; + void Video_Sync() override {} void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override; diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 21ef0be2a9..162e3a2d53 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -40,7 +40,6 @@ static u16 m_bboxright; static u16 m_bboxbottom; static u16 m_tokenReg; -volatile bool isPossibleWaitingSetDrawDone = false; volatile bool interruptSet= false; volatile bool interruptWaiting= false; volatile bool interruptTokenWaiting = false; @@ -70,7 +69,6 @@ void DoState(PointerWrap &p) p.Do(m_tokenReg); p.Do(fifo); - p.Do(isPossibleWaitingSetDrawDone); p.Do(interruptSet); 
p.Do(interruptWaiting); p.Do(interruptTokenWaiting); @@ -123,8 +121,6 @@ void Init() interruptFinishWaiting = false; interruptTokenWaiting = false; - isPossibleWaitingSetDrawDone = false; - et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper); } @@ -319,7 +315,7 @@ void GatherPipeBursted() (ProcessorInterface::Fifo_CPUBase == fifo.CPBase) && fifo.CPReadWriteDistance > 0) { - ProcessFifoAllDistance(); + FlushGpu(); } } RunGpu(); @@ -468,15 +464,6 @@ void SetCPStatusFromCPU() } } -void ProcessFifoAllDistance() -{ - if (IsOnThread()) - { - while (!interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) - Common::YieldCPU(); - } -} - void ProcessFifoEvents() { if (IsOnThread() && (interruptWaiting || interruptFinishWaiting || interruptTokenWaiting)) @@ -518,7 +505,7 @@ void SetCpControlRegister() if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable) { fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable; - while (fifo.isGpuReadingData) Common::YieldCPU(); + FlushGpu(); } else { diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index 0dad1578af..d25a5b7ef4 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -17,7 +17,6 @@ namespace CommandProcessor extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. -extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread. 
extern volatile bool interruptSet; extern volatile bool interruptWaiting; extern volatile bool interruptTokenWaiting; @@ -145,7 +144,6 @@ void UpdateInterruptsFromVideoBackend(u64 userdata); void SetCpClearRegister(); void SetCpControlRegister(); void SetCpStatusRegister(); -void ProcessFifoAllDistance(); void ProcessFifoEvents(); void Update(); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 5f75deb672..0b65a56cd8 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -62,6 +62,9 @@ static u8* s_video_buffer_pp_read_ptr; static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again static Common::Event s_gpu_new_work_event; +static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do +static Common::Event s_gpu_done_event; + void Fifo_DoState(PointerWrap &p) { p.DoArray(s_video_buffer, FIFO_SIZE); @@ -85,7 +88,6 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock) EmulatorState(false); if (!Core::IsGPUThread()) m_csHWVidOccupied.lock(); - _dbg_assert_(COMMON, !CommandProcessor::fifo.isGpuReadingData); } else { @@ -131,9 +133,8 @@ void ExitGpuLoop() { // This should break the wait loop in CPU thread CommandProcessor::fifo.bFF_GPReadEnable = false; - SCPFifoStruct &fifo = CommandProcessor::fifo; - while (fifo.isGpuReadingData) - Common::YieldCPU(); + FlushGpu(); + // Terminate GPU thread loop GpuRunningState = false; EmuRunningState = true; @@ -317,7 +318,6 @@ void RunGpuLoop() while (GpuRunningState && EmuRunningState && run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { fifo.isGpuReadingData = true; - CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? 
true : false; if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin) { @@ -356,18 +356,21 @@ void RunGpuLoop() // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down. AsyncRequests::GetInstance()->PullEvents(); - CommandProcessor::isPossibleWaitingSetDrawDone = false; } // don't release the GPU running state on sync GPU waits fifo.isGpuReadingData = !run_loop; } + s_gpu_is_pending.Clear(); + s_gpu_done_event.Set(); + if (EmuRunningState) { if (s_gpu_is_running.IsSet()) { // reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop + s_gpu_is_pending.Set(); s_gpu_is_running.Clear(); } else @@ -393,6 +396,16 @@ void RunGpuLoop() AsyncRequests::GetInstance()->SetPassthrough(true); } +void FlushGpu() +{ + if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) + return; + + while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) + { + s_gpu_done_event.Wait(); + } +} bool AtBreakpoint() { @@ -437,6 +450,7 @@ void RunGpu() // wake up GPU thread if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet()) { + s_gpu_is_pending.Set(); s_gpu_is_running.Set(); s_gpu_new_work_event.Set(); } diff --git a/Source/Core/VideoCommon/Fifo.h b/Source/Core/VideoCommon/Fifo.h index 8cec0d824d..f8b4b5f625 100644 --- a/Source/Core/VideoCommon/Fifo.h +++ b/Source/Core/VideoCommon/Fifo.h @@ -41,6 +41,7 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true); void PushFifoAuxBuffer(void* ptr, size_t size); void* PopFifoAuxBuffer(size_t size); +void FlushGpu(); void RunGpu(); void RunGpuLoop(); void ExitGpuLoop(); diff --git a/Source/Core/VideoCommon/MainBase.cpp b/Source/Core/VideoCommon/MainBase.cpp index a8af71b31e..102e17db34 
100644 --- a/Source/Core/VideoCommon/MainBase.cpp +++ b/Source/Core/VideoCommon/MainBase.cpp @@ -233,9 +233,9 @@ void VideoBackendHardware::Video_GatherPipeBursted() CommandProcessor::GatherPipeBursted(); } -bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone() +void VideoBackendHardware::Video_Sync() { - return CommandProcessor::isPossibleWaitingSetDrawDone; + FlushGpu(); } void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) diff --git a/Source/Core/VideoCommon/PixelEngine.cpp b/Source/Core/VideoCommon/PixelEngine.cpp index afe08c7d0a..da086924b1 100644 --- a/Source/Core/VideoCommon/PixelEngine.cpp +++ b/Source/Core/VideoCommon/PixelEngine.cpp @@ -287,7 +287,6 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate) Common::AtomicStore(*(volatile u32*)&g_bSignalFinishInterrupt, 1); UpdateInterrupts(); CommandProcessor::interruptFinishWaiting = false; - CommandProcessor::isPossibleWaitingSetDrawDone = false; } // SetToken diff --git a/Source/Core/VideoCommon/VideoBackendBase.h b/Source/Core/VideoCommon/VideoBackendBase.h index 4796a29b07..9aa6208e52 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.h +++ b/Source/Core/VideoCommon/VideoBackendBase.h @@ -99,7 +99,7 @@ public: virtual void Video_GatherPipeBursted() = 0; - virtual bool Video_IsPossibleWaitingSetDrawDone() = 0; + virtual void Video_Sync() = 0; // Registers MMIO handlers for the CommandProcessor registers. 
virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0; @@ -148,7 +148,7 @@ class VideoBackendHardware : public VideoBackend void Video_GatherPipeBursted() override; - bool Video_IsPossibleWaitingSetDrawDone() override; + void Video_Sync() override; void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override; From d2c62b17445b666b81a19cb3a6c46f9ee2d1388e Mon Sep 17 00:00:00 2001 From: degasus Date: Fri, 13 Mar 2015 23:36:31 +0100 Subject: [PATCH 4/6] Fifo: only sleep once within every ms of emulated time --- Source/Core/Core/HW/SystemTimers.cpp | 3 +++ Source/Core/VideoCommon/CommandProcessor.cpp | 2 ++ Source/Core/VideoCommon/CommandProcessor.h | 2 ++ Source/Core/VideoCommon/Fifo.cpp | 11 ++++++++--- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp index 72d1dc8715..e69c3c1e78 100644 --- a/Source/Core/Core/HW/SystemTimers.cpp +++ b/Source/Core/Core/HW/SystemTimers.cpp @@ -199,6 +199,9 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate) static void ThrottleCallback(u64 last_time, int cyclesLate) { + // Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz. 
+ CommandProcessor::s_gpuMaySleep.Set(); + u32 time = Common::Timer::GetTimeMs(); int diff = (u32)last_time - time; diff --git a/Source/Core/VideoCommon/CommandProcessor.cpp b/Source/Core/VideoCommon/CommandProcessor.cpp index 162e3a2d53..2c6660e1f8 100644 --- a/Source/Core/VideoCommon/CommandProcessor.cpp +++ b/Source/Core/VideoCommon/CommandProcessor.cpp @@ -45,6 +45,8 @@ volatile bool interruptWaiting= false; volatile bool interruptTokenWaiting = false; volatile bool interruptFinishWaiting = false; +Common::Flag s_gpuMaySleep; + volatile u32 VITicks = CommandProcessor::m_cpClockOrigin; static bool IsOnThread() diff --git a/Source/Core/VideoCommon/CommandProcessor.h b/Source/Core/VideoCommon/CommandProcessor.h index d25a5b7ef4..6d177844a6 100644 --- a/Source/Core/VideoCommon/CommandProcessor.h +++ b/Source/Core/VideoCommon/CommandProcessor.h @@ -5,6 +5,7 @@ #pragma once #include "Common/CommonTypes.h" +#include "Common/Flag.h" #include "VideoCommon/VideoBackendBase.h" class PointerWrap; @@ -21,6 +22,7 @@ extern volatile bool interruptSet; extern volatile bool interruptWaiting; extern volatile bool interruptTokenWaiting; extern volatile bool interruptFinishWaiting; +extern Common::Flag s_gpuMaySleep; // internal hardware addresses enum diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 0b65a56cd8..add7618d5d 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -369,9 +369,13 @@ void RunGpuLoop() { if (s_gpu_is_running.IsSet()) { - // reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop - s_gpu_is_pending.Set(); - s_gpu_is_running.Clear(); + if (CommandProcessor::s_gpuMaySleep.IsSet()) + { + // Reset the atomic flag. 
But as the CPU thread might have pushed some new data, we have to rerun the GPU loop + s_gpu_is_pending.Set(); + s_gpu_is_running.Clear(); + CommandProcessor::s_gpuMaySleep.Clear(); + } } else { @@ -403,6 +407,7 @@ void FlushGpu() while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet()) { + CommandProcessor::s_gpuMaySleep.Set(); s_gpu_done_event.Wait(); } } From b1ffd32f5f09a86598bb47c4b392b28354ed0a87 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 14 Mar 2015 08:02:16 +0100 Subject: [PATCH 5/6] Fifo: only touch the SIMD state once in the single core loop --- Source/Core/VideoCommon/Fifo.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index add7618d5d..c9ac3fdbbb 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -425,6 +425,7 @@ void RunGpu() // execute GPU if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread) { + bool reset_simd_state = false; while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) { if (g_use_deterministic_gpu_thread) @@ -433,11 +434,14 @@ void RunGpu() } else { - FPURoundMode::SaveSIMDState(); - FPURoundMode::LoadDefaultSIMDState(); + if (!reset_simd_state) + { + FPURoundMode::SaveSIMDState(); + FPURoundMode::LoadDefaultSIMDState(); + reset_simd_state = true; + } ReadDataFromFifo(fifo.CPReadPointer); s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false); - FPURoundMode::LoadSIMDState(); } //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); @@ -450,6 +454,11 @@ void RunGpu() fifo.CPReadWriteDistance -= 32; } CommandProcessor::SetCPStatusFromGPU(); + + if (reset_simd_state) + { + FPURoundMode::LoadSIMDState(); + } } // wake up GPU thread From 74795b45539ebee1c2af78fd6dca9022c3189c76 Mon Sep 17 00:00:00 2001 From: degasus Date: Sun, 29 Mar 2015 15:05:11 +0200 
Subject: [PATCH 6/6] Fifo: rewrite Fifo_PauseAndLock This lock isn't required any more as our FlushGpu guarantees to block until the GPU is idle --- Source/Core/VideoCommon/Fifo.cpp | 48 +++++++++++--------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index c9ac3fdbbb..c5e49fb583 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -30,7 +30,6 @@ bool g_bSkipCurrentFrame = false; static volatile bool GpuRunningState = false; static volatile bool EmuRunningState = false; -static std::mutex m_csHWVidOccupied; // Most of this array is unlikely to be faulted in... static u8 s_fifo_aux_data[FIFO_SIZE]; @@ -86,15 +85,12 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock) { SyncGPU(SYNC_GPU_OTHER); EmulatorState(false); - if (!Core::IsGPUThread()) - m_csHWVidOccupied.lock(); + FlushGpu(); } else { if (unpauseOnUnlock) EmulatorState(true); - if (!Core::IsGPUThread()) - m_csHWVidOccupied.unlock(); } } @@ -273,7 +269,6 @@ void ResetVideoBuffer() // Purpose: Keep the Core HW updated about the CPU-GPU distance void RunGpuLoop() { - std::lock_guard lk(m_csHWVidOccupied); GpuRunningState = true; SCPFifoStruct &fifo = CommandProcessor::fifo; u32 cyclesExecuted = 0; @@ -285,9 +280,10 @@ void RunGpuLoop() { g_video_backend->PeekMessages(); - AsyncRequests::GetInstance()->PullEvents(); - if (g_use_deterministic_gpu_thread) + if (g_use_deterministic_gpu_thread && EmuRunningState) { + AsyncRequests::GetInstance()->PullEvents(); + // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr; u8* write_ptr = s_video_buffer_write_ptr; @@ -303,8 +299,10 @@ void RunGpuLoop() } } } - else + else if (EmuRunningState) { + AsyncRequests::GetInstance()->PullEvents(); + CommandProcessor::SetCPStatusFromGPU(); if (!fifo.isGpuReadingData) @@ -315,7 +313,7 @@ void RunGpuLoop() bool run_loop = true; // check if we are able to run this buffer - while (GpuRunningState && EmuRunningState && run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) + while (run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint()) { fifo.isGpuReadingData = true; @@ -365,33 +363,19 @@ void RunGpuLoop() s_gpu_is_pending.Clear(); s_gpu_done_event.Set(); - if (EmuRunningState) + if (s_gpu_is_running.IsSet()) { - if (s_gpu_is_running.IsSet()) + if (CommandProcessor::s_gpuMaySleep.IsSet()) { - if (CommandProcessor::s_gpuMaySleep.IsSet()) - { - // Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop - s_gpu_is_pending.Set(); - s_gpu_is_running.Clear(); - CommandProcessor::s_gpuMaySleep.Clear(); - } - } - else - { - s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); + // Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop + s_gpu_is_pending.Set(); + s_gpu_is_running.Clear(); + CommandProcessor::s_gpuMaySleep.Clear(); } } else { - // While the emu is paused, we still handle async requests then sleep. 
- while (!EmuRunningState) - { - g_video_backend->PeekMessages(); - m_csHWVidOccupied.unlock(); - Common::SleepCurrentThread(1); - m_csHWVidOccupied.lock(); - } + s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100)); } } // wake up SyncGPU if we were interrupted @@ -472,7 +456,7 @@ void RunGpu() void Fifo_UpdateWantDeterminism(bool want) { - // We are paused (or not running at all yet) and have m_csHWVidOccupied, so + // We are paused (or not running at all yet), so // it should be safe to change this. const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter; bool gpu_thread = false;