Merge pull request #2172 from degasus/block_gpu_thread
Block gpu thread
This commit is contained in:
commit
4669b50e23
|
@ -480,11 +480,8 @@ void Idle()
|
|||
// While the FIFO is processing data we must not advance, because otherwise
|
||||
// the VI would be desynchronized. So we wait until the FIFO finishes,
|
||||
// processing only the events required by the FIFO in the meantime.
|
||||
while (g_video_backend->Video_IsPossibleWaitingSetDrawDone())
|
||||
{
|
||||
ProcessFifoWaitEvents();
|
||||
Common::YieldCPU();
|
||||
}
|
||||
ProcessFifoWaitEvents();
|
||||
g_video_backend->Video_Sync();
|
||||
}
|
||||
|
||||
idledCycles += DowncountToCycles(PowerPC::ppcState.downcount);
|
||||
|
|
|
@ -199,6 +199,9 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate)
|
|||
|
||||
static void ThrottleCallback(u64 last_time, int cyclesLate)
|
||||
{
|
||||
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
|
||||
CommandProcessor::s_gpuMaySleep.Set();
|
||||
|
||||
u32 time = Common::Timer::GetTimeMs();
|
||||
|
||||
int diff = (u32)last_time - time;
|
||||
|
|
|
@ -360,11 +360,6 @@ void VideoSoftware::Video_GatherPipeBursted()
|
|||
SWCommandProcessor::GatherPipeBursted();
|
||||
}
|
||||
|
||||
// Software-backend implementation of the "possibly waiting for SetDrawDone" query.
// Always reports false.
bool VideoSoftware::Video_IsPossibleWaitingSetDrawDone()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void VideoSoftware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
|
||||
{
|
||||
SWCommandProcessor::RegisterMMIO(mmio, base);
|
||||
|
|
|
@ -45,7 +45,7 @@ class VideoSoftware : public VideoBackend
|
|||
void Video_SetRendering(bool bEnabled) override;
|
||||
|
||||
void Video_GatherPipeBursted() override;
|
||||
bool Video_IsPossibleWaitingSetDrawDone() override;
|
||||
void Video_Sync() override {}
|
||||
|
||||
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "VideoCommon/AsyncRequests.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/RenderBase.h"
|
||||
|
||||
AsyncRequests AsyncRequests::s_singleton;
|
||||
|
@ -49,6 +50,7 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking)
|
|||
|
||||
m_queue.push(event);
|
||||
|
||||
RunGpu();
|
||||
if (blocking)
|
||||
{
|
||||
m_cond.wait(lock, [this]{return m_queue.empty();});
|
||||
|
|
|
@ -40,12 +40,13 @@ static u16 m_bboxright;
|
|||
static u16 m_bboxbottom;
|
||||
static u16 m_tokenReg;
|
||||
|
||||
volatile bool isPossibleWaitingSetDrawDone = false;
|
||||
volatile bool interruptSet= false;
|
||||
volatile bool interruptWaiting= false;
|
||||
volatile bool interruptTokenWaiting = false;
|
||||
volatile bool interruptFinishWaiting = false;
|
||||
|
||||
Common::Flag s_gpuMaySleep;
|
||||
|
||||
volatile u32 VITicks = CommandProcessor::m_cpClockOrigin;
|
||||
|
||||
static bool IsOnThread()
|
||||
|
@ -70,7 +71,6 @@ void DoState(PointerWrap &p)
|
|||
p.Do(m_tokenReg);
|
||||
p.Do(fifo);
|
||||
|
||||
p.Do(isPossibleWaitingSetDrawDone);
|
||||
p.Do(interruptSet);
|
||||
p.Do(interruptWaiting);
|
||||
p.Do(interruptTokenWaiting);
|
||||
|
@ -123,8 +123,6 @@ void Init()
|
|||
interruptFinishWaiting = false;
|
||||
interruptTokenWaiting = false;
|
||||
|
||||
isPossibleWaitingSetDrawDone = false;
|
||||
|
||||
et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper);
|
||||
}
|
||||
|
||||
|
@ -319,13 +317,10 @@ void GatherPipeBursted()
|
|||
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) &&
|
||||
fifo.CPReadWriteDistance > 0)
|
||||
{
|
||||
ProcessFifoAllDistance();
|
||||
FlushGpu();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RunGpu();
|
||||
}
|
||||
RunGpu();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -375,6 +370,7 @@ void UpdateInterrupts(u64 userdata)
|
|||
}
|
||||
CoreTiming::ForceExceptionCheck(0);
|
||||
interruptWaiting = false;
|
||||
RunGpu();
|
||||
}
|
||||
|
||||
void UpdateInterruptsFromVideoBackend(u64 userdata)
|
||||
|
@ -470,15 +466,6 @@ void SetCPStatusFromCPU()
|
|||
}
|
||||
}
|
||||
|
||||
// Spins (yielding the CPU) until the FIFO has no remaining read/write distance.
// Only waits when IsOnThread() is true; otherwise this is a no-op. The wait also
// ends early if an interrupt is waiting, GP reads are disabled, or AtBreakpoint()
// reports true.
void ProcessFifoAllDistance()
|
||||
{
|
||||
if (IsOnThread())
|
||||
{
|
||||
while (!interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
Common::YieldCPU();
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessFifoEvents()
|
||||
{
|
||||
if (IsOnThread() && (interruptWaiting || interruptFinishWaiting || interruptTokenWaiting))
|
||||
|
@ -520,7 +507,7 @@ void SetCpControlRegister()
|
|||
if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
|
||||
{
|
||||
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
|
||||
while (fifo.isGpuReadingData) Common::YieldCPU();
|
||||
FlushGpu();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -551,5 +538,7 @@ void Update()
|
|||
|
||||
if (fifo.isGpuReadingData)
|
||||
Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000);
|
||||
|
||||
RunGpu();
|
||||
}
|
||||
} // end of namespace CommandProcessor
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Flag.h"
|
||||
#include "VideoCommon/VideoBackendBase.h"
|
||||
|
||||
class PointerWrap;
|
||||
|
@ -17,11 +18,11 @@ namespace CommandProcessor
|
|||
|
||||
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
|
||||
|
||||
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
|
||||
extern volatile bool interruptSet;
|
||||
extern volatile bool interruptWaiting;
|
||||
extern volatile bool interruptTokenWaiting;
|
||||
extern volatile bool interruptFinishWaiting;
|
||||
extern Common::Flag s_gpuMaySleep;
|
||||
|
||||
// internal hardware addresses
|
||||
enum
|
||||
|
@ -145,7 +146,6 @@ void UpdateInterruptsFromVideoBackend(u64 userdata);
|
|||
void SetCpClearRegister();
|
||||
void SetCpControlRegister();
|
||||
void SetCpStatusRegister();
|
||||
void ProcessFifoAllDistance();
|
||||
void ProcessFifoEvents();
|
||||
|
||||
void Update();
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include "Common/Atomic.h"
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/Event.h"
|
||||
#include "Common/FPURoundMode.h"
|
||||
#include "Common/MemoryUtil.h"
|
||||
#include "Common/Thread.h"
|
||||
|
@ -29,7 +30,6 @@ bool g_bSkipCurrentFrame = false;
|
|||
|
||||
static volatile bool GpuRunningState = false;
|
||||
static volatile bool EmuRunningState = false;
|
||||
static std::mutex m_csHWVidOccupied;
|
||||
|
||||
// Most of this array is unlikely to be faulted in...
|
||||
static u8 s_fifo_aux_data[FIFO_SIZE];
|
||||
|
@ -58,6 +58,12 @@ static u8* s_video_buffer_pp_read_ptr;
|
|||
// polls, it's just atomic.
|
||||
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
|
||||
|
||||
static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again
|
||||
static Common::Event s_gpu_new_work_event;
|
||||
|
||||
static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do
|
||||
static Common::Event s_gpu_done_event;
|
||||
|
||||
void Fifo_DoState(PointerWrap &p)
|
||||
{
|
||||
p.DoArray(s_video_buffer, FIFO_SIZE);
|
||||
|
@ -79,16 +85,12 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
|||
{
|
||||
SyncGPU(SYNC_GPU_OTHER);
|
||||
EmulatorState(false);
|
||||
if (!Core::IsGPUThread())
|
||||
m_csHWVidOccupied.lock();
|
||||
_dbg_assert_(COMMON, !CommandProcessor::fifo.isGpuReadingData);
|
||||
FlushGpu();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (unpauseOnUnlock)
|
||||
EmulatorState(true);
|
||||
if (!Core::IsGPUThread())
|
||||
m_csHWVidOccupied.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,17 +129,18 @@ void ExitGpuLoop()
|
|||
{
|
||||
// This should break the wait loop in CPU thread
|
||||
CommandProcessor::fifo.bFF_GPReadEnable = false;
|
||||
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
||||
while (fifo.isGpuReadingData)
|
||||
Common::YieldCPU();
|
||||
FlushGpu();
|
||||
|
||||
// Terminate GPU thread loop
|
||||
GpuRunningState = false;
|
||||
EmuRunningState = true;
|
||||
s_gpu_new_work_event.Set();
|
||||
}
|
||||
|
||||
// Records whether the emulator is running and signals s_gpu_new_work_event.
// NOTE(review): the signal presumably wakes the GPU loop's WaitFor() on that event so
// it re-reads EmuRunningState promptly rather than after its timeout - confirm.
//
// running: new value stored into EmuRunningState.
void EmulatorState(bool running)
|
||||
{
|
||||
EmuRunningState = running;
|
||||
s_gpu_new_work_event.Set();
|
||||
}
|
||||
|
||||
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
|
||||
|
@ -266,15 +269,10 @@ void ResetVideoBuffer()
|
|||
// Purpose: Keep the Core HW updated about the CPU-GPU distance
|
||||
void RunGpuLoop()
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(m_csHWVidOccupied);
|
||||
GpuRunningState = true;
|
||||
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
||||
u32 cyclesExecuted = 0;
|
||||
|
||||
// If the host CPU has only two cores, idle loop instead of busy loop
|
||||
// This allows a system that we are maxing out in dual core mode to do other things
|
||||
bool yield_cpu = cpu_info.num_cores <= 2;
|
||||
|
||||
AsyncRequests::GetInstance()->SetEnable(true);
|
||||
AsyncRequests::GetInstance()->SetPassthrough(false);
|
||||
|
||||
|
@ -282,9 +280,10 @@ void RunGpuLoop()
|
|||
{
|
||||
g_video_backend->PeekMessages();
|
||||
|
||||
AsyncRequests::GetInstance()->PullEvents();
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
if (g_use_deterministic_gpu_thread && EmuRunningState)
|
||||
{
|
||||
AsyncRequests::GetInstance()->PullEvents();
|
||||
|
||||
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
|
||||
u8* seen_ptr = s_video_buffer_seen_ptr;
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
|
@ -300,17 +299,23 @@ void RunGpuLoop()
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
else if (EmuRunningState)
|
||||
{
|
||||
AsyncRequests::GetInstance()->PullEvents();
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||
if (!fifo.isGpuReadingData)
|
||||
{
|
||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||
}
|
||||
|
||||
bool run_loop = true;
|
||||
|
||||
// check if we are able to run this buffer
|
||||
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
while (run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
{
|
||||
fifo.isGpuReadingData = true;
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
|
||||
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
|
||||
{
|
||||
|
@ -338,6 +343,10 @@ void RunGpuLoop()
|
|||
if ((write_ptr - s_video_buffer_read_ptr) == 0)
|
||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||
}
|
||||
else
|
||||
{
|
||||
run_loop = false;
|
||||
}
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
|
@ -345,30 +354,28 @@ void RunGpuLoop()
|
|||
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
|
||||
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
|
||||
AsyncRequests::GetInstance()->PullEvents();
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
||||
}
|
||||
|
||||
fifo.isGpuReadingData = false;
|
||||
// don't release the GPU running state on sync GPU waits
|
||||
fifo.isGpuReadingData = !run_loop;
|
||||
}
|
||||
|
||||
if (EmuRunningState)
|
||||
s_gpu_is_pending.Clear();
|
||||
s_gpu_done_event.Set();
|
||||
|
||||
if (s_gpu_is_running.IsSet())
|
||||
{
|
||||
// NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler.
|
||||
// See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0
|
||||
// for benchmark details.
|
||||
if (yield_cpu)
|
||||
Common::YieldCPU();
|
||||
if (CommandProcessor::s_gpuMaySleep.IsSet())
|
||||
{
|
||||
// Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop
|
||||
s_gpu_is_pending.Set();
|
||||
s_gpu_is_running.Clear();
|
||||
CommandProcessor::s_gpuMaySleep.Clear();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// While the emu is paused, we still handle async requests then sleep.
|
||||
while (!EmuRunningState)
|
||||
{
|
||||
g_video_backend->PeekMessages();
|
||||
m_csHWVidOccupied.unlock();
|
||||
Common::SleepCurrentThread(1);
|
||||
m_csHWVidOccupied.lock();
|
||||
}
|
||||
s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100));
|
||||
}
|
||||
}
|
||||
// wake up SyncGPU if we were interrupted
|
||||
|
@ -377,6 +384,17 @@ void RunGpuLoop()
|
|||
AsyncRequests::GetInstance()->SetPassthrough(true);
|
||||
}
|
||||
|
||||
// Blocks the caller until neither s_gpu_is_running nor s_gpu_is_pending is set.
// Returns immediately when bCPUThread is disabled or g_use_deterministic_gpu_thread
// is set.
//
// Each iteration sets CommandProcessor::s_gpuMaySleep and then waits on
// s_gpu_done_event.
// NOTE(review): this presumably pairs with RunGpuLoop, which appears to signal
// s_gpu_done_event and to clear s_gpu_is_running once s_gpuMaySleep is set - confirm.
void FlushGpu()
|
||||
{
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
|
||||
return;
|
||||
|
||||
while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet())
|
||||
{
|
||||
CommandProcessor::s_gpuMaySleep.Set();
|
||||
s_gpu_done_event.Wait();
|
||||
}
|
||||
}
|
||||
|
||||
bool AtBreakpoint()
|
||||
{
|
||||
|
@ -386,41 +404,59 @@ bool AtBreakpoint()
|
|||
|
||||
void RunGpu()
|
||||
{
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
|
||||
!g_use_deterministic_gpu_thread)
|
||||
return;
|
||||
|
||||
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
|
||||
|
||||
// execute GPU
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
|
||||
{
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
bool reset_simd_state = false;
|
||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
|
||||
{
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
{
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!reset_simd_state)
|
||||
{
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
reset_simd_state = true;
|
||||
}
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
|
||||
}
|
||||
|
||||
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||
|
||||
if (fifo.CPReadPointer == fifo.CPEnd)
|
||||
fifo.CPReadPointer = fifo.CPBase;
|
||||
else
|
||||
fifo.CPReadPointer += 32;
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
}
|
||||
else
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
if (reset_simd_state)
|
||||
{
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
|
||||
FPURoundMode::LoadSIMDState();
|
||||
}
|
||||
|
||||
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||
|
||||
if (fifo.CPReadPointer == fifo.CPEnd)
|
||||
fifo.CPReadPointer = fifo.CPBase;
|
||||
else
|
||||
fifo.CPReadPointer += 32;
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
}
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
// wake up GPU thread
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet())
|
||||
{
|
||||
s_gpu_is_pending.Set();
|
||||
s_gpu_is_running.Set();
|
||||
s_gpu_new_work_event.Set();
|
||||
}
|
||||
}
|
||||
|
||||
void Fifo_UpdateWantDeterminism(bool want)
|
||||
{
|
||||
// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
|
||||
// We are paused (or not running at all yet), so
|
||||
// it should be safe to change this.
|
||||
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
|
||||
bool gpu_thread = false;
|
||||
|
|
|
@ -41,6 +41,7 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
|
|||
void PushFifoAuxBuffer(void* ptr, size_t size);
|
||||
void* PopFifoAuxBuffer(size_t size);
|
||||
|
||||
void FlushGpu();
|
||||
void RunGpu();
|
||||
void RunGpuLoop();
|
||||
void ExitGpuLoop();
|
||||
|
|
|
@ -233,9 +233,9 @@ void VideoBackendHardware::Video_GatherPipeBursted()
|
|||
CommandProcessor::GatherPipeBursted();
|
||||
}
|
||||
|
||||
bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone()
|
||||
void VideoBackendHardware::Video_Sync()
|
||||
{
|
||||
return CommandProcessor::isPossibleWaitingSetDrawDone;
|
||||
FlushGpu();
|
||||
}
|
||||
|
||||
void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
|
||||
|
|
|
@ -287,7 +287,6 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
|
|||
Common::AtomicStore(*(volatile u32*)&g_bSignalFinishInterrupt, 1);
|
||||
UpdateInterrupts();
|
||||
CommandProcessor::interruptFinishWaiting = false;
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
||||
}
|
||||
|
||||
// SetToken
|
||||
|
|
|
@ -99,7 +99,7 @@ public:
|
|||
|
||||
virtual void Video_GatherPipeBursted() = 0;
|
||||
|
||||
virtual bool Video_IsPossibleWaitingSetDrawDone() = 0;
|
||||
virtual void Video_Sync() = 0;
|
||||
|
||||
// Registers MMIO handlers for the CommandProcessor registers.
|
||||
virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0;
|
||||
|
@ -148,7 +148,7 @@ class VideoBackendHardware : public VideoBackend
|
|||
|
||||
void Video_GatherPipeBursted() override;
|
||||
|
||||
bool Video_IsPossibleWaitingSetDrawDone() override;
|
||||
void Video_Sync() override;
|
||||
|
||||
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;
|
||||
|
||||
|
|
Loading…
Reference in New Issue