Merge pull request #2172 from degasus/block_gpu_thread

Block gpu thread
This commit is contained in:
Markus Wick 2015-04-06 13:24:17 +02:00
commit 4669b50e23
12 changed files with 118 additions and 96 deletions

View File

@ -480,11 +480,8 @@ void Idle()
//While the FIFO is processing data we must not advance, because doing so
//would desynchronize the VI. So we wait until the FIFO finishes and,
//in the meantime, process only the events required by the FIFO.
while (g_video_backend->Video_IsPossibleWaitingSetDrawDone())
{
ProcessFifoWaitEvents();
Common::YieldCPU();
}
ProcessFifoWaitEvents();
g_video_backend->Video_Sync();
}
idledCycles += DowncountToCycles(PowerPC::ppcState.downcount);

View File

@ -199,6 +199,9 @@ static void PatchEngineCallback(u64 userdata, int cyclesLate)
static void ThrottleCallback(u64 last_time, int cyclesLate)
{
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
CommandProcessor::s_gpuMaySleep.Set();
u32 time = Common::Timer::GetTimeMs();
int diff = (u32)last_time - time;

View File

@ -360,11 +360,6 @@ void VideoSoftware::Video_GatherPipeBursted()
SWCommandProcessor::GatherPipeBursted();
}
// Software-backend implementation of the "is a SetDrawDone wait possible?" query.
// Always answers false, so a caller that loops while this returns true
// (as the wait loop in Idle() does) never spins for this backend.
bool VideoSoftware::Video_IsPossibleWaitingSetDrawDone()
{
return false;
}
void VideoSoftware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
{
SWCommandProcessor::RegisterMMIO(mmio, base);

View File

@ -45,7 +45,7 @@ class VideoSoftware : public VideoBackend
void Video_SetRendering(bool bEnabled) override;
void Video_GatherPipeBursted() override;
bool Video_IsPossibleWaitingSetDrawDone() override;
void Video_Sync() override {}
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;

View File

@ -1,4 +1,5 @@
#include "VideoCommon/AsyncRequests.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/RenderBase.h"
AsyncRequests AsyncRequests::s_singleton;
@ -49,6 +50,7 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking)
m_queue.push(event);
RunGpu();
if (blocking)
{
m_cond.wait(lock, [this]{return m_queue.empty();});

View File

@ -40,12 +40,13 @@ static u16 m_bboxright;
static u16 m_bboxbottom;
static u16 m_tokenReg;
volatile bool isPossibleWaitingSetDrawDone = false;
volatile bool interruptSet= false;
volatile bool interruptWaiting= false;
volatile bool interruptTokenWaiting = false;
volatile bool interruptFinishWaiting = false;
Common::Flag s_gpuMaySleep;
volatile u32 VITicks = CommandProcessor::m_cpClockOrigin;
static bool IsOnThread()
@ -70,7 +71,6 @@ void DoState(PointerWrap &p)
p.Do(m_tokenReg);
p.Do(fifo);
p.Do(isPossibleWaitingSetDrawDone);
p.Do(interruptSet);
p.Do(interruptWaiting);
p.Do(interruptTokenWaiting);
@ -123,8 +123,6 @@ void Init()
interruptFinishWaiting = false;
interruptTokenWaiting = false;
isPossibleWaitingSetDrawDone = false;
et_UpdateInterrupts = CoreTiming::RegisterEvent("CPInterrupt", UpdateInterrupts_Wrapper);
}
@ -319,13 +317,10 @@ void GatherPipeBursted()
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase) &&
fifo.CPReadWriteDistance > 0)
{
ProcessFifoAllDistance();
FlushGpu();
}
}
else
{
RunGpu();
}
RunGpu();
return;
}
@ -375,6 +370,7 @@ void UpdateInterrupts(u64 userdata)
}
CoreTiming::ForceExceptionCheck(0);
interruptWaiting = false;
RunGpu();
}
void UpdateInterruptsFromVideoBackend(u64 userdata)
@ -470,15 +466,6 @@ void SetCPStatusFromCPU()
}
}
// Busy-waits, yielding the CPU on every iteration, until the FIFO has nothing
// left that can currently be read. Does nothing when IsOnThread() is false.
// NOTE(review): IsOnThread() presumably reports whether the GPU runs on its
// own thread -- its body is not visible here, confirm.
void ProcessFifoAllDistance()
{
if (IsOnThread())
{
// Keep yielding while reads are enabled, a non-zero read/write distance
// remains, no interrupt is waiting, and no breakpoint has been hit.
while (!interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
Common::YieldCPU();
}
}
void ProcessFifoEvents()
{
if (IsOnThread() && (interruptWaiting || interruptFinishWaiting || interruptTokenWaiting))
@ -520,7 +507,7 @@ void SetCpControlRegister()
if (fifo.bFF_GPReadEnable && !m_CPCtrlReg.GPReadEnable)
{
fifo.bFF_GPReadEnable = m_CPCtrlReg.GPReadEnable;
while (fifo.isGpuReadingData) Common::YieldCPU();
FlushGpu();
}
else
{
@ -551,5 +538,7 @@ void Update()
if (fifo.isGpuReadingData)
Common::AtomicAdd(VITicks, SystemTimers::GetTicksPerSecond() / 10000);
RunGpu();
}
} // end of namespace CommandProcessor

View File

@ -5,6 +5,7 @@
#pragma once
#include "Common/CommonTypes.h"
#include "Common/Flag.h"
#include "VideoCommon/VideoBackendBase.h"
class PointerWrap;
@ -17,11 +18,11 @@ namespace CommandProcessor
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
extern volatile bool interruptSet;
extern volatile bool interruptWaiting;
extern volatile bool interruptTokenWaiting;
extern volatile bool interruptFinishWaiting;
extern Common::Flag s_gpuMaySleep;
// internal hardware addresses
enum
@ -145,7 +146,6 @@ void UpdateInterruptsFromVideoBackend(u64 userdata);
void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();
void ProcessFifoAllDistance();
void ProcessFifoEvents();
void Update();

View File

@ -5,6 +5,7 @@
#include "Common/Atomic.h"
#include "Common/ChunkFile.h"
#include "Common/CPUDetect.h"
#include "Common/Event.h"
#include "Common/FPURoundMode.h"
#include "Common/MemoryUtil.h"
#include "Common/Thread.h"
@ -29,7 +30,6 @@ bool g_bSkipCurrentFrame = false;
static volatile bool GpuRunningState = false;
static volatile bool EmuRunningState = false;
static std::mutex m_csHWVidOccupied;
// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
@ -58,6 +58,12 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static Common::Flag s_gpu_is_running; // If this one is set, the gpu loop will be called at least once again
static Common::Event s_gpu_new_work_event;
static Common::Flag s_gpu_is_pending; // If this one is set, there might still be work to do
static Common::Event s_gpu_done_event;
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
@ -79,16 +85,12 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
SyncGPU(SYNC_GPU_OTHER);
EmulatorState(false);
if (!Core::IsGPUThread())
m_csHWVidOccupied.lock();
_dbg_assert_(COMMON, !CommandProcessor::fifo.isGpuReadingData);
FlushGpu();
}
else
{
if (unpauseOnUnlock)
EmulatorState(true);
if (!Core::IsGPUThread())
m_csHWVidOccupied.unlock();
}
}
@ -127,17 +129,18 @@ void ExitGpuLoop()
{
// This should break the wait loop in CPU thread
CommandProcessor::fifo.bFF_GPReadEnable = false;
SCPFifoStruct &fifo = CommandProcessor::fifo;
while (fifo.isGpuReadingData)
Common::YieldCPU();
FlushGpu();
// Terminate GPU thread loop
GpuRunningState = false;
EmuRunningState = true;
s_gpu_new_work_event.Set();
}
// Records whether the emulator is running and signals the GPU loop.
//   running: new value stored in EmuRunningState.
void EmulatorState(bool running)
{
EmuRunningState = running;
// Signal the new-work event so a GPU loop waiting on it (RunGpuLoop waits
// with a 100 ms timeout) re-evaluates EmuRunningState without delay.
s_gpu_new_work_event.Set();
}
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
@ -266,15 +269,10 @@ void ResetVideoBuffer()
// Purpose: Keep the Core HW updated about the CPU-GPU distance
void RunGpuLoop()
{
std::lock_guard<std::mutex> lk(m_csHWVidOccupied);
GpuRunningState = true;
SCPFifoStruct &fifo = CommandProcessor::fifo;
u32 cyclesExecuted = 0;
// If the host CPU has only two cores, idle loop instead of busy loop
// This allows a system that we are maxing out in dual core mode to do other things
bool yield_cpu = cpu_info.num_cores <= 2;
AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false);
@ -282,9 +280,10 @@ void RunGpuLoop()
{
g_video_backend->PeekMessages();
AsyncRequests::GetInstance()->PullEvents();
if (g_use_deterministic_gpu_thread)
if (g_use_deterministic_gpu_thread && EmuRunningState)
{
AsyncRequests::GetInstance()->PullEvents();
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
@ -300,17 +299,23 @@ void RunGpuLoop()
}
}
}
else
else if (EmuRunningState)
{
AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::SetCPStatusFromGPU();
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
if (!fifo.isGpuReadingData)
{
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
}
bool run_loop = true;
// check if we are able to run this buffer
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
while (run_loop && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
{
@ -338,6 +343,10 @@ void RunGpuLoop()
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
else
{
run_loop = false;
}
CommandProcessor::SetCPStatusFromGPU();
@ -345,30 +354,28 @@ void RunGpuLoop()
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
fifo.isGpuReadingData = false;
// don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop;
}
if (EmuRunningState)
s_gpu_is_pending.Clear();
s_gpu_done_event.Set();
if (s_gpu_is_running.IsSet())
{
// NOTE(jsd): Calling SwitchToThread() on Windows 7 x64 is a hot spot, according to profiler.
// See https://docs.google.com/spreadsheet/ccc?key=0Ah4nh0yGtjrgdFpDeF9pS3V6RUotRVE3S3J4TGM1NlE#gid=0
// for benchmark details.
if (yield_cpu)
Common::YieldCPU();
if (CommandProcessor::s_gpuMaySleep.IsSet())
{
// Reset the atomic flag. But as the CPU thread might have pushed some new data, we have to rerun the GPU loop
s_gpu_is_pending.Set();
s_gpu_is_running.Clear();
CommandProcessor::s_gpuMaySleep.Clear();
}
}
else
{
// While the emu is paused, we still handle async requests then sleep.
while (!EmuRunningState)
{
g_video_backend->PeekMessages();
m_csHWVidOccupied.unlock();
Common::SleepCurrentThread(1);
m_csHWVidOccupied.lock();
}
s_gpu_new_work_event.WaitFor(std::chrono::milliseconds(100));
}
}
// wake up SyncGPU if we were interrupted
@ -377,6 +384,17 @@ void RunGpuLoop()
AsyncRequests::GetInstance()->SetPassthrough(true);
}
// Blocks the caller until the GPU loop reports that it is neither running
// nor has pending work (both s_gpu_is_running and s_gpu_is_pending clear).
// Returns immediately when bCPUThread is disabled or when the deterministic
// GPU thread is in use.
// NOTE(review): presumably those are the configurations in which the caller
// executes the FIFO itself via RunGpu() -- confirm.
void FlushGpu()
{
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
return;
while (s_gpu_is_running.IsSet() || s_gpu_is_pending.IsSet())
{
// RunGpuLoop only clears s_gpu_is_running once s_gpuMaySleep is set, so
// set it here on every pass instead of relying on the throttle callback.
CommandProcessor::s_gpuMaySleep.Set();
// RunGpuLoop sets s_gpu_done_event once per loop iteration; wake up then
// and re-check the two flags.
s_gpu_done_event.Wait();
}
}
bool AtBreakpoint()
{
@ -386,41 +404,59 @@ bool AtBreakpoint()
void RunGpu()
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
!g_use_deterministic_gpu_thread)
return;
SCPFifoStruct &fifo = CommandProcessor::fifo;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
// execute GPU
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
{
if (g_use_deterministic_gpu_thread)
bool reset_simd_state = false;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
if (g_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
}
else
{
if (!reset_simd_state)
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
reset_simd_state = true;
}
ReadDataFromFifo(fifo.CPReadPointer);
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
}
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
if (fifo.CPReadPointer == fifo.CPEnd)
fifo.CPReadPointer = fifo.CPBase;
else
fifo.CPReadPointer += 32;
fifo.CPReadWriteDistance -= 32;
}
else
CommandProcessor::SetCPStatusFromGPU();
if (reset_simd_state)
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(fifo.CPReadPointer);
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
FPURoundMode::LoadSIMDState();
}
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
if (fifo.CPReadPointer == fifo.CPEnd)
fifo.CPReadPointer = fifo.CPBase;
else
fifo.CPReadPointer += 32;
fifo.CPReadWriteDistance -= 32;
}
CommandProcessor::SetCPStatusFromGPU();
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && !s_gpu_is_running.IsSet())
{
s_gpu_is_pending.Set();
s_gpu_is_running.Set();
s_gpu_new_work_event.Set();
}
}
void Fifo_UpdateWantDeterminism(bool want)
{
// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
// We are paused (or not running at all yet), so
// it should be safe to change this.
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
bool gpu_thread = false;

View File

@ -41,6 +41,7 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
void PushFifoAuxBuffer(void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void FlushGpu();
void RunGpu();
void RunGpuLoop();
void ExitGpuLoop();

View File

@ -233,9 +233,9 @@ void VideoBackendHardware::Video_GatherPipeBursted()
CommandProcessor::GatherPipeBursted();
}
bool VideoBackendHardware::Video_IsPossibleWaitingSetDrawDone()
void VideoBackendHardware::Video_Sync()
{
return CommandProcessor::isPossibleWaitingSetDrawDone;
FlushGpu();
}
void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)

View File

@ -287,7 +287,6 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
Common::AtomicStore(*(volatile u32*)&g_bSignalFinishInterrupt, 1);
UpdateInterrupts();
CommandProcessor::interruptFinishWaiting = false;
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
// SetToken

View File

@ -99,7 +99,7 @@ public:
virtual void Video_GatherPipeBursted() = 0;
virtual bool Video_IsPossibleWaitingSetDrawDone() = 0;
virtual void Video_Sync() = 0;
// Registers MMIO handlers for the CommandProcessor registers.
virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0;
@ -148,7 +148,7 @@ class VideoBackendHardware : public VideoBackend
void Video_GatherPipeBursted() override;
bool Video_IsPossibleWaitingSetDrawDone() override;
void Video_Sync() override;
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;