Merge pull request #2533 from degasus/syncgpu

Fifo: Rewrite SyncGPU
This commit is contained in:
Jules Blok 2015-06-09 09:43:26 +02:00
commit d5788f75a3
12 changed files with 126 additions and 110 deletions

View File

@ -54,7 +54,7 @@ public:
void Wait()
{
// already done
if (m_stopped.IsSet() || m_running_state.load() <= STATE_DONE)
if (IsDone())
return;
// notifying this event will only wake up one thread, so use a mutex here to
@ -63,7 +63,7 @@ public:
std::lock_guard<std::mutex> lk(m_wait_lock);
// Wait for the worker thread to finish.
while (!m_stopped.IsSet() && m_running_state.load() > STATE_DONE)
while (!IsDone())
{
m_done_event.Wait();
}
@ -183,6 +183,11 @@ public:
return !m_stopped.IsSet() && !m_shutdown.IsSet();
}
// True when m_stopped is set, or when m_running_state is at or below STATE_DONE.
// This is the same condition Wait() checks before deciding whether to block.
bool IsDone() const
{
	if (m_stopped.IsSet())
		return true;

	return m_running_state.load() <= STATE_DONE;
}
// This function should be triggered regularly over time so
// that we will fall back from the busy loop to sleeping.
void AllowSleep()

View File

@ -182,6 +182,10 @@ void SConfig::SaveCoreSettings(IniFile& ini)
core->Set("DSPHLE", m_LocalCoreStartupParameter.bDSPHLE);
core->Set("SkipIdle", m_LocalCoreStartupParameter.bSkipIdle);
core->Set("SyncOnSkipIdle", m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack);
core->Set("SyncGPU", m_LocalCoreStartupParameter.bSyncGPU);
core->Set("SyncGpuMaxDistance", m_LocalCoreStartupParameter.iSyncGpuMaxDistance);
core->Set("SyncGpuMinDistance", m_LocalCoreStartupParameter.iSyncGpuMinDistance);
core->Set("SyncGpuOverclock", m_LocalCoreStartupParameter.fSyncGpuOverclock);
core->Set("DefaultISO", m_LocalCoreStartupParameter.m_strDefaultISO);
core->Set("DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot);
core->Set("Apploader", m_LocalCoreStartupParameter.m_strApploader);
@ -458,6 +462,9 @@ void SConfig::LoadCoreSettings(IniFile& ini)
core->Get("MMU", &m_LocalCoreStartupParameter.bMMU, false);
core->Get("BBDumpPort", &m_LocalCoreStartupParameter.iBBDumpPort, -1);
core->Get("SyncGPU", &m_LocalCoreStartupParameter.bSyncGPU, false);
core->Get("SyncGpuMaxDistance", &m_LocalCoreStartupParameter.iSyncGpuMaxDistance, 200000);
core->Get("SyncGpuMinDistance", &m_LocalCoreStartupParameter.iSyncGpuMinDistance, -200000);
core->Get("SyncGpuOverclock", &m_LocalCoreStartupParameter.fSyncGpuOverclock, 1.0);
core->Get("FastDiscSpeed", &m_LocalCoreStartupParameter.bFastDiscSpeed, false);
core->Get("DCBZ", &m_LocalCoreStartupParameter.bDCBZOFF, false);
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default

View File

@ -184,9 +184,13 @@ struct SCoreStartupParameter
bool bMMU;
bool bDCBZOFF;
int iBBDumpPort;
bool bSyncGPU;
bool bFastDiscSpeed;
bool bSyncGPU;
int iSyncGpuMaxDistance;
int iSyncGpuMinDistance;
float fSyncGpuOverclock;
int SelectedLanguage;
bool bWii;

View File

@ -482,13 +482,13 @@ void Idle()
{
//DEBUG_LOG(POWERPC, "Idle");
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack)
{
// While the FIFO is still processing data we must not advance, because doing so
// would desynchronize the VI. So we wait until the FIFO has finished, and in the
// meantime process only the events required by the FIFO.
ProcessFifoWaitEvents();
g_video_backend->Video_Sync();
g_video_backend->Video_Sync(0);
}
idledCycles += DowncountToCycles(PowerPC::ppcState.downcount);

View File

@ -61,11 +61,9 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule:
#include "Core/IPC_HLE/WII_IPC_HLE.h"
#include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VideoBackendBase.h"
namespace SystemTimers
{
@ -81,15 +79,14 @@ static int et_IPC_HLE;
static int et_PatchEngine; // PatchEngine updates every 1/60th of a second by default
static int et_Throttle;
static u64 s_last_sync_gpu_tick;
// These are badly educated guesses
// Feel free to experiment. Set these in Init below.
static int
// This is a fixed value, don't change it
AUDIO_DMA_PERIOD,
// Regulates the speed of the Command Processor
CP_PERIOD,
// This is completely arbitrary. If we find that we need lower latency, we can just
// increase this number.
IPC_HLE_PERIOD;
@ -140,8 +137,12 @@ static void SICallback(u64 userdata, int cyclesLate)
static void CPCallback(u64 userdata, int cyclesLate)
{
CommandProcessor::Update();
CoreTiming::ScheduleEvent(CP_PERIOD - cyclesLate, et_CP);
u64 now = CoreTiming::GetTicks();
int next = g_video_backend->Video_Sync((int)(now - s_last_sync_gpu_tick));
s_last_sync_gpu_tick = now;
if (next > 0)
CoreTiming::ScheduleEvent(next, et_CP);
}
static void DecrementerCallback(u64 userdata, int cyclesLate)
@ -239,9 +240,6 @@ void Init()
// System internal sample rate is fixed at 32KHz * 4 (16bit Stereo) / 32 bytes DMA
AUDIO_DMA_PERIOD = CPU_CORE_CLOCK / (AudioInterface::GetAIDSampleRate() * 4 / 32);
// Emulated gekko <-> flipper bus speed ratio (CPU clock / flipper clock)
CP_PERIOD = GetTicksPerSecond() / 10000;
Common::Timer::IncreaseResolution();
// store and convert localtime at boot to timebase ticks
CoreTiming::SetFakeTBStartValue((u64)(CPU_CORE_CLOCK / TIMER_RATIO) * (u64)CEXIIPL::GetGCTime());
@ -253,7 +251,7 @@ void Init()
et_Dec = CoreTiming::RegisterEvent("DecCallback", DecrementerCallback);
et_VI = CoreTiming::RegisterEvent("VICallback", VICallback);
et_SI = CoreTiming::RegisterEvent("SICallback", SICallback);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
et_CP = CoreTiming::RegisterEvent("CPCallback", CPCallback);
et_DSP = CoreTiming::RegisterEvent("DSPCallback", DSPCallback);
et_AudioDMA = CoreTiming::RegisterEvent("AudioDMACallback", AudioDMACallback);
@ -266,8 +264,9 @@ void Init()
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_SI);
CoreTiming::ScheduleEvent(AUDIO_DMA_PERIOD, et_AudioDMA);
CoreTiming::ScheduleEvent(0, et_Throttle, Common::Timer::GetTimeMs());
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
CoreTiming::ScheduleEvent(CP_PERIOD, et_CP);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
CoreTiming::ScheduleEvent(0, et_CP);
s_last_sync_gpu_tick = CoreTiming::GetTicks();
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_PatchEngine);

View File

@ -49,7 +49,7 @@ class VideoSoftware : public VideoBackend
void Video_SetRendering(bool bEnabled) override;
void Video_GatherPipeBursted() override;
void Video_Sync() override {}
int Video_Sync(int ticks) override { return 0; }
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;

View File

@ -16,7 +16,6 @@
#include "Core/HW/Memmap.h"
#include "Core/HW/MMIO.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/HW/SystemTimers.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PixelEngine.h"
@ -47,8 +46,6 @@ static std::atomic<bool> s_interrupt_waiting;
static std::atomic<bool> s_interrupt_token_waiting;
static std::atomic<bool> s_interrupt_finish_waiting;
static std::atomic<u32> s_vi_ticks(CommandProcessor::m_cpClockOrigin);
static bool IsOnThread()
{
return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
@ -546,30 +543,4 @@ void SetCpClearRegister()
{
}
void Update()
{
while (s_vi_ticks.load() > m_cpClockOrigin && fifo.isGpuReadingData && IsOnThread())
Common::YieldCPU();
if (fifo.isGpuReadingData)
s_vi_ticks.fetch_add(SystemTimers::GetTicksPerSecond() / 10000);
RunGpu();
}
u32 GetVITicks()
{
return s_vi_ticks.load();
}
void SetVITicks(u32 ticks)
{
s_vi_ticks.store(ticks);
}
void DecrementVITicks(u32 ticks)
{
s_vi_ticks.fetch_sub(ticks);
}
} // end of namespace CommandProcessor

View File

@ -121,9 +121,6 @@ union UCPClearReg
UCPClearReg(u16 _hex) {Hex = _hex; }
};
// Can be any number, low enough to not be below the number of clocks executed by the GPU per CP_PERIOD
const static u32 m_cpClockOrigin = 200000;
// Init
void Init();
void Shutdown();
@ -146,10 +143,4 @@ void SetCpControlRegister();
void SetCpStatusRegister();
void ProcessFifoEvents();
void Update();
u32 GetVITicks();
void SetVITicks(u32 ticks);
void DecrementVITicks(u32 ticks);
} // namespace CommandProcessor

View File

@ -61,6 +61,9 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static std::atomic<int> s_sync_ticks;
static Common::Event s_sync_wakeup_event;
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
@ -99,7 +102,7 @@ void Fifo_Init()
ResetVideoBuffer();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
s_gpu_mainloop.Prepare();
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
s_sync_ticks.store(0);
}
void Fifo_Shutdown()
@ -282,6 +285,8 @@ void RunGpuLoop()
s_gpu_mainloop.Run(
[] {
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
g_video_backend->PeekMessages();
// Do nothing while paused
@ -310,20 +315,12 @@ void RunGpuLoop()
CommandProcessor::SetCPStatusFromGPU();
if (!fifo.isGpuReadingData)
{
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
}
bool run_loop = true;
// check if we are able to run this buffer
while (run_loop && !CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin)
{
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
@ -336,37 +333,41 @@ void RunGpuLoop()
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && CommandProcessor::GetVITicks() >= cyclesExecuted)
CommandProcessor::DecrementVITicks(cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
else
{
run_loop = false;
}
CommandProcessor::SetCPStatusFromGPU();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
{
cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old > 0 && old - (int)cyclesExecuted <= 0)
s_sync_wakeup_event.Set();
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old > 0)
s_sync_wakeup_event.Set();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
VertexManager::Flush();
// don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop;
}
}, 100);
@ -376,7 +377,9 @@ void RunGpuLoop()
void FlushGpu()
{
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
return;
s_gpu_mainloop.Wait();
@ -396,9 +399,10 @@ bool AtBreakpoint()
void RunGpu()
{
SCPFifoStruct &fifo = CommandProcessor::fifo;
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
// execute GPU
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
{
bool reset_simd_state = false;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
@ -438,7 +442,7 @@ void RunGpu()
}
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
if (param.bCPUThread)
{
s_gpu_mainloop.Wakeup();
}
@ -471,7 +475,7 @@ void Fifo_UpdateWantDeterminism(bool want)
break;
}
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
gpu_thread = gpu_thread && param.bCPUThread;
if (g_use_deterministic_gpu_thread != gpu_thread)
{
@ -485,3 +489,40 @@ void Fifo_UpdateWantDeterminism(bool want)
}
}
}
// Credits `ticks` to the shared s_sync_ticks counter and, if that counter has reached
// iSyncGpuMaxDistance, blocks until the GPU loop has brought it back to zero or below.
//
// ticks: amount to add to s_sync_ticks. A value of 0 is special-cased: it only calls
//        FlushGpu() and does not touch the counter.
// Returns iSyncGpuMaxDistance in the flush and "GPU idle / deterministic" cases; otherwise
// the remaining headroom (iSyncGpuMaxDistance - s_sync_ticks) at the time of return.
// NOTE(review): callers appear to use the result as the delay until the next sync call - confirm.
int Fifo_Update(int ticks)
{
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
// A zero tick count is a plain flush request; the sync counter is left unchanged.
if (ticks == 0)
{
FlushGpu();
return param.iSyncGpuMaxDistance;
}
// GPU is sleeping, so no need for synchronization
if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread)
{
// Only a negative balance is topped up here; a non-negative counter is left as it is.
if (s_sync_ticks.load() < 0)
{
int old = s_sync_ticks.fetch_add(ticks);
// The counter just crossed iSyncGpuMinDistance from below, so kick the GPU via RunGpu().
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
}
return param.iSyncGpuMaxDistance;
}
// GPU is busy: credit the ticks, and kick the GPU if this call lifted the counter
// from below iSyncGpuMinDistance to at or above it.
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
// Counter has reached the maximum distance: block until it is back at zero or below.
if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
{
// Re-check the counter after every wakeup instead of trusting a single event signal.
// The GPU loop sets s_sync_wakeup_event when it takes the counter from positive to <= 0.
while (s_sync_ticks.load() > 0)
{
s_sync_wakeup_event.Wait();
}
}
// Remaining headroom before the maximum distance is reached again.
return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}

View File

@ -50,3 +50,4 @@ void EmulatorState(bool running);
bool AtBreakpoint();
void ResetVideoBuffer();
void Fifo_SetRendering(bool bEnabled);
int Fifo_Update(int ticks);

View File

@ -245,9 +245,9 @@ void VideoBackendHardware::Video_GatherPipeBursted()
CommandProcessor::GatherPipeBursted();
}
void VideoBackendHardware::Video_Sync()
int VideoBackendHardware::Video_Sync(int ticks)
{
FlushGpu();
return Fifo_Update(ticks);
}
void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)

View File

@ -56,9 +56,6 @@ struct SCPFifoStruct
volatile u32 bFF_LoWatermark;
volatile u32 bFF_HiWatermark;
// for GP watchdog hack
volatile u32 isGpuReadingData;
};
class VideoBackend
@ -99,7 +96,7 @@ public:
virtual void Video_GatherPipeBursted() = 0;
virtual void Video_Sync() = 0;
virtual int Video_Sync(int ticks) = 0;
// Registers MMIO handlers for the CommandProcessor registers.
virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0;
@ -148,7 +145,7 @@ class VideoBackendHardware : public VideoBackend
void Video_GatherPipeBursted() override;
void Video_Sync() override;
int Video_Sync(int ticks) override;
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;