Fifo: Rewrite SyncGpu

The new implementation has 3 options:
 SyncGpuMaxDistance
 SyncGpuMinDistance
 SyncGpuOverclock

The MaxDistance controlls how many CPU cycles the CPU is allowed to be in front
of the GPU. Too low values will slow down extremly, too high values are as
unsynchronized and half of the games will crash.
The -MinDistance (negative) set how many cycles the GPU is allowed to be in
front of the CPU. As we are used to emulate an infinitiv fast GPU, this may be
set to any high (negative) number.

The last parameter is to hack a faster (>1.0) or slower(<1.0) GPU. As we don't
emulate GPU timing very well (eg skip the timings of the pixel stage completely),
an overclock factor of ~0.5 is often much more accurate than 1.0
This commit is contained in:
degasus 2015-06-03 23:21:46 +02:00
parent 5a0daef7f0
commit d31bed8b79
12 changed files with 126 additions and 110 deletions

View File

@ -54,7 +54,7 @@ public:
void Wait()
{
// already done
if (m_stopped.IsSet() || m_running_state.load() <= STATE_DONE)
if (IsDone())
return;
// notifying this event will only wake up one thread, so use a mutex here to
@ -63,7 +63,7 @@ public:
std::lock_guard<std::mutex> lk(m_wait_lock);
// Wait for the worker thread to finish.
while (!m_stopped.IsSet() && m_running_state.load() > STATE_DONE)
while (!IsDone())
{
m_done_event.Wait();
}
@ -183,6 +183,11 @@ public:
return !m_stopped.IsSet() && !m_shutdown.IsSet();
}
bool IsDone() const
{
return m_stopped.IsSet() || m_running_state.load() <= STATE_DONE;
}
// This function should be triggered regularly over time so
// that we will fall back from the busy loop to sleeping.
void AllowSleep()

View File

@ -182,6 +182,10 @@ void SConfig::SaveCoreSettings(IniFile& ini)
core->Set("DSPHLE", m_LocalCoreStartupParameter.bDSPHLE);
core->Set("SkipIdle", m_LocalCoreStartupParameter.bSkipIdle);
core->Set("SyncOnSkipIdle", m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack);
core->Set("SyncGPU", m_LocalCoreStartupParameter.bSyncGPU);
core->Set("SyncGpuMaxDistance", m_LocalCoreStartupParameter.iSyncGpuMaxDistance);
core->Set("SyncGpuMinDistance", m_LocalCoreStartupParameter.iSyncGpuMinDistance);
core->Set("SyncGpuOverclock", m_LocalCoreStartupParameter.fSyncGpuOverclock);
core->Set("DefaultISO", m_LocalCoreStartupParameter.m_strDefaultISO);
core->Set("DVDRoot", m_LocalCoreStartupParameter.m_strDVDRoot);
core->Set("Apploader", m_LocalCoreStartupParameter.m_strApploader);
@ -458,6 +462,9 @@ void SConfig::LoadCoreSettings(IniFile& ini)
core->Get("MMU", &m_LocalCoreStartupParameter.bMMU, false);
core->Get("BBDumpPort", &m_LocalCoreStartupParameter.iBBDumpPort, -1);
core->Get("SyncGPU", &m_LocalCoreStartupParameter.bSyncGPU, false);
core->Get("SyncGpuMaxDistance", &m_LocalCoreStartupParameter.iSyncGpuMaxDistance, 200000);
core->Get("SyncGpuMinDistance", &m_LocalCoreStartupParameter.iSyncGpuMinDistance, -200000);
core->Get("SyncGpuOverclock", &m_LocalCoreStartupParameter.fSyncGpuOverclock, 1.0);
core->Get("FastDiscSpeed", &m_LocalCoreStartupParameter.bFastDiscSpeed, false);
core->Get("DCBZ", &m_LocalCoreStartupParameter.bDCBZOFF, false);
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default

View File

@ -184,9 +184,13 @@ struct SCoreStartupParameter
bool bMMU;
bool bDCBZOFF;
int iBBDumpPort;
bool bSyncGPU;
bool bFastDiscSpeed;
bool bSyncGPU;
int iSyncGpuMaxDistance;
int iSyncGpuMinDistance;
float fSyncGpuOverclock;
int SelectedLanguage;
bool bWii;

View File

@ -482,13 +482,13 @@ void Idle()
{
//DEBUG_LOG(POWERPC, "Idle");
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack && !SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPUOnSkipIdleHack)
{
//When the FIFO is processing data we must not advance because in this way
//the VI will be desynchronized. So, We are waiting until the FIFO finish and
//while we process only the events required by the FIFO.
ProcessFifoWaitEvents();
g_video_backend->Video_Sync();
g_video_backend->Video_Sync(0);
}
idledCycles += DowncountToCycles(PowerPC::ppcState.downcount);

View File

@ -61,11 +61,9 @@ IPC_HLE_PERIOD: For the Wiimote this is the call schedule:
#include "Core/IPC_HLE/WII_IPC_HLE.h"
#include "Core/PowerPC/PowerPC.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VideoBackendBase.h"
namespace SystemTimers
{
@ -81,15 +79,14 @@ static int et_IPC_HLE;
static int et_PatchEngine; // PatchEngine updates every 1/60th of a second by default
static int et_Throttle;
static u64 s_last_sync_gpu_tick;
// These are badly educated guesses
// Feel free to experiment. Set these in Init below.
static int
// This is a fixed value, don't change it
AUDIO_DMA_PERIOD,
// Regulates the speed of the Command Processor
CP_PERIOD,
// This is completely arbitrary. If we find that we need lower latency, we can just
// increase this number.
IPC_HLE_PERIOD;
@ -140,8 +137,12 @@ static void SICallback(u64 userdata, int cyclesLate)
static void CPCallback(u64 userdata, int cyclesLate)
{
CommandProcessor::Update();
CoreTiming::ScheduleEvent(CP_PERIOD - cyclesLate, et_CP);
u64 now = CoreTiming::GetTicks();
int next = g_video_backend->Video_Sync((int)(now - s_last_sync_gpu_tick));
s_last_sync_gpu_tick = now;
if (next > 0)
CoreTiming::ScheduleEvent(next, et_CP);
}
static void DecrementerCallback(u64 userdata, int cyclesLate)
@ -239,9 +240,6 @@ void Init()
// System internal sample rate is fixed at 32KHz * 4 (16bit Stereo) / 32 bytes DMA
AUDIO_DMA_PERIOD = CPU_CORE_CLOCK / (AudioInterface::GetAIDSampleRate() * 4 / 32);
// Emulated gekko <-> flipper bus speed ratio (CPU clock / flipper clock)
CP_PERIOD = GetTicksPerSecond() / 10000;
Common::Timer::IncreaseResolution();
// store and convert localtime at boot to timebase ticks
CoreTiming::SetFakeTBStartValue((u64)(CPU_CORE_CLOCK / TIMER_RATIO) * (u64)CEXIIPL::GetGCTime());
@ -253,7 +251,7 @@ void Init()
et_Dec = CoreTiming::RegisterEvent("DecCallback", DecrementerCallback);
et_VI = CoreTiming::RegisterEvent("VICallback", VICallback);
et_SI = CoreTiming::RegisterEvent("SICallback", SICallback);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
et_CP = CoreTiming::RegisterEvent("CPCallback", CPCallback);
et_DSP = CoreTiming::RegisterEvent("DSPCallback", DSPCallback);
et_AudioDMA = CoreTiming::RegisterEvent("AudioDMACallback", AudioDMACallback);
@ -266,8 +264,9 @@ void Init()
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_SI);
CoreTiming::ScheduleEvent(AUDIO_DMA_PERIOD, et_AudioDMA);
CoreTiming::ScheduleEvent(0, et_Throttle, Common::Timer::GetTimeMs());
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
CoreTiming::ScheduleEvent(CP_PERIOD, et_CP);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread && SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
CoreTiming::ScheduleEvent(0, et_CP);
s_last_sync_gpu_tick = CoreTiming::GetTicks();
CoreTiming::ScheduleEvent(VideoInterface::GetTicksPerFrame(), et_PatchEngine);

View File

@ -49,7 +49,7 @@ class VideoSoftware : public VideoBackend
void Video_SetRendering(bool bEnabled) override;
void Video_GatherPipeBursted() override;
void Video_Sync() override {}
int Video_Sync(int ticks) override { return 0; }
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;

View File

@ -16,7 +16,6 @@
#include "Core/HW/Memmap.h"
#include "Core/HW/MMIO.h"
#include "Core/HW/ProcessorInterface.h"
#include "Core/HW/SystemTimers.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PixelEngine.h"
@ -47,8 +46,6 @@ static std::atomic<bool> s_interrupt_waiting;
static std::atomic<bool> s_interrupt_token_waiting;
static std::atomic<bool> s_interrupt_finish_waiting;
static std::atomic<u32> s_vi_ticks(CommandProcessor::m_cpClockOrigin);
static bool IsOnThread()
{
return SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
@ -546,30 +543,4 @@ void SetCpClearRegister()
{
}
void Update()
{
while (s_vi_ticks.load() > m_cpClockOrigin && fifo.isGpuReadingData && IsOnThread())
Common::YieldCPU();
if (fifo.isGpuReadingData)
s_vi_ticks.fetch_add(SystemTimers::GetTicksPerSecond() / 10000);
RunGpu();
}
u32 GetVITicks()
{
return s_vi_ticks.load();
}
void SetVITicks(u32 ticks)
{
s_vi_ticks.store(ticks);
}
void DecrementVITicks(u32 ticks)
{
s_vi_ticks.fetch_sub(ticks);
}
} // end of namespace CommandProcessor

View File

@ -121,9 +121,6 @@ union UCPClearReg
UCPClearReg(u16 _hex) {Hex = _hex; }
};
// Can be any number, low enough to not be below the number of clocks executed by the GPU per CP_PERIOD
const static u32 m_cpClockOrigin = 200000;
// Init
void Init();
void Shutdown();
@ -146,10 +143,4 @@ void SetCpControlRegister();
void SetCpStatusRegister();
void ProcessFifoEvents();
void Update();
u32 GetVITicks();
void SetVITicks(u32 ticks);
void DecrementVITicks(u32 ticks);
} // namespace CommandProcessor

View File

@ -61,6 +61,9 @@ static u8* s_video_buffer_pp_read_ptr;
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static std::atomic<int> s_sync_ticks;
static Common::Event s_sync_wakeup_event;
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
@ -99,7 +102,7 @@ void Fifo_Init()
ResetVideoBuffer();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
s_gpu_mainloop.Prepare();
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
s_sync_ticks.store(0);
}
void Fifo_Shutdown()
@ -282,6 +285,8 @@ void RunGpuLoop()
s_gpu_mainloop.Run(
[] {
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
g_video_backend->PeekMessages();
// Do nothing while paused
@ -310,63 +315,59 @@ void RunGpuLoop()
CommandProcessor::SetCPStatusFromGPU();
if (!fifo.isGpuReadingData)
{
CommandProcessor::SetVITicks(CommandProcessor::m_cpClockOrigin);
}
bool run_loop = true;
// check if we are able to run this buffer
while (run_loop && !CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
break;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || CommandProcessor::GetVITicks() > CommandProcessor::m_cpClockOrigin)
{
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer;
ReadDataFromFifo(readPtr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && CommandProcessor::GetVITicks() >= cyclesExecuted)
CommandProcessor::DecrementVITicks(cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
{
run_loop = false;
}
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
CommandProcessor::SetCPStatusFromGPU();
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU)
{
cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old > 0 && old - (int)cyclesExecuted <= 0)
s_sync_wakeup_event.Set();
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
AsyncRequests::GetInstance()->PullEvents();
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old > 0)
s_sync_wakeup_event.Set();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
VertexManager::Flush();
// don't release the GPU running state on sync GPU waits
fifo.isGpuReadingData = !run_loop;
}
}, 100);
@ -376,7 +377,9 @@ void RunGpuLoop()
void FlushGpu()
{
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
return;
s_gpu_mainloop.Wait();
@ -396,9 +399,10 @@ bool AtBreakpoint()
void RunGpu()
{
SCPFifoStruct &fifo = CommandProcessor::fifo;
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
// execute GPU
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread || g_use_deterministic_gpu_thread)
if (!param.bCPUThread || g_use_deterministic_gpu_thread)
{
bool reset_simd_state = false;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
@ -438,7 +442,7 @@ void RunGpu()
}
// wake up GPU thread
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
if (param.bCPUThread)
{
s_gpu_mainloop.Wakeup();
}
@ -471,7 +475,7 @@ void Fifo_UpdateWantDeterminism(bool want)
break;
}
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
gpu_thread = gpu_thread && param.bCPUThread;
if (g_use_deterministic_gpu_thread != gpu_thread)
{
@ -485,3 +489,40 @@ void Fifo_UpdateWantDeterminism(bool want)
}
}
}
int Fifo_Update(int ticks)
{
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
if (ticks == 0)
{
FlushGpu();
return param.iSyncGpuMaxDistance;
}
// GPU is sleeping, so no need for synchronization
if (s_gpu_mainloop.IsDone() || g_use_deterministic_gpu_thread)
{
if (s_sync_ticks.load() < 0)
{
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
}
return param.iSyncGpuMaxDistance;
}
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
if (s_sync_ticks.load() >= param.iSyncGpuMaxDistance)
{
while (s_sync_ticks.load() > 0)
{
s_sync_wakeup_event.Wait();
}
}
return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}

View File

@ -50,3 +50,4 @@ void EmulatorState(bool running);
bool AtBreakpoint();
void ResetVideoBuffer();
void Fifo_SetRendering(bool bEnabled);
int Fifo_Update(int ticks);

View File

@ -245,9 +245,9 @@ void VideoBackendHardware::Video_GatherPipeBursted()
CommandProcessor::GatherPipeBursted();
}
void VideoBackendHardware::Video_Sync()
int VideoBackendHardware::Video_Sync(int ticks)
{
FlushGpu();
return Fifo_Update(ticks);
}
void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)

View File

@ -56,9 +56,6 @@ struct SCPFifoStruct
volatile u32 bFF_LoWatermark;
volatile u32 bFF_HiWatermark;
// for GP watchdog hack
volatile u32 isGpuReadingData;
};
class VideoBackend
@ -99,7 +96,7 @@ public:
virtual void Video_GatherPipeBursted() = 0;
virtual void Video_Sync() = 0;
virtual int Video_Sync(int ticks) = 0;
// Registers MMIO handlers for the CommandProcessor registers.
virtual void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) = 0;
@ -148,7 +145,7 @@ class VideoBackendHardware : public VideoBackend
void Video_GatherPipeBursted() override;
void Video_Sync() override;
int Video_Sync(int ticks) override;
void RegisterCPMMIO(MMIO::Mapping* mmio, u32 base) override;