Fifo: Use SyncGPU timings for single core.
This commit is contained in:
parent
22b5d89bf1
commit
735da0ed69
|
@ -70,7 +70,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
|
|||
static std::thread g_save_thread;
|
||||
|
||||
// Don't forget to increase this after making changes to the savestate system
|
||||
static const u32 STATE_VERSION = 58;
|
||||
static const u32 STATE_VERSION = 59; // Last changed in PR 3490
|
||||
|
||||
// Maps savestate versions to Dolphin versions.
|
||||
// Versions after 42 don't need to be added to this list,
|
||||
|
|
|
@ -268,8 +268,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
|
||||
void GatherPipeBursted()
|
||||
{
|
||||
if (IsOnThread())
|
||||
SetCPStatusFromCPU();
|
||||
SetCPStatusFromCPU();
|
||||
|
||||
// if we aren't linked, we don't care about gather pipe data
|
||||
if (!m_CPCtrlReg.GPLinkEnable)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "Core/ConfigManager.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/HW/SystemTimers.h"
|
||||
#include "Core/NetPlayProto.h"
|
||||
|
||||
#include "VideoCommon/AsyncRequests.h"
|
||||
|
@ -31,6 +32,7 @@
|
|||
namespace Fifo
|
||||
{
|
||||
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
|
||||
static constexpr int GPU_TIME_SLOT_SIZE = 1000;
|
||||
|
||||
static bool s_skip_current_frame = false;
|
||||
|
||||
|
@ -47,7 +49,6 @@ static u8* s_fifo_aux_read_ptr;
|
|||
// and can change at runtime.
|
||||
static bool s_use_deterministic_gpu_thread;
|
||||
|
||||
static u64 s_last_sync_gpu_tick;
|
||||
static CoreTiming::EventType* s_event_sync_gpu;
|
||||
|
||||
// STATE_TO_SAVE
|
||||
|
@ -69,6 +70,7 @@ static u8* s_video_buffer_pp_read_ptr;
|
|||
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
|
||||
|
||||
static std::atomic<int> s_sync_ticks;
|
||||
static bool s_syncing_suspended;
|
||||
static Common::Event s_sync_wakeup_event;
|
||||
|
||||
void DoState(PointerWrap& p)
|
||||
|
@ -85,7 +87,7 @@ void DoState(PointerWrap& p)
|
|||
}
|
||||
|
||||
p.Do(s_skip_current_frame);
|
||||
p.Do(s_last_sync_gpu_tick);
|
||||
p.Do(s_sync_ticks);
|
||||
}
|
||||
|
||||
void PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
||||
|
@ -422,55 +424,78 @@ bool AtBreakpoint()
|
|||
|
||||
// Entry point used to kick GPU work. Visible in this span: an inline loop that
// drains the command FIFO on the calling thread, a wakeup of s_gpu_mainloop, and
// re-scheduling of the sync-GPU event when syncing had been suspended.
// NOTE(review): this text comes from a rendered diff, so removed and added lines
// appear to be mixed together (see the two consecutive `if (param.bCPUThread...)`
// heads further down) - confirm against the real file before relying on it.
void RunGpu()
|
||||
{
|
||||
SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||
const SConfig& param = SConfig::GetInstance();
|
||||
|
||||
// execute GPU
|
||||
if (!param.bCPUThread || s_use_deterministic_gpu_thread)
|
||||
{
|
||||
// Tracks whether the SIMD state was swapped below, so it is restored exactly once.
bool reset_simd_state = false;
|
||||
// Keep consuming while FIFO reads are enabled, data is pending and no breakpoint is hit.
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
{
|
||||
if (s_use_deterministic_gpu_thread)
|
||||
{
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
|
||||
s_gpu_mainloop.Wakeup();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Save the current SIMD state and load the default one on the first iteration only.
if (!reset_simd_state)
|
||||
{
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
reset_simd_state = true;
|
||||
}
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
// No cycle counter is requested here (nullptr is passed for it).
s_video_buffer_read_ptr = OpcodeDecoder::Run(
|
||||
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
|
||||
}
|
||||
|
||||
// DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||
|
||||
// Step the read pointer by 32, wrapping from CPEnd back to CPBase.
if (fifo.CPReadPointer == fifo.CPEnd)
|
||||
fifo.CPReadPointer = fifo.CPBase;
|
||||
else
|
||||
fifo.CPReadPointer += 32;
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
}
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
// Undo the SIMD state swap performed inside the loop, if any.
if (reset_simd_state)
|
||||
{
|
||||
FPURoundMode::LoadSIMDState();
|
||||
}
|
||||
}
|
||||
|
||||
// wake up GPU thread
|
||||
// NOTE(review): the next two `if` heads look like the before/after versions of one line.
if (param.bCPUThread)
|
||||
if (param.bCPUThread && !s_use_deterministic_gpu_thread)
|
||||
{
|
||||
s_gpu_mainloop.Wakeup();
|
||||
}
|
||||
|
||||
// if the sync GPU callback is suspended, wake it up.
|
||||
if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread ||
|
||||
SConfig::GetInstance().bSyncGPU)
|
||||
{
|
||||
if (s_syncing_suspended)
|
||||
{
|
||||
s_syncing_suspended = false;
|
||||
// Schedule the event one time slot ahead; the same value is passed as the third
// argument (presumably delivered to SyncGPUCallback as its first parameter - confirm).
CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drains the command FIFO on the calling thread within a tick budget.
// @ticks  Tick count to add to the budget; it is scaled by fSyncGpuOverclock and
//         combined with the value currently stored in s_sync_ticks.
// @return -1 when the budget was not overdrawn (available_ticks >= 0), otherwise the
//         overdrawn amount plus GPU_TIME_SLOT_SIZE.
static int RunGpuOnCpu(int ticks)
|
||||
{
|
||||
SCPFifoStruct& fifo = CommandProcessor::fifo;
|
||||
// Tracks whether the SIMD state was swapped below, so it is restored exactly once.
bool reset_simd_state = false;
|
||||
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
|
||||
// Same loop condition as a plain FIFO drain, plus the requirement that the budget
// has not gone negative.
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() &&
|
||||
available_ticks >= 0)
|
||||
{
|
||||
// Note that this branch does not subtract anything from available_ticks.
if (s_use_deterministic_gpu_thread)
|
||||
{
|
||||
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
|
||||
s_gpu_mainloop.Wakeup();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Save the current SIMD state and load the default one on the first iteration only.
if (!reset_simd_state)
|
||||
{
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
reset_simd_state = true;
|
||||
}
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
u32 cycles = 0;
|
||||
s_video_buffer_read_ptr = OpcodeDecoder::Run(
|
||||
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
|
||||
// Charge the cycle count written by OpcodeDecoder::Run against the budget.
available_ticks -= cycles;
|
||||
}
|
||||
|
||||
// Step the read pointer by 32, wrapping from CPEnd back to CPBase.
if (fifo.CPReadPointer == fifo.CPEnd)
|
||||
fifo.CPReadPointer = fifo.CPBase;
|
||||
else
|
||||
fifo.CPReadPointer += 32;
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
}
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
// Undo the SIMD state swap performed inside the loop, if any.
if (reset_simd_state)
|
||||
{
|
||||
FPURoundMode::LoadSIMDState();
|
||||
}
|
||||
|
||||
// Discard all available ticks as there is nothing to do any more.
|
||||
// Only a negative remainder is stored; a non-negative one is clamped to 0.
s_sync_ticks.store(std::min(available_ticks, 0));
|
||||
|
||||
// If the GPU is idle, drop the handler.
|
||||
if (available_ticks >= 0)
|
||||
return -1;
|
||||
|
||||
// Always wait at least for GPU_TIME_SLOT_SIZE cycles.
|
||||
return -available_ticks + GPU_TIME_SLOT_SIZE;
|
||||
}
|
||||
|
||||
void UpdateWantDeterminism(bool want)
|
||||
|
@ -521,24 +546,27 @@ bool UseDeterministicGPUThread()
|
|||
}
|
||||
|
||||
/* This function checks the emulated CPU - GPU distance and may wake up the GPU,
|
||||
* or block the CPU if required. It should be called by the CPU thread regulary.
|
||||
* or block the CPU if required. It should be called by the CPU thread regularly.
|
||||
* @ticks The elapsed emulated CPU time.
|
||||
* @return A good time to call Update() next.
|
||||
* @return A good time to call WaitForGpuThread() next.
|
||||
*/
|
||||
static int Update(int ticks)
|
||||
static int WaitForGpuThread(int ticks)
|
||||
{
|
||||
const SConfig& param = SConfig::GetInstance();
|
||||
|
||||
// GPU is sleeping, so no need for synchronization
|
||||
if (s_gpu_mainloop.IsDone() || s_use_deterministic_gpu_thread)
|
||||
{
|
||||
if (s_sync_ticks.load() < 0)
|
||||
if ((s_sync_ticks.load() + ticks) < 0)
|
||||
{
|
||||
int old = s_sync_ticks.fetch_add(ticks);
|
||||
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
|
||||
RunGpu();
|
||||
s_sync_ticks.store(s_sync_ticks.load() + ticks);
|
||||
return 0 - s_sync_ticks.load();
|
||||
}
|
||||
else
|
||||
{
|
||||
s_sync_ticks.store(0);
|
||||
return -1;
|
||||
}
|
||||
return param.iSyncGpuMaxDistance;
|
||||
}
|
||||
|
||||
// Wakeup GPU
|
||||
|
@ -558,24 +586,29 @@ static int Update(int ticks)
|
|||
return param.iSyncGpuMaxDistance - s_sync_ticks.load();
|
||||
}
|
||||
|
||||
// Callback registered with CoreTiming under the name "SyncGPUCallback" (see Prepare).
// It picks the next delay from RunGpuOnCpu or WaitForGpuThread and either re-schedules
// itself or marks syncing as suspended.
// NOTE(review): this text comes from a rendered diff, so removed and added lines appear
// to be mixed together (two signatures and two `int next` declarations are present) -
// confirm against the real file before relying on it.
static void SyncGPUCallback(u64 userdata, s64 cyclesLate)
|
||||
static void SyncGPUCallback(u64 ticks, s64 cyclesLate)
|
||||
{
|
||||
u64 now = CoreTiming::GetTicks();
|
||||
int next = Fifo::Update((int)(now - s_last_sync_gpu_tick));
|
||||
s_last_sync_gpu_tick = now;
|
||||
// Add the reported lateness of this callback to the tick count handed on below.
ticks += cyclesLate;
|
||||
// Stays -1 when neither branch below assigns it, which suspends syncing.
int next = -1;
|
||||
|
||||
if (next > 0)
|
||||
CoreTiming::ScheduleEvent(next, s_event_sync_gpu);
|
||||
// Without a CPU thread split, or with the deterministic GPU thread, run the FIFO here.
if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread)
|
||||
{
|
||||
next = RunGpuOnCpu((int)ticks);
|
||||
}
|
||||
else if (SConfig::GetInstance().bSyncGPU)
|
||||
{
|
||||
next = WaitForGpuThread((int)ticks);
|
||||
}
|
||||
|
||||
// A negative delay means no re-schedule; RunGpu() clears this flag and schedules again.
s_syncing_suspended = next < 0;
|
||||
if (!s_syncing_suspended)
|
||||
CoreTiming::ScheduleEvent(next, s_event_sync_gpu, next);
|
||||
}
|
||||
|
||||
// Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread.
|
||||
// Registers SyncGPUCallback as the "SyncGPUCallback" CoreTiming event and stores the
// handle in s_event_sync_gpu.
// NOTE(review): this text comes from a rendered diff; the conditional block and the
// unconditional statements after it look like the before/after versions of the body -
// confirm against the real file before relying on it.
void Prepare()
|
||||
{
|
||||
if (SConfig::GetInstance().bCPUThread && SConfig::GetInstance().bSyncGPU)
|
||||
{
|
||||
s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
|
||||
CoreTiming::ScheduleEvent(0, s_event_sync_gpu);
|
||||
s_last_sync_gpu_tick = CoreTiming::GetTicks();
|
||||
}
|
||||
s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
|
||||
// Start in the suspended state; nothing is scheduled on this path.
s_syncing_suspended = true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue