Merge pull request #3490 from degasus/singlecore

Fifo: Use SyncGPU timings for single core.
This commit is contained in:
Markus Wick 2016-09-27 10:33:47 +02:00 committed by GitHub
commit 9525a9e048
3 changed files with 100 additions and 68 deletions

View File

@ -70,7 +70,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after making changes to the savestate system
static const u32 STATE_VERSION = 58;
static const u32 STATE_VERSION = 59; // Last changed in PR 3490
// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,

View File

@ -268,8 +268,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
void GatherPipeBursted()
{
if (IsOnThread())
SetCPStatusFromCPU();
SetCPStatusFromCPU();
// if we aren't linked, we don't care about gather pipe data
if (!m_CPCtrlReg.GPLinkEnable)

View File

@ -17,6 +17,7 @@
#include "Core/ConfigManager.h"
#include "Core/CoreTiming.h"
#include "Core/HW/Memmap.h"
#include "Core/HW/SystemTimers.h"
#include "Core/NetPlayProto.h"
#include "VideoCommon/AsyncRequests.h"
@ -31,6 +32,7 @@
namespace Fifo
{
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
static constexpr int GPU_TIME_SLOT_SIZE = 1000;
static bool s_skip_current_frame = false;
@ -47,7 +49,6 @@ static u8* s_fifo_aux_read_ptr;
// and can change at runtime.
static bool s_use_deterministic_gpu_thread;
static u64 s_last_sync_gpu_tick;
static CoreTiming::EventType* s_event_sync_gpu;
// STATE_TO_SAVE
@ -69,6 +70,7 @@ static u8* s_video_buffer_pp_read_ptr;
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static std::atomic<int> s_sync_ticks;
static bool s_syncing_suspended;
static Common::Event s_sync_wakeup_event;
void DoState(PointerWrap& p)
@ -85,7 +87,7 @@ void DoState(PointerWrap& p)
}
p.Do(s_skip_current_frame);
p.Do(s_last_sync_gpu_tick);
p.Do(s_sync_ticks);
}
void PauseAndLock(bool doLock, bool unpauseOnUnlock)
@ -422,55 +424,78 @@ bool AtBreakpoint()
// Kicks GPU processing of the command FIFO and, when needed, re-arms the sync GPU event.
// NOTE(review): this listing is a rendered diff without +/- markers, so lines from the
// pre-change and post-change bodies appear together (see the two consecutive
// `if (param.bCPUThread...)` conditions below). Presumably the "execute GPU" section is the
// removed code that now lives in RunGpuOnCpu() — confirm against the actual commit.
void RunGpu()
{
SCPFifoStruct& fifo = CommandProcessor::fifo;
const SConfig& param = SConfig::GetInstance();
// execute GPU
if (!param.bCPUThread || s_use_deterministic_gpu_thread)
{
// Tracks whether the SIMD state was saved inside the loop and must be restored afterwards.
bool reset_simd_state = false;
// Consume the FIFO in 32-byte steps while reads are enabled, data is pending and no
// breakpoint has been reached.
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
if (s_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
s_gpu_mainloop.Wakeup();
}
else
{
// Save the current SIMD state once and switch to the default one before decoding.
if (!reset_simd_state)
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
reset_simd_state = true;
}
ReadDataFromFifo(fifo.CPReadPointer);
// No cycle counter is passed here (nullptr).
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
}
// DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
// Advance the read pointer, wrapping from CPEnd back to CPBase.
if (fifo.CPReadPointer == fifo.CPEnd)
fifo.CPReadPointer = fifo.CPBase;
else
fifo.CPReadPointer += 32;
fifo.CPReadWriteDistance -= 32;
}
CommandProcessor::SetCPStatusFromGPU();
// Restore the SIMD state saved inside the loop, if any.
if (reset_simd_state)
{
FPURoundMode::LoadSIMDState();
}
}
// wake up GPU thread
if (param.bCPUThread)
if (param.bCPUThread && !s_use_deterministic_gpu_thread)
{
s_gpu_mainloop.Wakeup();
}
// if the sync GPU callback is suspended, wake it up.
if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread ||
SConfig::GetInstance().bSyncGPU)
{
if (s_syncing_suspended)
{
s_syncing_suspended = false;
// Schedule the event one time slot from now; the third argument is presumably delivered
// to SyncGPUCallback as its first parameter — confirm against CoreTiming.
CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE);
}
}
}
static int RunGpuOnCpu(int ticks)
{
SCPFifoStruct& fifo = CommandProcessor::fifo;
bool reset_simd_state = false;
int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() &&
available_ticks >= 0)
{
if (s_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer);
s_gpu_mainloop.Wakeup();
}
else
{
if (!reset_simd_state)
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
reset_simd_state = true;
}
ReadDataFromFifo(fifo.CPReadPointer);
u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::Run(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
available_ticks -= cycles;
}
if (fifo.CPReadPointer == fifo.CPEnd)
fifo.CPReadPointer = fifo.CPBase;
else
fifo.CPReadPointer += 32;
fifo.CPReadWriteDistance -= 32;
}
CommandProcessor::SetCPStatusFromGPU();
if (reset_simd_state)
{
FPURoundMode::LoadSIMDState();
}
// Discard all available ticks as there is nothing to do any more.
s_sync_ticks.store(std::min(available_ticks, 0));
// If the GPU is idle, drop the handler.
if (available_ticks >= 0)
return -1;
// Always wait at least for GPU_TIME_SLOT_SIZE cycles.
return -available_ticks + GPU_TIME_SLOT_SIZE;
}
void UpdateWantDeterminism(bool want)
@ -521,24 +546,27 @@ bool UseDeterministicGPUThread()
}
/* This function checks the emulated CPU - GPU distance and may wake up the GPU,
* or block the CPU if required. It should be called by the CPU thread regulary.
* or block the CPU if required. It should be called by the CPU thread regularly.
* @ticks The elapsed emulated CPU time.
* @return A good time to call Update() next.
* @return A good time to call WaitForGpuThread() next.
*/
static int Update(int ticks)
static int WaitForGpuThread(int ticks)
{
const SConfig& param = SConfig::GetInstance();
// GPU is sleeping, so no need for synchronization
if (s_gpu_mainloop.IsDone() || s_use_deterministic_gpu_thread)
{
if (s_sync_ticks.load() < 0)
if ((s_sync_ticks.load() + ticks) < 0)
{
int old = s_sync_ticks.fetch_add(ticks);
if (old < param.iSyncGpuMinDistance && old + ticks >= param.iSyncGpuMinDistance)
RunGpu();
s_sync_ticks.store(s_sync_ticks.load() + ticks);
return 0 - s_sync_ticks.load();
}
else
{
s_sync_ticks.store(0);
return -1;
}
return param.iSyncGpuMaxDistance;
}
// Wakeup GPU
@ -558,24 +586,29 @@ static int Update(int ticks)
return param.iSyncGpuMaxDistance - s_sync_ticks.load();
}
// Callback registered with CoreTiming under the name "SyncGPUCallback" (see Prepare()).
// NOTE(review): this listing is a rendered diff without +/- markers, so both the old
// signature (`userdata`) and the new one (`ticks`) are shown, along with statements from both
// bodies (`next` is declared twice). Presumably the GetTicks()/Fifo::Update() lines and the
// `if (next > 0)` reschedule are the removed code — confirm against the actual commit.
static void SyncGPUCallback(u64 userdata, s64 cyclesLate)
static void SyncGPUCallback(u64 ticks, s64 cyclesLate)
{
u64 now = CoreTiming::GetTicks();
int next = Fifo::Update((int)(now - s_last_sync_gpu_tick));
s_last_sync_gpu_tick = now;
// Add the lateness reported by the scheduler to the tick count handled in this call.
ticks += cyclesLate;
// A negative value means the event is not rescheduled below.
int next = -1;
if (next > 0)
CoreTiming::ScheduleEvent(next, s_event_sync_gpu);
// Without a CPU thread, or with the deterministic GPU thread, process the FIFO via
// RunGpuOnCpu().
if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread)
{
next = RunGpuOnCpu((int)ticks);
}
// Otherwise, only synchronize with the GPU thread when bSyncGPU is enabled.
else if (SConfig::GetInstance().bSyncGPU)
{
next = WaitForGpuThread((int)ticks);
}
// Remember that the event is no longer scheduled so RunGpu() can re-arm it.
s_syncing_suspended = next < 0;
if (!s_syncing_suspended)
CoreTiming::ScheduleEvent(next, s_event_sync_gpu, next);
}
// Initialize GPU - CPU thread syncing. This gives us a deterministic way to start the GPU
// thread.
// NOTE(review): this listing is a rendered diff without +/- markers. Presumably the
// conditional block is the removed version and the two statements after it are its
// replacement — confirm against the actual commit.
void Prepare()
{
if (SConfig::GetInstance().bCPUThread && SConfig::GetInstance().bSyncGPU)
{
s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
CoreTiming::ScheduleEvent(0, s_event_sync_gpu);
s_last_sync_gpu_tick = CoreTiming::GetTicks();
}
// Register the sync callback with no config check and start in the suspended state;
// RunGpu() schedules the event when it finds s_syncing_suspended set.
s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
s_syncing_suspended = true;
}
}