Merge pull request #11334 from AdmiralCurtiss/globals-fifo

VideoCommon/Fifo: Refactor to class, move to Core::System.
This commit is contained in:
Mai 2022-12-10 19:13:23 +00:00 committed by GitHub
commit 54e01c660e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 390 additions and 319 deletions

View File

@ -277,7 +277,9 @@ void Stop() // - Hammertime!
// Dump left over jobs
HostDispatchJobs();
Fifo::EmulatorState(false);
auto& system = Core::System::GetInstance();
system.GetFifo().EmulatorState(false);
INFO_LOG_FMT(CONSOLE, "Stop [Main Thread]\t\t---- Shutting down ----");
@ -285,7 +287,7 @@ void Stop() // - Hammertime!
INFO_LOG_FMT(CONSOLE, "{}", StopMessage(true, "Stop CPU"));
CPU::Stop();
if (Core::System::GetInstance().IsDualCoreMode())
if (system.IsDualCoreMode())
{
// Video_EnterLoop() should now exit so that EmuThread()
// will continue concurrently with the rest of the commands
@ -597,7 +599,7 @@ static void EmuThread(std::unique_ptr<BootParameters> boot, WindowSystemInfo wsi
wiifs_guard.Dismiss();
// This adds the SyncGPU handler to CoreTiming, so now CoreTiming::Advance might block.
Fifo::Prepare();
system.GetFifo().Prepare(system);
// Setup our core
if (Config::Get(Config::MAIN_CPU_CORE) != PowerPC::CPUCore::Interpreter)
@ -622,7 +624,7 @@ static void EmuThread(std::unique_ptr<BootParameters> boot, WindowSystemInfo wsi
s_cpu_thread = std::thread(cpuThreadFunc, savestate_path, delete_savestate);
// become the GPU thread
Fifo::RunGpuLoop();
system.GetFifo().RunGpuLoop(system);
// We have now exited the Video Loop
INFO_LOG_FMT(CONSOLE, "{}", StopMessage(false, "Video Loop Ended"));
@ -766,7 +768,8 @@ static bool PauseAndLock(bool do_lock, bool unpause_on_unlock)
// video has to come after CPU, because CPU thread can wait for video thread
// (s_efbAccessRequested).
Fifo::PauseAndLock(do_lock, false);
auto& system = Core::System::GetInstance();
system.GetFifo().PauseAndLock(system, do_lock, false);
ResetRumble();
@ -1029,7 +1032,10 @@ void UpdateWantDeterminism(bool initial)
const auto ios = IOS::HLE::GetIOS();
if (ios)
ios->UpdateWantDeterminism(new_want_determinism);
Fifo::UpdateWantDeterminism(new_want_determinism);
auto& system = Core::System::GetInstance();
system.GetFifo().UpdateWantDeterminism(system, new_want_determinism);
// We need to clear the cache because some parts of the JIT depend on want_determinism,
// e.g. use of FMA.
JitInterface::ClearCache();

View File

@ -354,7 +354,8 @@ void CoreTimingManager::Idle()
// When the FIFO is processing data we must not advance, because doing so
// would desynchronize the VI. So we wait until the FIFO finishes, and in the
// meantime we process only the events required by the FIFO.
Fifo::FlushGpu();
auto& system = Core::System::GetInstance();
system.GetFifo().FlushGpu(system);
}
PowerPC::UpdatePerformanceMonitor(PowerPC::ppcState.downcount, 0, 0);

View File

@ -191,7 +191,8 @@ void Run()
static void RunAdjacentSystems(bool running)
{
// NOTE: We're assuming these will not try to call Break or EnableStepping.
Fifo::EmulatorState(running);
auto& system = Core::System::GetInstance();
system.GetFifo().EmulatorState(running);
// Core is responsible for shutting down the sound stream.
if (s_state != State::PowerDown)
AudioCommon::SetSoundStreamRunning(Core::System::GetInstance(), running);

View File

@ -173,7 +173,7 @@ void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late)
void ThrottleCallback(Core::System& system, u64 deadline, s64 cyclesLate)
{
// Allow the GPU thread to sleep. Setting this flag here limits the wakeups to 1 kHz.
Fifo::GpuMaySleep();
system.GetFifo().GpuMaySleep();
const u64 time = Common::Timer::NowUs();

View File

@ -19,6 +19,7 @@
#include "Core/HW/Sram.h"
#include "Core/HW/VideoInterface.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
namespace Core
{
@ -35,6 +36,7 @@ struct System::Impl
DVDInterface::DVDInterfaceState m_dvd_interface_state;
DVDThread::DVDThreadState m_dvd_thread_state;
ExpansionInterface::ExpansionInterfaceState m_expansion_interface_state;
Fifo::FifoManager m_fifo;
Memory::MemoryManager m_memory;
MemoryInterface::MemoryInterfaceState m_memory_interface_state;
SerialInterface::SerialInterfaceState m_serial_interface_state;
@ -120,6 +122,11 @@ ExpansionInterface::ExpansionInterfaceState& System::GetExpansionInterfaceState(
return m_impl->m_expansion_interface_state;
}
// Accessor for the FifoManager member (m_fifo) held in this System's Impl.
// The method is const but hands back a mutable reference, matching the
// neighbouring GetExpansionInterfaceState()/GetMemory() accessors.
Fifo::FifoManager& System::GetFifo() const
{
return m_impl->m_fifo;
}
Memory::MemoryManager& System::GetMemory() const
{
return m_impl->m_memory;

View File

@ -36,6 +36,10 @@ namespace ExpansionInterface
{
class ExpansionInterfaceState;
};
// Forward declaration of Fifo::FifoManager; the type is only needed by name
// here, as the reference return type of System::GetFifo().
namespace Fifo
{
class FifoManager;
}
namespace Memory
{
class MemoryManager;
@ -94,6 +98,7 @@ public:
DVDInterface::DVDInterfaceState& GetDVDInterfaceState() const;
DVDThread::DVDThreadState& GetDVDThreadState() const;
ExpansionInterface::ExpansionInterfaceState& GetExpansionInterfaceState() const;
Fifo::FifoManager& GetFifo() const;
Memory::MemoryManager& GetMemory() const;
MemoryInterface::MemoryInterfaceState& GetMemoryInterfaceState() const;
SerialInterface::SerialInterfaceState& GetSerialInterfaceState() const;

View File

@ -114,9 +114,11 @@ static void RunWithGPUThreadInactive(std::function<void()> f)
// the CPU and GPU threads are the same thread, and we already checked for the GPU thread.)
const bool was_running = Core::GetState() == Core::State::Running;
Fifo::PauseAndLock(true, was_running);
auto& system = Core::System::GetInstance();
auto& fifo = system.GetFifo();
fifo.PauseAndLock(system, true, was_running);
f();
Fifo::PauseAndLock(false, was_running);
fifo.PauseAndLock(system, false, was_running);
}
else
{

View File

@ -5,6 +5,7 @@
#include <mutex>
#include "Core/System.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
@ -90,7 +91,8 @@ void AsyncRequests::PushEvent(const AsyncRequests::Event& event, bool blocking)
m_queue.push(event);
Fifo::RunGpu();
auto& system = Core::System::GetInstance();
system.GetFifo().RunGpu(system);
if (blocking)
{
m_cond.wait(lock, [this] { return m_queue.empty(); });
@ -159,7 +161,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
break;
case Event::FIFO_RESET:
Fifo::ResetVideoBuffer();
Core::System::GetInstance().GetFifo().ResetVideoBuffer();
break;
case Event::PERF_QUERY:

View File

@ -179,14 +179,17 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
switch (bp.newvalue & 0xFF)
{
case 0x02:
{
INCSTAT(g_stats.this_frame.num_draw_done);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
if (!system.GetFifo().UseDeterministicGPUThread())
PixelEngine::SetFinish(cycles_into_future); // may generate interrupt
DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF);
return;
}
default:
WARN_LOG_FMT(VIDEO, "GXSetDrawDone ??? (value {:#04X})", bp.newvalue & 0xFFFF);
@ -194,23 +197,29 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
}
return;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
{
INCSTAT(g_stats.this_frame.num_token);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
if (!system.GetFifo().UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF);
return;
}
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
{
INCSTAT(g_stats.this_frame.num_token_int);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
if (!system.GetFifo().UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF);
return;
}
// ------------------------
// EFB copy command. This copies a rectangle from the EFB to either RAM in a texture format or to

View File

@ -223,8 +223,8 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
mmio->Register(base | STATUS_REGISTER, MMIO::ComplexRead<u16>([](Core::System& system, u32) {
auto& cp = system.GetCommandProcessor();
Fifo::SyncGPUForRegisterAccess();
cp.SetCpStatusRegister();
system.GetFifo().SyncGPUForRegisterAccess(system);
cp.SetCpStatusRegister(system);
return cp.m_cp_status_reg.Hex;
}),
MMIO::InvalidWrite<u16>());
@ -234,8 +234,8 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
auto& cp = system.GetCommandProcessor();
UCPCtrlReg tmp(val);
cp.m_cp_ctrl_reg.Hex = tmp.Hex;
cp.SetCpControlRegister();
Fifo::RunGpu();
cp.SetCpControlRegister(system);
system.GetFifo().RunGpu(system);
}));
mmio->Register(base | CLEAR_REGISTER, MMIO::DirectRead<u16>(&m_cp_clear_reg.Hex),
@ -244,7 +244,7 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
UCPClearReg tmp(val);
cp.m_cp_clear_reg.Hex = tmp.Hex;
cp.SetCpClearRegister();
Fifo::RunGpu();
system.GetFifo().RunGpu(system);
}));
mmio->Register(base | PERF_SELECT, MMIO::InvalidRead<u16>(), MMIO::Nop<u16>());
@ -284,7 +284,7 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
{
fifo_rw_distance_hi_r = MMIO::ComplexRead<u16>([](Core::System& system, u32) {
const auto& fifo = system.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
system.GetFifo().SyncGPUForRegisterAccess(system);
if (fifo.CPWritePointer.load(std::memory_order_relaxed) >=
fifo.SafeCPReadPointer.load(std::memory_order_relaxed))
{
@ -306,16 +306,16 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
{
fifo_rw_distance_hi_r = MMIO::ComplexRead<u16>([](Core::System& system, u32) {
const auto& fifo = system.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
system.GetFifo().SyncGPUForRegisterAccess(system);
return fifo.CPReadWriteDistance.load(std::memory_order_relaxed) >> 16;
});
}
mmio->Register(base | FIFO_RW_DISTANCE_HI, fifo_rw_distance_hi_r,
MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](Core::System& system, u32, u16 val) {
auto& fifo = system.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
system.GetFifo().SyncGPUForRegisterAccess(system);
WriteHigh(fifo.CPReadWriteDistance, val & WMASK_HI_RESTRICT);
Fifo::RunGpu();
system.GetFifo().RunGpu(system);
}));
mmio->Register(
@ -330,12 +330,12 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
{
fifo_read_hi_r = MMIO::ComplexRead<u16>([](Core::System& system, u32) {
auto& fifo = system.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
system.GetFifo().SyncGPUForRegisterAccess(system);
return fifo.SafeCPReadPointer.load(std::memory_order_relaxed) >> 16;
});
fifo_read_hi_w = MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](Core::System& sys, u32, u16 val) {
auto& fifo = sys.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
sys.GetFifo().SyncGPUForRegisterAccess(sys);
WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT);
fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
std::memory_order_relaxed);
@ -345,12 +345,12 @@ void CommandProcessorManager::RegisterMMIO(Core::System& system, MMIO::Mapping*
{
fifo_read_hi_r = MMIO::ComplexRead<u16>([](Core::System& system, u32) {
const auto& fifo = system.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
system.GetFifo().SyncGPUForRegisterAccess(system);
return fifo.CPReadPointer.load(std::memory_order_relaxed) >> 16;
});
fifo_read_hi_w = MMIO::ComplexWrite<u16>([WMASK_HI_RESTRICT](Core::System& sys, u32, u16 val) {
auto& fifo = sys.GetCommandProcessor().GetFifo();
Fifo::SyncGPUForRegisterAccess();
sys.GetFifo().SyncGPUForRegisterAccess(sys);
WriteHigh(fifo.CPReadPointer, val & WMASK_HI_RESTRICT);
});
}
@ -366,7 +366,7 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system)
// if we aren't linked, we don't care about gather pipe data
if (!m_cp_ctrl_reg.GPLinkEnable)
{
if (IsOnThread(system) && !Fifo::UseDeterministicGPUThread())
if (IsOnThread(system) && !system.GetFifo().UseDeterministicGPUThread())
{
// In multibuffer mode it is not allowed to write to the same FIFO that is attached to the GPU.
// Fix Pokemon XD in DC mode.
@ -374,10 +374,10 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system)
(ProcessorInterface::Fifo_CPUBase == fifo.CPBase.load(std::memory_order_relaxed)) &&
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) > 0)
{
Fifo::FlushGpu();
system.GetFifo().FlushGpu(system);
}
}
Fifo::RunGpu();
system.GetFifo().RunGpu(system);
return;
}
@ -405,7 +405,7 @@ void CommandProcessorManager::GatherPipeBursted(Core::System& system)
fifo.CPReadWriteDistance.fetch_add(GPFifo::GATHER_PIPE_SIZE, std::memory_order_seq_cst);
Fifo::RunGpu();
system.GetFifo().RunGpu(system);
ASSERT_MSG(COMMANDPROCESSOR,
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) <=
@ -442,12 +442,12 @@ void CommandProcessorManager::UpdateInterrupts(Core::System& system, u64 userdat
}
system.GetCoreTiming().ForceExceptionCheck(0);
m_interrupt_waiting.Clear();
Fifo::RunGpu();
system.GetFifo().RunGpu(system);
}
void CommandProcessorManager::UpdateInterruptsFromVideoBackend(Core::System& system, u64 userdata)
{
if (!Fifo::UseDeterministicGPUThread())
if (!system.GetFifo().UseDeterministicGPUThread())
{
system.GetCoreTiming().ScheduleEvent(0, m_event_type_update_interrupts, userdata,
CoreTiming::FromThread::NON_CPU);
@ -573,7 +573,7 @@ void CommandProcessorManager::SetCPStatusFromCPU(Core::System& system)
}
}
void CommandProcessorManager::SetCpStatusRegister()
void CommandProcessorManager::SetCpStatusRegister(Core::System& system)
{
const auto& fifo = m_fifo;
@ -583,7 +583,7 @@ void CommandProcessorManager::SetCpStatusRegister()
(fifo.CPReadPointer.load(std::memory_order_relaxed) ==
fifo.CPWritePointer.load(std::memory_order_relaxed));
m_cp_status_reg.CommandIdle = !fifo.CPReadWriteDistance.load(std::memory_order_relaxed) ||
Fifo::AtBreakpoint() ||
Fifo::AtBreakpoint(system) ||
!fifo.bFF_GPReadEnable.load(std::memory_order_relaxed);
m_cp_status_reg.UnderflowLoWatermark = fifo.bFF_LoWatermark.load(std::memory_order_relaxed);
m_cp_status_reg.OverflowHiWatermark = fifo.bFF_HiWatermark.load(std::memory_order_relaxed);
@ -597,7 +597,7 @@ void CommandProcessorManager::SetCpStatusRegister()
m_cp_status_reg.UnderflowLoWatermark ? "ON" : "OFF");
}
void CommandProcessorManager::SetCpControlRegister()
void CommandProcessorManager::SetCpControlRegister(Core::System& system)
{
auto& fifo = m_fifo;
@ -610,7 +610,7 @@ void CommandProcessorManager::SetCpControlRegister()
if (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) && !m_cp_ctrl_reg.GPReadEnable)
{
fifo.bFF_GPReadEnable.store(m_cp_ctrl_reg.GPReadEnable, std::memory_order_relaxed);
Fifo::FlushGpu();
system.GetFifo().FlushGpu(system);
}
else
{

View File

@ -174,8 +174,8 @@ public:
bool IsInterruptWaiting() const;
void SetCpClearRegister();
void SetCpControlRegister();
void SetCpStatusRegister();
void SetCpControlRegister(Core::System& system);
void SetCpStatusRegister(Core::System& system);
void HandleUnknownOpcode(u8 cmd_byte, const u8* buffer, bool preprocess);

View File

@ -34,88 +34,47 @@
namespace Fifo
{
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
static constexpr int GPU_TIME_SLOT_SIZE = 1000;
static Common::BlockingLoop s_gpu_mainloop;
FifoManager::FifoManager() = default;
FifoManager::~FifoManager() = default;
static Common::Flag s_emu_running_state;
// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
static u8* s_fifo_aux_write_ptr;
static u8* s_fifo_aux_read_ptr;
// This could be in SConfig, but it depends on multiple settings
// and can change at runtime.
static bool s_use_deterministic_gpu_thread;
static CoreTiming::EventType* s_event_sync_gpu;
// STATE_TO_SAVE
static u8* s_video_buffer;
static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
static u8* s_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In deterministic GPU thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and points to what it's already
// processed as much of as possible - in the case of a partial command which
// caused it to stop, not the same as the read ptr. It's written by the GPU,
// under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
static std::atomic<int> s_sync_ticks;
static bool s_syncing_suspended;
static Common::Event s_sync_wakeup_event;
static std::optional<size_t> s_config_callback_id = std::nullopt;
static bool s_config_sync_gpu = false;
static int s_config_sync_gpu_max_distance = 0;
static int s_config_sync_gpu_min_distance = 0;
static float s_config_sync_gpu_overclock = 0.0f;
static void RefreshConfig()
void FifoManager::RefreshConfig()
{
s_config_sync_gpu = Config::Get(Config::MAIN_SYNC_GPU);
s_config_sync_gpu_max_distance = Config::Get(Config::MAIN_SYNC_GPU_MAX_DISTANCE);
s_config_sync_gpu_min_distance = Config::Get(Config::MAIN_SYNC_GPU_MIN_DISTANCE);
s_config_sync_gpu_overclock = Config::Get(Config::MAIN_SYNC_GPU_OVERCLOCK);
m_config_sync_gpu = Config::Get(Config::MAIN_SYNC_GPU);
m_config_sync_gpu_max_distance = Config::Get(Config::MAIN_SYNC_GPU_MAX_DISTANCE);
m_config_sync_gpu_min_distance = Config::Get(Config::MAIN_SYNC_GPU_MIN_DISTANCE);
m_config_sync_gpu_overclock = Config::Get(Config::MAIN_SYNC_GPU_OVERCLOCK);
}
void DoState(PointerWrap& p)
void FifoManager::DoState(PointerWrap& p)
{
p.DoArray(s_video_buffer, FIFO_SIZE);
u8* write_ptr = s_video_buffer_write_ptr;
p.DoPointer(write_ptr, s_video_buffer);
s_video_buffer_write_ptr = write_ptr;
p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
if (p.IsReadMode() && s_use_deterministic_gpu_thread)
p.DoArray(m_video_buffer, FIFO_SIZE);
u8* write_ptr = m_video_buffer_write_ptr;
p.DoPointer(write_ptr, m_video_buffer);
m_video_buffer_write_ptr = write_ptr;
p.DoPointer(m_video_buffer_read_ptr, m_video_buffer);
if (p.IsReadMode() && m_use_deterministic_gpu_thread)
{
// We're good and paused, right?
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
m_video_buffer_seen_ptr = m_video_buffer_pp_read_ptr = m_video_buffer_read_ptr;
}
p.Do(s_sync_ticks);
p.Do(s_syncing_suspended);
p.Do(m_sync_ticks);
p.Do(m_syncing_suspended);
}
void PauseAndLock(bool doLock, bool unpauseOnUnlock)
void FifoManager::PauseAndLock(Core::System& system, bool doLock, bool unpauseOnUnlock)
{
if (doLock)
{
SyncGPU(SyncGPUReason::Other);
EmulatorState(false);
if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread)
if (!system.IsDualCoreMode() || m_use_deterministic_gpu_thread)
return;
s_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI);
m_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI);
}
else
{
@ -124,116 +83,115 @@ void PauseAndLock(bool doLock, bool unpauseOnUnlock)
}
}
void Init()
void FifoManager::Init(Core::System& system)
{
if (!s_config_callback_id)
s_config_callback_id = Config::AddConfigChangedCallback(RefreshConfig);
if (!m_config_callback_id)
m_config_callback_id = Config::AddConfigChangedCallback([this] { RefreshConfig(); });
RefreshConfig();
// Padded so that SIMD overreads in the vertex loader are safe
s_video_buffer = static_cast<u8*>(Common::AllocateMemoryPages(FIFO_SIZE + 4));
m_video_buffer = static_cast<u8*>(Common::AllocateMemoryPages(FIFO_SIZE + 4));
ResetVideoBuffer();
if (Core::System::GetInstance().IsDualCoreMode())
s_gpu_mainloop.Prepare();
s_sync_ticks.store(0);
if (system.IsDualCoreMode())
m_gpu_mainloop.Prepare();
m_sync_ticks.store(0);
}
void Shutdown()
void FifoManager::Shutdown()
{
if (s_gpu_mainloop.IsRunning())
if (m_gpu_mainloop.IsRunning())
PanicAlertFmt("FIFO shutting down while active");
Common::FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
s_video_buffer = nullptr;
s_video_buffer_write_ptr = nullptr;
s_video_buffer_pp_read_ptr = nullptr;
s_video_buffer_read_ptr = nullptr;
s_video_buffer_seen_ptr = nullptr;
s_fifo_aux_write_ptr = nullptr;
s_fifo_aux_read_ptr = nullptr;
Common::FreeMemoryPages(m_video_buffer, FIFO_SIZE + 4);
m_video_buffer = nullptr;
m_video_buffer_write_ptr = nullptr;
m_video_buffer_pp_read_ptr = nullptr;
m_video_buffer_read_ptr = nullptr;
m_video_buffer_seen_ptr = nullptr;
m_fifo_aux_write_ptr = nullptr;
m_fifo_aux_read_ptr = nullptr;
if (s_config_callback_id)
if (m_config_callback_id)
{
Config::RemoveConfigChangedCallback(*s_config_callback_id);
s_config_callback_id = std::nullopt;
Config::RemoveConfigChangedCallback(*m_config_callback_id);
m_config_callback_id = std::nullopt;
}
}
// May be executed from any thread, even the graphics thread.
// Created to allow for self shutdown.
void ExitGpuLoop()
void FifoManager::ExitGpuLoop(Core::System& system)
{
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
auto& fifo = command_processor.GetFifo();
// This should break the wait loop in CPU thread
fifo.bFF_GPReadEnable.store(0, std::memory_order_relaxed);
FlushGpu();
FlushGpu(system);
// Terminate GPU thread loop
s_emu_running_state.Set();
s_gpu_mainloop.Stop(s_gpu_mainloop.kNonBlock);
m_emu_running_state.Set();
m_gpu_mainloop.Stop(m_gpu_mainloop.kNonBlock);
}
void EmulatorState(bool running)
void FifoManager::EmulatorState(bool running)
{
s_emu_running_state.Set(running);
m_emu_running_state.Set(running);
if (running)
s_gpu_mainloop.Wakeup();
m_gpu_mainloop.Wakeup();
else
s_gpu_mainloop.AllowSleep();
m_gpu_mainloop.AllowSleep();
}
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
void FifoManager::SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
if (s_use_deterministic_gpu_thread)
if (m_use_deterministic_gpu_thread)
{
s_gpu_mainloop.Wait();
if (!s_gpu_mainloop.IsRunning())
m_gpu_mainloop.Wait();
if (!m_gpu_mainloop.IsRunning())
return;
// Opportunistically reset FIFOs so we don't wrap around.
if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
if (may_move_read_ptr && m_fifo_aux_write_ptr != m_fifo_aux_read_ptr)
{
PanicAlertFmt("Aux FIFO not synced ({}, {})", fmt::ptr(s_fifo_aux_write_ptr),
fmt::ptr(s_fifo_aux_read_ptr));
PanicAlertFmt("Aux FIFO not synced ({}, {})", fmt::ptr(m_fifo_aux_write_ptr),
fmt::ptr(m_fifo_aux_read_ptr));
}
memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
s_fifo_aux_read_ptr = s_fifo_aux_data;
memmove(m_fifo_aux_data, m_fifo_aux_read_ptr, m_fifo_aux_write_ptr - m_fifo_aux_read_ptr);
m_fifo_aux_write_ptr -= (m_fifo_aux_read_ptr - m_fifo_aux_data);
m_fifo_aux_read_ptr = m_fifo_aux_data;
if (may_move_read_ptr)
{
u8* write_ptr = s_video_buffer_write_ptr;
u8* write_ptr = m_video_buffer_write_ptr;
// what's left over in the buffer
size_t size = write_ptr - s_video_buffer_pp_read_ptr;
size_t size = write_ptr - m_video_buffer_pp_read_ptr;
memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
memmove(m_video_buffer, m_video_buffer_pp_read_ptr, size);
// This change always decreases the pointers. We write seen_ptr
// after write_ptr here, and read it before in RunGpuLoop, so
// 'write_ptr > seen_ptr' there cannot become spuriously true.
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
s_video_buffer_pp_read_ptr = s_video_buffer;
s_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_seen_ptr = write_ptr;
m_video_buffer_write_ptr = write_ptr = m_video_buffer + size;
m_video_buffer_pp_read_ptr = m_video_buffer;
m_video_buffer_read_ptr = m_video_buffer;
m_video_buffer_seen_ptr = write_ptr;
}
}
}
void PushFifoAuxBuffer(const void* ptr, size_t size)
void FifoManager::PushFifoAuxBuffer(const void* ptr, size_t size)
{
if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
if (size > (size_t)(m_fifo_aux_data + FIFO_SIZE - m_fifo_aux_write_ptr))
{
SyncGPU(SyncGPUReason::AuxSpace, /* may_move_read_ptr */ false);
if (!s_gpu_mainloop.IsRunning())
if (!m_gpu_mainloop.IsRunning())
{
// GPU is shutting down
return;
}
if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
if (size > (size_t)(m_fifo_aux_data + FIFO_SIZE - m_fifo_aux_write_ptr))
{
// That will sync us up to the last 32 bytes, so this short region
// of FIFO would have to point to a 2MB display list or something.
@ -241,63 +199,62 @@ void PushFifoAuxBuffer(const void* ptr, size_t size)
return;
}
}
memcpy(s_fifo_aux_write_ptr, ptr, size);
s_fifo_aux_write_ptr += size;
memcpy(m_fifo_aux_write_ptr, ptr, size);
m_fifo_aux_write_ptr += size;
}
void* PopFifoAuxBuffer(size_t size)
void* FifoManager::PopFifoAuxBuffer(size_t size)
{
void* ret = s_fifo_aux_read_ptr;
s_fifo_aux_read_ptr += size;
void* ret = m_fifo_aux_read_ptr;
m_fifo_aux_read_ptr += size;
return ret;
}
// Description: RunGpuLoop() sends data through this function.
static void ReadDataFromFifo(u32 readPtr)
void FifoManager::ReadDataFromFifo(Core::System& system, u32 readPtr)
{
if (GPFifo::GATHER_PIPE_SIZE >
static_cast<size_t>(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
static_cast<size_t>(m_video_buffer + FIFO_SIZE - m_video_buffer_write_ptr))
{
const size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
const size_t existing_len = m_video_buffer_write_ptr - m_video_buffer_read_ptr;
if (GPFifo::GATHER_PIPE_SIZE > static_cast<size_t>(FIFO_SIZE - existing_len))
{
PanicAlertFmt("FIFO out of bounds (existing {} + new {} > {})", existing_len,
GPFifo::GATHER_PIPE_SIZE, FIFO_SIZE);
return;
}
memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
s_video_buffer_write_ptr = s_video_buffer + existing_len;
s_video_buffer_read_ptr = s_video_buffer;
memmove(m_video_buffer, m_video_buffer_read_ptr, existing_len);
m_video_buffer_write_ptr = m_video_buffer + existing_len;
m_video_buffer_read_ptr = m_video_buffer;
}
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
auto& system = Core::System::GetInstance();
// Copy new video instructions to m_video_buffer for future use in rendering the new picture
auto& memory = system.GetMemory();
memory.CopyFromEmu(s_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE);
s_video_buffer_write_ptr += GPFifo::GATHER_PIPE_SIZE;
memory.CopyFromEmu(m_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE);
m_video_buffer_write_ptr += GPFifo::GATHER_PIPE_SIZE;
}
// The deterministic_gpu_thread version.
static void ReadDataFromFifoOnCPU(u32 readPtr)
void FifoManager::ReadDataFromFifoOnCPU(Core::System& system, u32 readPtr)
{
u8* write_ptr = s_video_buffer_write_ptr;
if (GPFifo::GATHER_PIPE_SIZE > static_cast<size_t>(s_video_buffer + FIFO_SIZE - write_ptr))
u8* write_ptr = m_video_buffer_write_ptr;
if (GPFifo::GATHER_PIPE_SIZE > static_cast<size_t>(m_video_buffer + FIFO_SIZE - write_ptr))
{
// We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU.
SyncGPU(SyncGPUReason::Wraparound);
if (!s_gpu_mainloop.IsRunning())
if (!m_gpu_mainloop.IsRunning())
{
// GPU is shutting down, so the next asserts may fail
return;
}
if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
if (m_video_buffer_pp_read_ptr != m_video_buffer_read_ptr)
{
PanicAlertFmt("Desynced read pointers");
return;
}
write_ptr = s_video_buffer_write_ptr;
const size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
write_ptr = m_video_buffer_write_ptr;
const size_t existing_len = write_ptr - m_video_buffer_pp_read_ptr;
if (GPFifo::GATHER_PIPE_SIZE > static_cast<size_t>(FIFO_SIZE - existing_len))
{
PanicAlertFmt("FIFO out of bounds (existing {} + new {} > {})", existing_len,
@ -305,57 +262,55 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
return;
}
}
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
memory.CopyFromEmu(s_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE);
s_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(s_video_buffer_pp_read_ptr, write_ptr + GPFifo::GATHER_PIPE_SIZE), nullptr);
memory.CopyFromEmu(m_video_buffer_write_ptr, readPtr, GPFifo::GATHER_PIPE_SIZE);
m_video_buffer_pp_read_ptr = OpcodeDecoder::RunFifo<true>(
DataReader(m_video_buffer_pp_read_ptr, write_ptr + GPFifo::GATHER_PIPE_SIZE), nullptr);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + GPFifo::GATHER_PIPE_SIZE;
m_video_buffer_write_ptr = write_ptr + GPFifo::GATHER_PIPE_SIZE;
}
void ResetVideoBuffer()
void FifoManager::ResetVideoBuffer()
{
s_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_write_ptr = s_video_buffer;
s_video_buffer_seen_ptr = s_video_buffer;
s_video_buffer_pp_read_ptr = s_video_buffer;
s_fifo_aux_write_ptr = s_fifo_aux_data;
s_fifo_aux_read_ptr = s_fifo_aux_data;
m_video_buffer_read_ptr = m_video_buffer;
m_video_buffer_write_ptr = m_video_buffer;
m_video_buffer_seen_ptr = m_video_buffer;
m_video_buffer_pp_read_ptr = m_video_buffer;
m_fifo_aux_write_ptr = m_fifo_aux_data;
m_fifo_aux_read_ptr = m_fifo_aux_data;
}
// Description: Main FIFO update loop
// Purpose: Keep the Core HW updated about the CPU-GPU distance
void RunGpuLoop()
void FifoManager::RunGpuLoop(Core::System& system)
{
AsyncRequests::GetInstance()->SetEnable(true);
AsyncRequests::GetInstance()->SetPassthrough(false);
s_gpu_mainloop.Run(
[] {
m_gpu_mainloop.Run(
[this, &system] {
// Run events from the CPU thread.
AsyncRequests::GetInstance()->PullEvents();
// Do nothing while paused
if (!s_emu_running_state.IsSet())
if (!m_emu_running_state.IsSet())
return;
if (s_use_deterministic_gpu_thread)
if (m_use_deterministic_gpu_thread)
{
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
u8* seen_ptr = m_video_buffer_seen_ptr;
u8* write_ptr = m_video_buffer_write_ptr;
// See comment in SyncGPU
if (write_ptr > seen_ptr)
{
s_video_buffer_read_ptr =
OpcodeDecoder::RunFifo(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr);
s_video_buffer_seen_ptr = write_ptr;
m_video_buffer_read_ptr =
OpcodeDecoder::RunFifo(DataReader(m_video_buffer_read_ptr, write_ptr), nullptr);
m_video_buffer_seen_ptr = write_ptr;
}
}
else
{
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
auto& fifo = command_processor.GetFifo();
command_processor.SetCPStatusFromGPU(system);
@ -363,14 +318,14 @@ void RunGpuLoop()
// check if we are able to run this buffer
while (!command_processor.IsInterruptWaiting() &&
fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint())
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint(system))
{
if (s_config_sync_gpu && s_sync_ticks.load() < s_config_sync_gpu_min_distance)
if (m_config_sync_gpu && m_sync_ticks.load() < m_config_sync_gpu_min_distance)
break;
u32 cyclesExecuted = 0;
u32 readPtr = fifo.CPReadPointer.load(std::memory_order_relaxed);
ReadDataFromFifo(readPtr);
ReadDataFromFifo(system, readPtr);
if (readPtr == fifo.CPEnd.load(std::memory_order_relaxed))
readPtr = fifo.CPBase.load(std::memory_order_relaxed);
@ -385,13 +340,13 @@ void RunGpuLoop()
"instability in the game. Please report it.",
distance);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted);
u8* write_ptr = m_video_buffer_write_ptr;
m_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(m_video_buffer_read_ptr, write_ptr), &cyclesExecuted);
fifo.CPReadPointer.store(readPtr, std::memory_order_relaxed);
fifo.CPReadWriteDistance.fetch_sub(GPFifo::GATHER_PIPE_SIZE, std::memory_order_seq_cst);
if ((write_ptr - s_video_buffer_read_ptr) == 0)
if ((write_ptr - m_video_buffer_read_ptr) == 0)
{
fifo.SafeCPReadPointer.store(fifo.CPReadPointer.load(std::memory_order_relaxed),
std::memory_order_relaxed);
@ -399,13 +354,15 @@ void RunGpuLoop()
command_processor.SetCPStatusFromGPU(system);
if (s_config_sync_gpu)
if (m_config_sync_gpu)
{
cyclesExecuted = (int)(cyclesExecuted / s_config_sync_gpu_overclock);
int old = s_sync_ticks.fetch_sub(cyclesExecuted);
if (old >= s_config_sync_gpu_max_distance &&
old - (int)cyclesExecuted < s_config_sync_gpu_max_distance)
s_sync_wakeup_event.Set();
cyclesExecuted = (int)(cyclesExecuted / m_config_sync_gpu_overclock);
int old = m_sync_ticks.fetch_sub(cyclesExecuted);
if (old >= m_config_sync_gpu_max_distance &&
old - (int)cyclesExecuted < m_config_sync_gpu_max_distance)
{
m_sync_wakeup_event.Set();
}
}
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
@ -416,11 +373,11 @@ void RunGpuLoop()
}
// fast skip remaining GPU time if fifo is empty
if (s_sync_ticks.load() > 0)
if (m_sync_ticks.load() > 0)
{
int old = s_sync_ticks.exchange(0);
if (old >= s_config_sync_gpu_max_distance)
s_sync_wakeup_event.Set();
int old = m_sync_ticks.exchange(0);
if (old >= m_config_sync_gpu_max_distance)
m_sync_wakeup_event.Set();
}
// The fifo is empty and it's unlikely we will get any more work in the near future.
@ -435,22 +392,21 @@ void RunGpuLoop()
AsyncRequests::GetInstance()->SetPassthrough(true);
}
// Calls Wait() on the GPU main loop, but only when running in dual core mode
// without the deterministic GPU thread; in every other configuration this
// returns immediately without touching the loop.
// NOTE(review): Wait() presumably blocks until the GPU loop has drained its
// pending work - confirm against Common::BlockingLoop.
void FlushGpu()
void FifoManager::FlushGpu(Core::System& system)
{
if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread)
if (!system.IsDualCoreMode() || m_use_deterministic_gpu_thread)
return;
s_gpu_mainloop.Wait();
m_gpu_mainloop.Wait();
}
// Forwards to AllowSleep() on the GPU main loop. Takes no arguments and
// returns nothing.
// NOTE(review): presumably this lets the GPU thread go idle instead of
// spinning - confirm against Common::BlockingLoop::AllowSleep.
void GpuMaySleep()
void FifoManager::GpuMaySleep()
{
s_gpu_mainloop.AllowSleep();
m_gpu_mainloop.AllowSleep();
}
bool AtBreakpoint()
bool AtBreakpoint(Core::System& system)
{
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
const auto& fifo = command_processor.GetFifo();
return fifo.bFF_BPEnable.load(std::memory_order_relaxed) &&
@ -458,44 +414,42 @@ bool AtBreakpoint()
fifo.CPBreakpoint.load(std::memory_order_relaxed));
}
// Kicks GPU processing:
//  - in dual core mode without the deterministic GPU thread, wakes the GPU main loop;
//  - in single core mode, deterministic GPU thread mode, or when SyncGPU is
//    configured, re-schedules the SyncGPU CoreTiming event if it is currently suspended.
// Both steps can apply in the same call (dual core with SyncGPU enabled).
void RunGpu()
void FifoManager::RunGpu(Core::System& system)
{
auto& system = Core::System::GetInstance();
const bool is_dual_core = system.IsDualCoreMode();
// wake up GPU thread
if (is_dual_core && !s_use_deterministic_gpu_thread)
if (is_dual_core && !m_use_deterministic_gpu_thread)
{
s_gpu_mainloop.Wakeup();
m_gpu_mainloop.Wakeup();
}
// if the sync GPU callback is suspended, wake it up.
if (!is_dual_core || s_use_deterministic_gpu_thread || s_config_sync_gpu)
if (!is_dual_core || m_use_deterministic_gpu_thread || m_config_sync_gpu)
{
if (s_syncing_suspended)
if (m_syncing_suspended)
{
// Clear the flag first so the event is only scheduled once per suspension.
// The event fires GPU_TIME_SLOT_SIZE cycles from now; the trailing
// GPU_TIME_SLOT_SIZE is presumably the user data delivered to
// SyncGPUCallback as `ticks` - confirm against CoreTiming::ScheduleEvent.
s_syncing_suspended = false;
system.GetCoreTiming().ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu,
m_syncing_suspended = false;
system.GetCoreTiming().ScheduleEvent(GPU_TIME_SLOT_SIZE, m_event_sync_gpu,
GPU_TIME_SLOT_SIZE);
}
}
}
static int RunGpuOnCpu(int ticks)
int FifoManager::RunGpuOnCpu(Core::System& system, int ticks)
{
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
auto& fifo = command_processor.GetFifo();
bool reset_simd_state = false;
int available_ticks = int(ticks * s_config_sync_gpu_overclock) + s_sync_ticks.load();
int available_ticks = int(ticks * m_config_sync_gpu_overclock) + m_sync_ticks.load();
while (fifo.bFF_GPReadEnable.load(std::memory_order_relaxed) &&
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint() &&
fifo.CPReadWriteDistance.load(std::memory_order_relaxed) && !AtBreakpoint(system) &&
available_ticks >= 0)
{
if (s_use_deterministic_gpu_thread)
if (m_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(fifo.CPReadPointer.load(std::memory_order_relaxed));
s_gpu_mainloop.Wakeup();
ReadDataFromFifoOnCPU(system, fifo.CPReadPointer.load(std::memory_order_relaxed));
m_gpu_mainloop.Wakeup();
}
else
{
@ -505,10 +459,10 @@ static int RunGpuOnCpu(int ticks)
FPURoundMode::LoadDefaultSIMDState();
reset_simd_state = true;
}
ReadDataFromFifo(fifo.CPReadPointer.load(std::memory_order_relaxed));
ReadDataFromFifo(system, fifo.CPReadPointer.load(std::memory_order_relaxed));
u32 cycles = 0;
s_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles);
m_video_buffer_read_ptr = OpcodeDecoder::RunFifo(
DataReader(m_video_buffer_read_ptr, m_video_buffer_write_ptr), &cycles);
available_ticks -= cycles;
}
@ -534,7 +488,7 @@ static int RunGpuOnCpu(int ticks)
}
// Discard all available ticks as there is nothing to do any more.
s_sync_ticks.store(std::min(available_ticks, 0));
m_sync_ticks.store(std::min(available_ticks, 0));
// If the GPU is idle, drop the handler.
if (available_ticks >= 0)
@ -544,7 +498,7 @@ static int RunGpuOnCpu(int ticks)
return -available_ticks + GPU_TIME_SLOT_SIZE;
}
void UpdateWantDeterminism(bool want)
void FifoManager::UpdateWantDeterminism(Core::System& system, bool want)
{
// We are paused (or not running at all yet), so
// it should be safe to change this.
@ -562,89 +516,84 @@ void UpdateWantDeterminism(bool want)
break;
}
gpu_thread = gpu_thread && Core::System::GetInstance().IsDualCoreMode();
gpu_thread = gpu_thread && system.IsDualCoreMode();
if (s_use_deterministic_gpu_thread != gpu_thread)
if (m_use_deterministic_gpu_thread != gpu_thread)
{
s_use_deterministic_gpu_thread = gpu_thread;
m_use_deterministic_gpu_thread = gpu_thread;
if (gpu_thread)
{
// These haven't been updated in non-deterministic mode.
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
m_video_buffer_seen_ptr = m_video_buffer_pp_read_ptr = m_video_buffer_read_ptr;
CopyPreprocessCPStateFromMain();
VertexLoaderManager::MarkAllDirty();
}
}
}
// Returns the current value of the deterministic-GPU-thread flag.
bool UseDeterministicGPUThread()
{
return s_use_deterministic_gpu_thread;
}
/* Checks the emulated CPU - GPU distance and may wake up the GPU, or block
* the CPU if required. It should be called by the CPU thread regularly.
* @ticks The amount of emulated CPU time that has passed.
* @return The delay after which WaitForGpuThread() should be called again,
*         or -1 when the GPU is idle and polling can stop.
*/
static int WaitForGpuThread(int ticks)
int FifoManager::WaitForGpuThread(Core::System& system, int ticks)
{
// Credit the elapsed CPU time to the shared tick counter; `old` is the
// distance before this call and `now` the distance after it.
int old = s_sync_ticks.fetch_add(ticks);
int old = m_sync_ticks.fetch_add(ticks);
int now = old + ticks;
// GPU is idle, so stop polling.
if (old >= 0 && s_gpu_mainloop.IsDone())
if (old >= 0 && m_gpu_mainloop.IsDone())
return -1;
// Wakeup GPU
// (only on the call where the distance crosses the minimum threshold from below)
if (old < s_config_sync_gpu_min_distance && now >= s_config_sync_gpu_min_distance)
RunGpu();
if (old < m_config_sync_gpu_min_distance && now >= m_config_sync_gpu_min_distance)
RunGpu(system);
// If the GPU is still sleeping, wait for a longer time
// (one time slot plus the ticks still missing to reach the minimum distance)
if (now < s_config_sync_gpu_min_distance)
return GPU_TIME_SLOT_SIZE + s_config_sync_gpu_min_distance - now;
if (now < m_config_sync_gpu_min_distance)
return GPU_TIME_SLOT_SIZE + m_config_sync_gpu_min_distance - now;
// Wait for GPU
// The CPU has reached the maximum distance: block on the wakeup event, which
// RunGpuLoop sets once the tick counter drops back below the maximum distance.
if (now >= s_config_sync_gpu_max_distance)
s_sync_wakeup_event.Wait();
if (now >= m_config_sync_gpu_max_distance)
m_sync_wakeup_event.Wait();
return GPU_TIME_SLOT_SIZE;
}
// CoreTiming event handler, registered under the name "SyncGPUCallback" in Prepare().
// @system     The system whose FIFO state is being synchronized.
// @ticks      Tick count supplied when the event was scheduled; cyclesLate is added
//             to it before use.
// @cyclesLate How late the event fired, as reported by CoreTiming.
// Depending on the mode, this runs the GPU on the calling thread (single core or
// deterministic GPU thread), synchronizes with the GPU thread (SyncGPU configured),
// or does nothing. It re-schedules itself unless the chosen path returned a negative
// delay, in which case syncing is marked suspended until RunGpu() re-arms it.
static void SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate)
void FifoManager::SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate)
{
ticks += cyclesLate;
// -1 means "nothing to do": stays negative when neither branch below applies.
int next = -1;
if (!system.IsDualCoreMode() || s_use_deterministic_gpu_thread)
auto& fifo = system.GetFifo();
if (!system.IsDualCoreMode() || fifo.m_use_deterministic_gpu_thread)
{
next = RunGpuOnCpu((int)ticks);
next = fifo.RunGpuOnCpu(system, (int)ticks);
}
else if (s_config_sync_gpu)
else if (fifo.m_config_sync_gpu)
{
next = WaitForGpuThread((int)ticks);
next = fifo.WaitForGpuThread(system, (int)ticks);
}
// A negative delay suspends the callback; otherwise schedule the next run
// `next` cycles from now and pass the same value along with the event.
s_syncing_suspended = next < 0;
if (!s_syncing_suspended)
system.GetCoreTiming().ScheduleEvent(next, s_event_sync_gpu, next);
fifo.m_syncing_suspended = next < 0;
if (!fifo.m_syncing_suspended)
system.GetCoreTiming().ScheduleEvent(next, fifo.m_event_sync_gpu, next);
}
// Synchronizes CPU and GPU state ahead of a register access.
// First performs SyncGPU(SyncGPUReason::Other). Then, in single core mode or with the
// deterministic GPU thread, runs the GPU on the calling thread for one
// GPU_TIME_SLOT_SIZE slice; otherwise, if SyncGPU is configured, synchronizes with the
// GPU thread for the same slice. The return values of both helpers are discarded.
void SyncGPUForRegisterAccess()
void FifoManager::SyncGPUForRegisterAccess(Core::System& system)
{
SyncGPU(SyncGPUReason::Other);
if (!Core::System::GetInstance().IsDualCoreMode() || s_use_deterministic_gpu_thread)
RunGpuOnCpu(GPU_TIME_SLOT_SIZE);
else if (s_config_sync_gpu)
WaitForGpuThread(GPU_TIME_SLOT_SIZE);
if (!system.IsDualCoreMode() || m_use_deterministic_gpu_thread)
RunGpuOnCpu(system, GPU_TIME_SLOT_SIZE);
else if (m_config_sync_gpu)
WaitForGpuThread(system, GPU_TIME_SLOT_SIZE);
}
// Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread.
// Registers SyncGPUCallback with CoreTiming under the name "SyncGPUCallback" and stores
// the returned event handle. Must be called from the CPU thread (per the declaration
// in the header).
void Prepare()
void FifoManager::Prepare(Core::System& system)
{
s_event_sync_gpu =
Core::System::GetInstance().GetCoreTiming().RegisterEvent("SyncGPUCallback", SyncGPUCallback);
s_syncing_suspended = true;
m_event_sync_gpu = system.GetCoreTiming().RegisterEvent("SyncGPUCallback", SyncGPUCallback);
// Start suspended: nothing is scheduled here. RunGpu() schedules the event the
// first time it finds this flag set.
m_syncing_suspended = true;
}
} // namespace Fifo

View File

@ -3,21 +3,28 @@
#pragma once
#include <atomic>
#include <cstddef>
#include <optional>
#include "Common/BlockingLoop.h"
#include "Common/CommonTypes.h"
#include "Common/Event.h"
#include "Common/Flag.h"
class PointerWrap;
namespace Core
{
class System;
}
namespace CoreTiming
{
struct EventType;
}
namespace Fifo
{
void Init();
void Shutdown();
void Prepare(); // Must be called from the CPU thread.
void DoState(PointerWrap& f);
void PauseAndLock(bool doLock, bool unpauseOnUnlock);
void UpdateWantDeterminism(bool want);
bool UseDeterministicGPUThread();
// Used for diagnostics.
enum class SyncGPUReason
{
@ -29,23 +36,96 @@ enum class SyncGPUReason
Swap,
AuxSpace,
};
// In deterministic GPU thread mode this waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
// In single core mode, this runs the GPU for a single slice.
// In dual core mode, this synchronizes with the GPU thread.
void SyncGPUForRegisterAccess();
// Holds the FIFO / GPU-loop state (GPU main loop, aux buffer, video buffer pointers,
// sync tick counter, cached config values) as instance members. Callers obtain the
// instance through Core::System::GetFifo(). The class is neither copyable nor movable.
class FifoManager final
{
public:
FifoManager();
FifoManager(const FifoManager& other) = delete;
FifoManager(FifoManager&& other) = delete;
FifoManager& operator=(const FifoManager& other) = delete;
FifoManager& operator=(FifoManager&& other) = delete;
~FifoManager();
void PushFifoAuxBuffer(const void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void Init(Core::System& system);
void Shutdown();
void Prepare(Core::System& system); // Must be called from the CPU thread.
void DoState(PointerWrap& f);
void PauseAndLock(Core::System& system, bool doLock, bool unpauseOnUnlock);
void UpdateWantDeterminism(Core::System& system, bool want);
bool UseDeterministicGPUThread() const { return m_use_deterministic_gpu_thread; }
void FlushGpu();
void RunGpu();
void GpuMaySleep();
void RunGpuLoop();
void ExitGpuLoop();
void EmulatorState(bool running);
bool AtBreakpoint();
void ResetVideoBuffer();
// In deterministic GPU thread mode this waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
// In single core mode, this runs the GPU for a single slice.
// In dual core mode, this synchronizes with the GPU thread.
void SyncGPUForRegisterAccess(Core::System& system);
void PushFifoAuxBuffer(const void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void FlushGpu(Core::System& system);
void RunGpu(Core::System& system);
void GpuMaySleep();
void RunGpuLoop(Core::System& system);
void ExitGpuLoop(Core::System& system);
void EmulatorState(bool running);
void ResetVideoBuffer();
private:
void RefreshConfig();
void ReadDataFromFifo(Core::System& system, u32 readPtr);
void ReadDataFromFifoOnCPU(Core::System& system, u32 readPtr);
// Both return the delay until the next SyncGPU callback; SyncGPUCallback treats a
// negative value as "suspend syncing".
int RunGpuOnCpu(Core::System& system, int ticks);
int WaitForGpuThread(Core::System& system, int ticks);
// Static so it can be handed to CoreTiming::RegisterEvent; it reaches the instance
// through system.GetFifo().
static void SyncGPUCallback(Core::System& system, u64 ticks, s64 cyclesLate);
// Size in bytes of m_fifo_aux_data (2 MiB).
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
Common::BlockingLoop m_gpu_mainloop;
// Checked by RunGpuLoop; the loop does nothing while this flag is not set.
Common::Flag m_emu_running_state;
// Most of this array is unlikely to be faulted in...
u8 m_fifo_aux_data[FIFO_SIZE]{};
u8* m_fifo_aux_write_ptr = nullptr;
u8* m_fifo_aux_read_ptr = nullptr;
// This could be in SConfig, but it depends on multiple settings
// and can change at runtime.
bool m_use_deterministic_gpu_thread = false;
// Handle returned by CoreTiming::RegisterEvent in Prepare(); null until then.
CoreTiming::EventType* m_event_sync_gpu = nullptr;
// STATE_TO_SAVE
u8* m_video_buffer = nullptr;
u8* m_video_buffer_read_ptr = nullptr;
std::atomic<u8*> m_video_buffer_write_ptr = nullptr;
std::atomic<u8*> m_video_buffer_seen_ptr = nullptr;
u8* m_video_buffer_pp_read_ptr = nullptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In deterministic GPU thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and points to what it's already
// processed as much of as possible - in the case of a partial command which
// caused it to stop, not the same as the read ptr. It's written by the GPU,
// under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
// Tick counter shared between threads: WaitForGpuThread adds elapsed CPU ticks,
// RunGpuLoop subtracts the cycles it executed.
std::atomic<int> m_sync_ticks = 0;
// True while the SyncGPU event is not scheduled; RunGpu() clears it and re-schedules.
bool m_syncing_suspended = false;
// Set by RunGpuLoop, waited on by WaitForGpuThread once the max distance is reached.
Common::Event m_sync_wakeup_event;
std::optional<size_t> m_config_callback_id = std::nullopt;
// Cached configuration values.
// NOTE(review): presumably refreshed by RefreshConfig() - confirm.
bool m_config_sync_gpu = false;
int m_config_sync_gpu_max_distance = 0;
int m_config_sync_gpu_min_distance = 0;
float m_config_sync_gpu_overclock = 0.0f;
};
bool AtBreakpoint(Core::System& system);
} // namespace Fifo

View File

@ -151,13 +151,14 @@ public:
{
m_in_display_list = true;
auto& system = Core::System::GetInstance();
if constexpr (is_preprocess)
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
const u8* const start_address = memory.GetPointer(address);
Fifo::PushFifoAuxBuffer(start_address, size);
system.GetFifo().PushFifoAuxBuffer(start_address, size);
if (start_address != nullptr)
{
@ -168,13 +169,13 @@ public:
{
const u8* start_address;
if (Fifo::UseDeterministicGPUThread())
auto& fifo = system.GetFifo();
if (fifo.UseDeterministicGPUThread())
{
start_address = static_cast<u8*>(Fifo::PopFifoAuxBuffer(size));
start_address = static_cast<u8*>(fifo.PopFifoAuxBuffer(size));
}
else
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
start_address = memory.GetPointer(address);
}

View File

@ -333,7 +333,8 @@ static void RaiseEvent(int cycles_into_future)
CoreTiming::FromThread from = CoreTiming::FromThread::NON_CPU;
s64 cycles = 0; // we don't care about timings for dual core mode.
if (!Core::System::GetInstance().IsDualCoreMode() || Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
if (!system.IsDualCoreMode() || system.GetFifo().UseDeterministicGPUThread())
{
from = CoreTiming::FromThread::CPU;

View File

@ -83,7 +83,8 @@ std::string VideoBackendBase::BadShaderFilename(const char* shader_stage, int co
// Asks the FIFO to leave the GPU loop by forwarding to ExitGpuLoop() on the FIFO
// owned by the global Core::System instance.
void VideoBackendBase::Video_ExitLoop()
{
Fifo::ExitGpuLoop();
auto& system = Core::System::GetInstance();
system.GetFifo().ExitGpuLoop(system);
}
// Run from the CPU thread (from VideoInterface.cpp)
@ -92,7 +93,8 @@ void VideoBackendBase::Video_OutputXFB(u32 xfb_addr, u32 fb_width, u32 fb_stride
{
if (m_initialized && g_renderer && !g_ActiveConfig.bImmediateXFB)
{
Fifo::SyncGPU(Fifo::SyncGPUReason::Swap);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::Swap);
AsyncRequests::Event e;
e.time = ticks;
@ -147,7 +149,8 @@ u32 VideoBackendBase::Video_GetQueryResult(PerfQueryType type)
return 0;
}
Fifo::SyncGPU(Fifo::SyncGPUReason::PerfQuery);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::PerfQuery);
AsyncRequests::Event e;
e.time = 0;
@ -185,7 +188,8 @@ u16 VideoBackendBase::Video_GetBoundingBox(int index)
warn_once = false;
}
Fifo::SyncGPU(Fifo::SyncGPUReason::BBox);
auto& system = Core::System::GetInstance();
system.GetFifo().SyncGPU(Fifo::SyncGPUReason::BBox);
AsyncRequests::Event e;
u16 result;
@ -291,7 +295,8 @@ void VideoBackendBase::PopulateBackendInfoFromUI()
void VideoBackendBase::DoState(PointerWrap& p)
{
if (!Core::System::GetInstance().IsDualCoreMode())
auto& system = Core::System::GetInstance();
if (!system.IsDualCoreMode())
{
VideoCommon_DoState(p);
return;
@ -304,7 +309,7 @@ void VideoBackendBase::DoState(PointerWrap& p)
// Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading
// a state. The next GP burst will wake it up again.
Fifo::GpuMaySleep();
system.GetFifo().GpuMaySleep();
}
void VideoBackendBase::InitializeShared()
@ -319,7 +324,7 @@ void VideoBackendBase::InitializeShared()
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
command_processor.Init(system);
Fifo::Init();
system.GetFifo().Init(system);
PixelEngine::Init();
BPInit();
VertexLoaderManager::Init();
@ -336,6 +341,7 @@ void VideoBackendBase::ShutdownShared()
{
m_initialized = false;
auto& system = Core::System::GetInstance();
VertexLoaderManager::Clear();
Fifo::Shutdown();
system.GetFifo().Shutdown();
}

View File

@ -60,10 +60,10 @@ void VideoCommon_DoState(PointerWrap& p)
p.DoMarker("TMEM");
// FIFO
Fifo::DoState(p);
auto& system = Core::System::GetInstance();
system.GetFifo().DoState(p);
p.DoMarker("Fifo");
auto& system = Core::System::GetInstance();
auto& command_processor = system.GetCommandProcessor();
command_processor.DoState(p);
p.DoMarker("CommandProcessor");

View File

@ -257,13 +257,14 @@ void LoadIndexedXF(CPArray array, u32 index, u16 address, u8 size)
u32* currData = (u32*)(&xfmem) + address;
u32* newData;
if (Fifo::UseDeterministicGPUThread())
auto& system = Core::System::GetInstance();
auto& fifo = system.GetFifo();
if (fifo.UseDeterministicGPUThread())
{
newData = (u32*)Fifo::PopFifoAuxBuffer(size * sizeof(u32));
newData = (u32*)fifo.PopFifoAuxBuffer(size * sizeof(u32));
}
else
{
auto& system = Core::System::GetInstance();
auto& memory = system.GetMemory();
newData = (u32*)memory.GetPointer(g_main_cp_state.array_bases[array] +
g_main_cp_state.array_strides[array] * index);
@ -293,7 +294,7 @@ void PreprocessIndexedXF(CPArray array, u32 index, u16 address, u8 size)
g_preprocess_cp_state.array_strides[array] * index);
const size_t buf_size = size * sizeof(u32);
Fifo::PushFifoAuxBuffer(new_data, buf_size);
system.GetFifo().PushFifoAuxBuffer(new_data, buf_size);
}
std::pair<std::string, std::string> GetXFRegInfo(u32 address, u32 value)