From 831c982f3b93b31b13e0f8cb36cf8241228ccb25 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 7 Dec 2024 16:55:54 +1000 Subject: [PATCH] System: Rewrite memory save state handling Makes it more friendly to GPU thread. --- src/core/gpu.cpp | 80 +++++--- src/core/gpu.h | 7 +- src/core/gpu_hw.cpp | 134 +++++++++----- src/core/gpu_hw.h | 3 +- src/core/gpu_sw.cpp | 12 +- src/core/gpu_sw.h | 3 +- src/core/hotkeys.cpp | 11 +- src/core/system.cpp | 376 ++++++++++++++++++++++---------------- src/core/system.h | 2 +- src/core/system_private.h | 10 +- 10 files changed, 397 insertions(+), 241 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 606167bc2..028b3313d 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -285,16 +285,8 @@ void GPU::SoftReset() UpdateGPUIdle(); } -bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +bool GPU::DoState(StateWrapper& sw, bool update_display) { - FlushRender(); - - if (sw.IsReading()) - { - // perform a reset to discard all pending draws/fb state - Reset(host_texture == nullptr); - } - sw.Do(&m_GPUSTAT.bits); sw.Do(&m_draw_mode.mode_reg.bits); @@ -390,34 +382,76 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ sw.Do(&m_max_run_ahead); sw.Do(&m_fifo_size); + if (!sw.DoMarker("GPU-VRAM")) + return false; + + sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + if (sw.IsReading()) { m_draw_mode.texture_page_changed = true; m_drawing_area_changed = true; SetClampedDrawingArea(); UpdateDMARequest(); - } - - if (!host_texture) - { - if (!sw.DoMarker("GPU-VRAM")) - return false; - - sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - } - - if (sw.IsReading()) - { UpdateCRTCConfig(); + UpdateCommandTickEvent(); + + // If we're paused, need to update the display FB. if (update_display) UpdateDisplay(); - - UpdateCommandTickEvent(); } return !sw.HasError(); } +bool GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) +{ + sw.Do(&m_GPUSTAT.bits); + + sw.DoBytes(&m_draw_mode, sizeof(m_draw_mode)); + sw.DoBytes(&m_drawing_area, sizeof(m_drawing_area)); + sw.DoBytes(&m_drawing_offset, sizeof(m_drawing_offset)); + + sw.Do(&m_console_is_pal); + sw.Do(&m_set_texture_disable_mask); + + sw.DoBytes(&m_crtc_state, sizeof(m_crtc_state)); + + sw.Do(&m_blitter_state); + sw.Do(&m_pending_command_ticks); + sw.Do(&m_command_total_words); + sw.Do(&m_GPUREAD_latch); + + sw.Do(&m_current_clut_reg_bits); + sw.Do(&m_current_clut_is_8bit); + sw.DoBytes(g_gpu_clut, sizeof(g_gpu_clut)); + + sw.DoBytes(&m_vram_transfer, sizeof(m_vram_transfer)); + + sw.Do(&m_fifo); + sw.Do(&m_blit_buffer); + sw.Do(&m_blit_remaining_words); + sw.Do(&m_render_command.bits); + + sw.Do(&m_max_run_ahead); + sw.Do(&m_fifo_size); + + if (sw.IsReading()) + { + m_draw_mode.texture_page_changed = true; + m_drawing_area_changed = true; + SetClampedDrawingArea(); + UpdateDMARequest(); + UpdateCRTCConfig(); + UpdateCommandTickEvent(); + + if (update_display) + UpdateDisplay(); + } + + return true; +} + void GPU::RestoreDeviceContext() { } diff --git a/src/core/gpu.h b/src/core/gpu.h index cb57fa3d5..8f73b5023 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -45,6 +45,10 @@ namespace Threading { class Thread; } +namespace System { +struct MemorySaveState; +} + class GPU { public: @@ -95,7 +99,8 @@ public: virtual bool Initialize(Error* error); virtual void Reset(bool clear_vram); - virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); + virtual bool DoState(StateWrapper& sw, bool update_display); + virtual bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display); // Graphics API state reset/restore - call when drawing the UI etc. // TODO: replace with "invalidate cached state" diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 390d73ed5..c923790ea 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -9,7 +9,7 @@ #include "gpu_sw_rasterizer.h" #include "host.h" #include "settings.h" -#include "system.h" +#include "system_private.h" #include "util/imgui_manager.h" #include "util/postprocessing.h" @@ -315,14 +315,16 @@ void GPU_HW::Reset(bool clear_vram) ClearFramebuffer(); } -bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +bool GPU_HW::DoState(StateWrapper& sw, bool update_display) { + FlushRender(); + // Need to download local VRAM copy before calling the base class, because it serializes this. if (m_sw_renderer) { m_sw_renderer->Sync(true); } - else if (sw.IsWriting() && !host_texture) + else if (sw.IsWriting()) { // If SW renderer readbacks aren't enabled, the CLUT won't be populated, which means it'll be invalid if the user // loads this state with software instead of hardware renderers. So force-update the CLUT. @@ -331,67 +333,109 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di GPU::ReadCLUT(g_gpu_clut, GPUTexturePaletteReg{Truncate16(m_current_clut_reg_bits)}, m_current_clut_is_8bit); } - if (!GPU::DoState(sw, host_texture, update_display)) + if (!GPU::DoState(sw, false)) return false; - if (host_texture) + if (sw.IsReading()) { - GPUTexture* tex = *host_texture; - if (sw.IsReading()) - { - if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || - tex->GetSamples() != m_vram_texture->GetSamples()) - { - return false; - } + // Wipe out state. + m_batch = {}; + m_current_depth = 1; + SetClampedDrawingArea(); - g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, tex, 0, 0, 0, 0, tex->GetWidth(), - tex->GetHeight()); - } - else - { - if (!tex || tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || - tex->GetSamples() != m_vram_texture->GetSamples()) - { - delete tex; - - // We copy to/from the save state texture, but we can't have multisampled non-RTs. - tex = g_gpu_device - ->FetchTexture( - m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), - m_vram_texture->IsMultisampled() ? GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, - GPUTexture::Format::RGBA8, GPUTexture::Flags::None) - .release(); - *host_texture = tex; - if (!tex) - return false; - } - - g_gpu_device->CopyTextureRegion(tex, 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, tex->GetWidth(), - tex->GetHeight()); - } - } - else if (sw.IsReading()) - { // Need to update the VRAM copy on the GPU with the state data. // Would invalidate the TC, but base DoState() calls Reset(). UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); - } - // invalidate the whole VRAM read texture when loading state - if (sw.IsReading()) - { + // invalidate the whole VRAM read texture when loading state DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); ClearVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle(); UpdateVRAMReadTexture(true, false); ClearVRAMDirtyRectangle(); ResetBatchVertexDepth(); + + // refresh display, has to be done here because of the upload above + if (update_display) + UpdateDisplay(); } return GPUTextureCache::DoState(sw, !m_use_texture_cache); } +bool GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) +{ + // sw-for-readbacks just makes a mess here + if (m_sw_renderer) + m_sw_renderer->Sync(true); + if (m_sw_renderer || m_use_texture_cache) + sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + + // This could be faster too. + if (m_use_texture_cache) + GPUTextureCache::DoState(sw, m_use_texture_cache); + + // Base class never fails. + GPU::DoMemoryState(sw, mss, false); + + if (sw.IsReading()) + { + if (m_batch_vertex_ptr) + UnmapGPUBuffer(0, 0); + + DebugAssert(mss.vram_texture->GetWidth() == m_vram_texture->GetWidth() && + mss.vram_texture->GetHeight() == m_vram_texture->GetHeight() && + mss.vram_texture->GetSamples() == m_vram_texture->GetSamples()); + g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, mss.vram_texture.get(), 0, 0, 0, 0, + m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + + // Wipe out state. + DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); + m_batch = {}; + SetClampedDrawingArea(); + ClearVRAMDirtyRectangle(); + SetFullVRAMDirtyRectangle(); + UpdateVRAMReadTexture(true, false); + ClearVRAMDirtyRectangle(); + ResetBatchVertexDepth(); + + if (update_display) + UpdateDisplay(); + } + else + { + FlushRender(); + + // saving state + if (!mss.vram_texture || mss.vram_texture->GetWidth() != m_vram_texture->GetWidth() || + mss.vram_texture->GetHeight() != m_vram_texture->GetHeight() || + mss.vram_texture->GetSamples() != m_vram_texture->GetSamples()) [[unlikely]] + { + g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); + mss.vram_texture.reset(); + } + if (!mss.vram_texture) + { + // We copy to/from the save state texture, but we can't have multisampled non-RTs. + Error error; + mss.vram_texture = g_gpu_device->FetchTexture( + m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), + m_vram_texture->IsMultisampled() ? GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, GPUTexture::Flags::None); + if (!mss.vram_texture) [[unlikely]] + { + ERROR_LOG("Failed to allocate VRAM texture for memory save state: {}", error.GetDescription()); + return false; + } + } + + g_gpu_device->CopyTextureRegion(mss.vram_texture.get(), 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, + m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + } + + return true; +} + void GPU_HW::RestoreDeviceContext() { g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 77595b5ba..f40ecb7cd 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -68,7 +68,8 @@ public: bool Initialize(Error* error) override; void Reset(bool clear_vram) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + bool DoState(StateWrapper& sw, bool update_display) override; + bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override; void RestoreDeviceContext() override; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 3020246d6..8aa99c1dc 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -7,6 +7,7 @@ #include "system.h" #include "util/gpu_device.h" +#include "util/state_wrapper.h" #include "common/align.h" #include "common/assert.h" @@ -58,19 +59,26 @@ bool GPU_SW::Initialize(Error* error) return true; } -bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) +bool GPU_SW::DoState(StateWrapper& sw, bool update_display) { // need to ensure the worker thread is done m_backend.Sync(true); // ignore the host texture for software mode, since we want to save vram here - if (!GPU::DoState(sw, nullptr, update_display)) + if (!GPU::DoState(sw, update_display)) return false; // need to still call the TC, to toss any data in the state return GPUTextureCache::DoState(sw, true); } +bool GPU_SW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) +{ + m_backend.Sync(true); + sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + return GPU::DoMemoryState(sw, mss, update_display); +} + void GPU_SW::Reset(bool clear_vram) { GPU::Reset(clear_vram); diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index b99bdd168..e113fa48e 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -30,7 +30,8 @@ public: bool IsHardwareRenderer() const override; bool Initialize(Error* error) override; - bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + bool DoState(StateWrapper& sw, bool update_display) override; + bool DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display) override; void Reset(bool clear_vram) override; void UpdateSettings(const Settings& old_settings) override; diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index 2d1857d20..f2ae0568c 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -58,9 +58,9 @@ static void HotkeyModifyResolutionScale(s32 increment) if (System::IsValid()) { + System::ClearMemorySaveStates(true); g_gpu->RestoreDeviceContext(); g_gpu->UpdateSettings(old_settings); - System::ClearMemorySaveStates(); } } @@ -375,11 +375,12 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_NOO [](s32 pressed) { if (!pressed && System::IsValid()) { + System::ClearMemorySaveStates(true); + Settings old_settings = g_settings; g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; g_gpu->RestoreDeviceContext(); g_gpu->UpdateSettings(old_settings); - System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? TRANSLATE_STR("OSDMessage", "PGXP is now enabled.") : @@ -448,12 +449,13 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATE_NOOP("Hotkeys", "Graphics"), if (!g_settings.gpu_pgxp_enable) return; + System::ClearMemorySaveStates(true); + const Settings old_settings = g_settings; g_settings.gpu_pgxp_depth_buffer = !g_settings.gpu_pgxp_depth_buffer; g_gpu->RestoreDeviceContext(); g_gpu->UpdateSettings(old_settings); - System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXPDepth", g_settings.gpu_pgxp_depth_buffer ? TRANSLATE_STR("OSDMessage", "PGXP Depth Buffer is now enabled.") : @@ -469,12 +471,13 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATE_NOOP("Hotkeys", "Graphics"), TRANSLATE_ if (!g_settings.gpu_pgxp_enable) return; + System::ClearMemorySaveStates(true); + const Settings old_settings = g_settings; g_settings.gpu_pgxp_cpu = !g_settings.gpu_pgxp_cpu; g_gpu->RestoreDeviceContext(); g_gpu->UpdateSettings(old_settings); - System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ? TRANSLATE_STR("OSDMessage", "PGXP CPU mode is now enabled.") : diff --git a/src/core/system.cpp b/src/core/system.cpp index b57f65637..74d6886d5 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -76,8 +76,6 @@ #include #include #include -#include -#include #include #include #include @@ -199,7 +197,7 @@ static void UpdateMultitaps(); static std::string GetMediaPathFromSaveState(const char* path); static bool SaveUndoLoadState(); static void UpdateMemorySaveStateSettings(); -static bool LoadRewindState(u32 skip_saves = 0, bool consume_state = true); +static bool LoadOneRewindState(); static bool LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bool update_display); static bool LoadStateBufferFromFile(SaveStateBuffer* buffer, std::FILE* fp, Error* error, bool read_title, bool read_media_path, bool read_screenshot, bool read_data); @@ -210,6 +208,8 @@ static bool SaveStateBufferToFile(const SaveStateBuffer& buffer, std::FILE* fp, SaveStateCompressionMode compression_mode); static u32 CompressAndWriteStateData(std::FILE* fp, std::span src, SaveStateCompressionMode method, u32* header_type, Error* error); +static bool DoState(StateWrapper& sw, bool update_display); +static bool DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display); static bool IsExecutionInterrupted(); static void CheckForAndExitExecution(); @@ -290,8 +290,9 @@ struct ALIGN_TO_CACHE_LINE StateVars s32 rewind_save_frequency = 0; s32 rewind_save_counter = 0; - std::deque runahead_states; - std::deque rewind_states; + std::vector memory_save_states; + u32 memory_save_state_front = 0; + u32 memory_save_state_count = 0; const BIOS::ImageInfo* bios_image_info = nullptr; BIOS::ImageInfo::Hash bios_hash = {}; @@ -308,8 +309,6 @@ struct ALIGN_TO_CACHE_LINE StateVars bool keep_gpu_device_on_shutdown = false; std::atomic_bool startup_cancelled{false}; - bool rewinding_first_save = false; - std::unique_ptr game_settings_interface; std::unique_ptr input_settings_interface; std::string input_profile_name; @@ -1175,14 +1174,14 @@ DiscRegion System::GetRegionForPsf(const char* path) bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool update_display /* = true*/) { - ClearMemorySaveStates(); + ClearMemorySaveStates(true); g_gpu->RestoreDeviceContext(); // save current state DynamicHeapArray state_data(GetMaxSaveStateSize()); { StateWrapper sw(state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - if (!g_gpu->DoState(sw, nullptr, false) || !TimingEvents::DoState(sw)) + if (!g_gpu->DoState(sw, update_display) || !TimingEvents::DoState(sw)) { ERROR_LOG("Failed to save old GPU state when switching renderers"); state_data.deallocate(); @@ -1212,7 +1211,7 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_device, bool { StateWrapper sw(state_data.span(), StateWrapper::Mode::Read, SAVE_STATE_VERSION); g_gpu->RestoreDeviceContext(); - g_gpu->DoState(sw, nullptr, update_display); + g_gpu->DoState(sw, update_display); TimingEvents::DoState(sw); } @@ -1988,7 +1987,6 @@ bool System::Initialize(std::unique_ptr disc, DiscRegion disc_region, b s_state.rewind_load_counter = -1; s_state.rewind_save_frequency = -1; s_state.rewind_save_counter = -1; - s_state.rewinding_first_save = true; TimingEvents::Initialize(); @@ -2072,7 +2070,7 @@ void System::DestroySystem() if (g_settings.inhibit_screensaver) PlatformMisc::ResumeScreensaver(); - ClearMemorySaveStates(); + ClearMemorySaveStates(true); Cheats::UnloadAll(); PCDrv::Shutdown(); @@ -2532,7 +2530,7 @@ bool System::CreateGPU(GPURenderer renderer, bool is_switching, bool fullscreen, return true; } -bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state) +bool System::DoState(StateWrapper& sw, bool update_display) { if (!sw.DoMarker("System")) return false; @@ -2559,20 +2557,16 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di sw.Do(&s_state.frame_number); sw.Do(&s_state.internal_frame_number); - // Don't bother checking this at all for memory states, since they won't have a different BIOS... - if (!is_memory_state) + BIOS::ImageInfo::Hash bios_hash = s_state.bios_hash; + sw.DoBytesEx(bios_hash.data(), BIOS::ImageInfo::HASH_SIZE, 58, s_state.bios_hash.data()); + if (bios_hash != s_state.bios_hash) { - BIOS::ImageInfo::Hash bios_hash = s_state.bios_hash; - sw.DoBytesEx(bios_hash.data(), BIOS::ImageInfo::HASH_SIZE, 58, s_state.bios_hash.data()); - if (bios_hash != s_state.bios_hash) - { - WARNING_LOG("BIOS hash mismatch: System: {} | State: {}", BIOS::ImageInfo::GetHashString(s_state.bios_hash), - BIOS::ImageInfo::GetHashString(bios_hash)); - Host::AddIconOSDWarning( - "StateBIOSMismatch", ICON_FA_EXCLAMATION_TRIANGLE, - TRANSLATE_STR("System", "This save state was created with a different BIOS. This may cause stability issues."), - Host::OSD_WARNING_DURATION); - } + WARNING_LOG("BIOS hash mismatch: System: {} | State: {}", BIOS::ImageInfo::GetHashString(s_state.bios_hash), + BIOS::ImageInfo::GetHashString(bios_hash)); + Host::AddIconOSDWarning( + "StateBIOSMismatch", ICON_FA_EXCLAMATION_TRIANGLE, + TRANSLATE_STR("System", "This save state was created with a different BIOS. This may cause stability issues."), + Host::OSD_WARNING_DURATION); } if (!sw.DoMarker("CPU") || !CPU::DoState(sw)) @@ -2580,17 +2574,11 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di if (sw.IsReading()) { - if (is_memory_state) - CPU::CodeCache::InvalidateAllRAMBlocks(); - else - CPU::CodeCache::Reset(); + CPU::CodeCache::Reset(); + if (g_settings.gpu_pgxp_enable) + CPU::PGXP::Reset(); } - // only reset pgxp if we're not runahead-rollbacking. the value checks will save us from broken rendering, and it - // saves using imprecise values for a frame in 30fps games. - if (sw.IsReading() && g_settings.gpu_pgxp_enable && !is_memory_state) - CPU::PGXP::Reset(); - if (!sw.DoMarker("Bus") || !Bus::DoState(sw)) return false; @@ -2601,13 +2589,13 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return false; g_gpu->RestoreDeviceContext(); - if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, host_texture, update_display)) + if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, update_display)) return false; if (!sw.DoMarker("CDROM") || !CDROM::DoState(sw)) return false; - if (!sw.DoMarker("Pad") || !Pad::DoState(sw, is_memory_state)) + if (!sw.DoMarker("Pad") || !Pad::DoState(sw, false)) return false; if (!sw.DoMarker("Timers") || !Timers::DoState(sw)) @@ -2656,26 +2644,153 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di UpdateOverclock(); } - if (!is_memory_state) + if (sw.GetVersion() >= 56) [[unlikely]] { - if (sw.GetVersion() >= 56) [[unlikely]] - { - if (!sw.DoMarker("Cheevos")) - return false; + if (!sw.DoMarker("Cheevos")) + return false; - if (!Achievements::DoState(sw)) - return false; - } - else - { - // loading an old state without cheevos, so reset the runtime - Achievements::ResetClient(); - } + if (!Achievements::DoState(sw)) + return false; + } + else + { + // loading an old state without cheevos, so reset the runtime + Achievements::ResetClient(); } return !sw.HasError(); } +System::MemorySaveState& System::AllocateMemoryState() +{ + const u32 max_count = static_cast(s_state.memory_save_states.size()); + DebugAssert(s_state.memory_save_state_count <= max_count); + if (s_state.memory_save_state_count < max_count) + s_state.memory_save_state_count++; + + MemorySaveState& ret = s_state.memory_save_states[s_state.memory_save_state_front]; + s_state.memory_save_state_front = (s_state.memory_save_state_front + 1) % max_count; + return ret; +} + +System::MemorySaveState& System::GetFirstMemoryState() +{ + const u32 max_count = static_cast(s_state.memory_save_states.size()); + DebugAssert(s_state.memory_save_state_count > 0); + + const s32 front = + static_cast(s_state.memory_save_state_front) - static_cast(s_state.memory_save_state_count); + const u32 idx = static_cast((front < 0) ? (front + static_cast(max_count)) : front); + return s_state.memory_save_states[idx]; +} + +System::MemorySaveState& System::PopMemoryState() +{ + const u32 max_count = static_cast(s_state.memory_save_states.size()); + DebugAssert(s_state.memory_save_state_count > 0); + s_state.memory_save_state_count--; + + const s32 front = static_cast(s_state.memory_save_state_front) - 1; + s_state.memory_save_state_front = static_cast((front < 0) ? (front + static_cast(max_count)) : front); + return s_state.memory_save_states[s_state.memory_save_state_front]; +} + +void System::ClearMemorySaveStates(bool deallocate_resources) +{ + if (deallocate_resources) + { + for (MemorySaveState& mss : s_state.memory_save_states) + { + g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); + mss.state_data.deallocate(); + mss.state_size = 0; + } + } + + s_state.memory_save_state_front = 0; + s_state.memory_save_state_count = 0; +} + +void System::FreeMemoryStateStorage() +{ + for (MemorySaveState& mss : s_state.memory_save_states) + g_gpu_device->RecycleTexture(std::move(mss.vram_texture)); + s_state.memory_save_states = std::vector(); + s_state.memory_save_state_front = 0; + s_state.memory_save_state_count = 0; +} + +void System::LoadMemoryState(MemorySaveState& mss, bool update_display) +{ + StateWrapper sw(mss.state_data.cspan(0, mss.state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION); + [[maybe_unused]] const bool res = DoMemoryState(sw, mss, update_display); + DebugAssert(res); + + DEBUG_LOG("Loaded frame {} from memory state slot {}", s_state.frame_number, + &mss - s_state.memory_save_states.data()); +} + +bool System::SaveMemoryState(MemorySaveState& mss) +{ + DEBUG_LOG("Saving frame {} to memory state slot {}", s_state.frame_number, &mss - s_state.memory_save_states.data()); + + if (mss.state_data.empty()) + mss.state_data.resize(GetMaxSaveStateSize()); + + StateWrapper sw(mss.state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); + const bool res = DoMemoryState(sw, mss, false); + mss.state_size = sw.GetPosition(); + return res; +} + +bool System::DoMemoryState(StateWrapper& sw, MemorySaveState& mss, bool update_display) +{ +#if defined(_DEBUG) || defined(_DEVEL) +#define SAVE_COMPONENT(name, expr) \ + do \ + { \ + Assert(sw.DoMarker(name)); \ + if (!(expr)) [[unlikely]] \ + Panic("Failed to memory save " name); \ + } while (0) +#else +#define SAVE_COMPONENT(name, expr) expr +#endif + + sw.Do(&s_state.frame_number); + sw.Do(&s_state.internal_frame_number); + + SAVE_COMPONENT("CPU", CPU::DoState(sw)); + + // don't need to reset pgxp because the value checks will save us from broken rendering, and it + // saves using imprecise values for a frame in 30fps games. + // TODO: Save PGXP state to memory state instead. It'll be 8MB, but potentially worth it. + if (sw.IsReading()) + CPU::CodeCache::InvalidateAllRAMBlocks(); + + SAVE_COMPONENT("Bus", Bus::DoState(sw)); + SAVE_COMPONENT("DMA", DMA::DoState(sw)); + SAVE_COMPONENT("InterruptController", InterruptController::DoState(sw)); + + // GPU can fail due to running out of VRAM. + g_gpu->RestoreDeviceContext(); + if (!g_gpu->DoMemoryState(sw, mss, update_display)) [[unlikely]] + return false; + + SAVE_COMPONENT("CDROM", CDROM::DoState(sw)); + SAVE_COMPONENT("Pad", Pad::DoState(sw, true)); + SAVE_COMPONENT("Timers", Timers::DoState(sw)); + SAVE_COMPONENT("SPU", SPU::DoState(sw)); + SAVE_COMPONENT("MDEC", MDEC::DoState(sw)); + SAVE_COMPONENT("SIO", SIO::DoState(sw)); + SAVE_COMPONENT("Events", TimingEvents::DoState(sw)); + SAVE_COMPONENT("Achievements", Achievements::DoState(sw)); + +#undef SAVE_COMPONENT + + return true; +} + bool System::LoadBIOS(Error* error) { std::optional bios_image = BIOS::GetBIOSImage(s_state.region, error); @@ -2824,7 +2939,7 @@ bool System::LoadState(const char* path, Error* error, bool save_undo_state) SaveStateBuffer buffer; if (!LoadStateBufferFromFile(&buffer, fp.get(), error, false, true, false, true) || - !LoadStateFromBuffer(buffer, error, true)) + !LoadStateFromBuffer(buffer, error, IsPaused())) { if (save_undo_state) UndoLoadState(); @@ -2836,7 +2951,7 @@ bool System::LoadState(const char* path, Error* error, bool save_undo_state) return true; } -bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bool update_display_if_paused) +bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bool update_display) { Assert(IsValid()); @@ -2896,13 +3011,13 @@ bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bo if (g_settings.HasAnyPerGameMemoryCards()) UpdatePerGameMemoryCards(); - ClearMemorySaveStates(); + ClearMemorySaveStates(false); // Updating game/loading settings can turn on hardcore mode. Catch this. Achievements::DisableHardcoreMode(); StateWrapper sw(buffer.state_data.cspan(0, buffer.state_size), StateWrapper::Mode::Read, buffer.version); - if (!DoState(sw, nullptr, update_display_if_paused && IsPaused(), false)) + if (!DoState(sw, update_display)) { Error::SetStringView(error, "Save state stream is corrupted."); return false; @@ -2913,7 +3028,7 @@ bool System::LoadStateFromBuffer(const SaveStateBuffer& buffer, Error* error, bo PerformanceCounters::Reset(); ResetThrottler(); - if (update_display_if_paused && IsPaused()) + if (update_display) InvalidateDisplay(); return true; @@ -3236,7 +3351,7 @@ bool System::SaveStateToBuffer(SaveStateBuffer* buffer, Error* error, u32 screen g_gpu->RestoreDeviceContext(); StateWrapper sw(buffer->state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - if (!DoState(sw, nullptr, false, false)) + if (!DoState(sw, false)) { Error::SetStringView(error, "DoState() failed"); return false; @@ -4031,6 +4146,8 @@ bool System::InsertMedia(const char* path) if (IsGPUDumpPath(path)) [[unlikely]] return ChangeGPUDump(path); + ClearMemorySaveStates(true); + Error error; std::unique_ptr image = CDImage::Open(path, g_settings.cdrom_load_image_patches, &error); const DiscRegion region = @@ -4063,14 +4180,13 @@ bool System::InsertMedia(const char* path) UpdatePerGameMemoryCards(); } - ClearMemorySaveStates(); return true; } void System::RemoveMedia() { + ClearMemorySaveStates(true); CDROM::RemoveMedia(false); - ClearMemorySaveStates(); } void System::UpdateRunningGame(const std::string& path, CDImage* image, bool booting) @@ -4284,6 +4400,8 @@ bool System::SwitchMediaSubImage(u32 index) if (!CDROM::HasMedia()) return false; + ClearMemorySaveStates(true); + std::unique_ptr image = CDROM::RemoveMedia(true); Assert(image); @@ -4319,8 +4437,6 @@ bool System::SwitchMediaSubImage(u32 index) fmt::format(TRANSLATE_FS("System", "Switched to sub-image {} ({}) in '{}'."), subimage_title, title, index + 1u, Path::GetFileName(CDROM::GetMediaPath())), Host::OSD_INFO_DURATION); - - ClearMemorySaveStates(); return true; } @@ -4374,7 +4490,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) if (IsValid()) { - ClearMemorySaveStates(); + ClearMemorySaveStates(false); if (g_settings.cpu_overclock_active != old_settings.cpu_overclock_active || (g_settings.cpu_overclock_active && @@ -4492,6 +4608,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures || g_settings.texture_replacements.config != old_settings.texture_replacements.config) { + ClearMemorySaveStates(true); g_gpu->UpdateSettings(old_settings); if (IsPaused()) InvalidateDisplay(); @@ -4890,15 +5007,9 @@ void System::CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64 static_cast(g_settings.gpu_multisamples) * static_cast(num_saves); } -void System::ClearMemorySaveStates() -{ - s_state.rewind_states.clear(); - s_state.runahead_states.clear(); -} - void System::UpdateMemorySaveStateSettings() { - ClearMemorySaveStates(); + FreeMemoryStateStorage(); if (IsReplayingGPUDump()) [[unlikely]] { @@ -4907,11 +5018,13 @@ void System::UpdateMemorySaveStateSettings() return; } - if (g_settings.rewind_enable) + u32 num_slots = 0; + if (g_settings.rewind_enable && !g_settings.IsRunaheadEnabled()) { s_state.rewind_save_frequency = static_cast(std::ceil(g_settings.rewind_save_frequency * s_state.video_frame_rate)); s_state.rewind_save_counter = 0; + num_slots = g_settings.rewind_save_slots; u64 ram_usage, vram_usage; CalculateRewindMemoryUsage(g_settings.rewind_save_slots, g_settings.gpu_resolution_scale, &ram_usage, &vram_usage); @@ -4931,97 +5044,53 @@ void System::UpdateMemorySaveStateSettings() s_state.runahead_frames = g_settings.runahead_frames; s_state.runahead_replay_pending = false; if (s_state.runahead_frames > 0) + { INFO_LOG("Runahead is active with {} frames", s_state.runahead_frames); + num_slots = s_state.runahead_frames; + } + + // allocate storage for memory save states + if (num_slots > 0) + { + DEV_LOG("Allocating {} memory save state slots", num_slots); + s_state.memory_save_states.resize(num_slots); + } // reenter execution loop, don't want to try to save a state now if runahead was turned off InterruptExecution(); } -bool System::LoadMemoryState(const MemorySaveState& mss) -{ - StateWrapper sw(mss.state_data.cspan(), StateWrapper::Mode::Read, SAVE_STATE_VERSION); - GPUTexture* host_texture = mss.vram_texture.get(); - if (!DoState(sw, &host_texture, true, true)) [[unlikely]] - { - Host::ReportErrorAsync("Error", "Failed to load memory save state, resetting."); - ResetSystem(); - return false; - } - - return true; -} - -bool System::SaveMemoryState(MemorySaveState* mss) -{ - if (mss->state_data.empty()) - mss->state_data.resize(GetMaxSaveStateSize()); - - GPUTexture* host_texture = mss->vram_texture.release(); - StateWrapper sw(mss->state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); - if (!DoState(sw, &host_texture, false, true)) - { - ERROR_LOG("Failed to create rewind state."); - delete host_texture; - return false; - } - -#ifdef PROFILE_MEMORY_SAVE_STATES - mss->state_size = sw.GetPosition(); -#endif - - mss->vram_texture.reset(host_texture); - return true; -} - bool System::SaveRewindState() { #ifdef PROFILE_MEMORY_SAVE_STATES Timer save_timer; #endif - // try to reuse the frontmost slot - const u32 save_slots = g_settings.rewind_save_slots; - MemorySaveState mss; - while (s_state.rewind_states.size() >= save_slots) + MemorySaveState& mss = AllocateMemoryState(); + if (!SaveMemoryState(mss)) { - mss = std::move(s_state.rewind_states.front()); - s_state.rewind_states.pop_front(); + PopMemoryState(); + return false; } - if (!SaveMemoryState(&mss)) - return false; - - s_state.rewind_states.push_back(std::move(mss)); - #ifdef PROFILE_MEMORY_SAVE_STATES - DEV_LOG("Saved rewind state ({} bytes, took {:.4f} ms)", s_state.rewind_states.back().state_size, - save_timer.GetTimeMilliseconds()); + DEV_LOG("Saved rewind state ({} bytes, took {:.4f} ms)", mss.state_size, save_timer.GetTimeMilliseconds()); #endif return true; } -bool System::LoadRewindState(u32 skip_saves /*= 0*/, bool consume_state /*=true */) +bool System::LoadOneRewindState() { - while (skip_saves > 0 && !s_state.rewind_states.empty()) - { - g_gpu_device->RecycleTexture(std::move(s_state.rewind_states.back().vram_texture)); - s_state.rewind_states.pop_back(); - skip_saves--; - } - - if (s_state.rewind_states.empty()) + if (s_state.memory_save_state_count == 0) return false; #ifdef PROFILE_MEMORY_SAVE_STATES Timer load_timer; #endif - if (!LoadMemoryState(s_state.rewind_states.back())) - return false; - - if (consume_state) - s_state.rewind_states.pop_back(); + MemorySaveState& mss = PopMemoryState(); + LoadMemoryState(mss, true); // back in time, need to reset perf counters PerformanceCounters::Reset(); @@ -5050,13 +5119,16 @@ void System::SetRewinding(bool enabled) s_state.rewind_load_counter = 0; if (!was_enabled && s_state.system_executing) + { + // Drop the save we just created, since we don't want to rewind to where we are. + PopMemoryState(); s_state.system_interrupted = true; + } } else { s_state.rewind_load_frequency = -1; s_state.rewind_load_counter = -1; - s_state.rewinding_first_save = true; } } @@ -5064,9 +5136,7 @@ void System::DoRewind() { if (s_state.rewind_load_counter == 0) { - const u32 skip_saves = BoolToUInt32(!s_state.rewinding_first_save); - s_state.rewinding_first_save = false; - LoadRewindState(skip_saves, false); + LoadOneRewindState(); s_state.rewind_load_counter = s_state.rewind_load_frequency; } else @@ -5084,20 +5154,9 @@ void System::DoRewind() void System::SaveRunaheadState() { // try to reuse the frontmost slot - MemorySaveState mss; - while (s_state.runahead_states.size() >= s_state.runahead_frames) - { - mss = std::move(s_state.runahead_states.front()); - s_state.runahead_states.pop_front(); - } - - if (!SaveMemoryState(&mss)) - { - ERROR_LOG("Failed to save runahead state."); - return; - } - - s_state.runahead_states.push_back(std::move(mss)); + MemorySaveState& mss = AllocateMemoryState(); + if (!SaveMemoryState(mss)) + PopMemoryState(); } bool System::DoRunahead() @@ -5109,23 +5168,22 @@ bool System::DoRunahead() if (s_state.runahead_replay_pending) { #ifdef PROFILE_MEMORY_SAVE_STATES - DEV_LOG("runahead starting at frame {}", s_state.frame_number); + DEBUG_LOG("runahead starting at frame {}", s_state.frame_number); replay_timer.Reset(); #endif // we need to replay and catch up - load the state, s_state.runahead_replay_pending = false; - if (s_state.runahead_states.empty() || !LoadMemoryState(s_state.runahead_states.front())) - { - s_state.runahead_states.clear(); + if (s_state.memory_save_state_count == 0) return false; - } + + LoadMemoryState(GetFirstMemoryState(), false); // figure out how many frames we need to run to catch up - s_state.runahead_replay_frames = static_cast(s_state.runahead_states.size()); + s_state.runahead_replay_frames = s_state.memory_save_state_count; // and throw away all the states, forcing us to catch up below - s_state.runahead_states.clear(); + ClearMemorySaveStates(false); // run the frames with no audio SPU::SetAudioOutputMuted(true); @@ -5170,7 +5228,7 @@ bool System::DoRunahead() void System::SetRunaheadReplayFlag() { - if (s_state.runahead_frames == 0 || s_state.runahead_states.empty()) + if (s_state.runahead_frames == 0 || s_state.memory_save_state_count == 0) return; #ifdef PROFILE_MEMORY_SAVE_STATES @@ -5230,7 +5288,7 @@ bool System::UndoLoadState() Assert(IsValid()); Error error; - if (!LoadStateFromBuffer(s_state.undo_load_state.value(), &error, true)) + if (!LoadStateFromBuffer(s_state.undo_load_state.value(), &error, IsPaused())) { Host::ReportErrorAsync("Error", fmt::format("Failed to load undo state, resetting system:\n", error.GetDescription())); diff --git a/src/core/system.h b/src/core/system.h index be7f612e8..e23468bdf 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -421,7 +421,7 @@ void InvalidateDisplay(); // Memory Save States (Rewind and Runahead) ////////////////////////////////////////////////////////////////////////// void CalculateRewindMemoryUsage(u32 num_saves, u32 resolution_scale, u64* ram_usage, u64* vram_usage); -void ClearMemorySaveStates(); +void ClearMemorySaveStates(bool deallocate_resources); void SetRunaheadReplayFlag(); /// Shared socket multiplexer, used by PINE/GDB/etc. diff --git a/src/core/system_private.h b/src/core/system_private.h index e79882ff3..de3ccab3e 100644 --- a/src/core/system_private.h +++ b/src/core/system_private.h @@ -17,14 +17,16 @@ struct MemorySaveState size_t state_size; }; -bool SaveMemoryState(MemorySaveState* mss); -bool LoadMemoryState(const MemorySaveState& mss); +MemorySaveState& AllocateMemoryState(); +MemorySaveState& GetFirstMemoryState(); +MemorySaveState& PopMemoryState(); +void FreeMemoryStateStorage(); +void LoadMemoryState(MemorySaveState& mss, bool update_display); +bool SaveMemoryState(MemorySaveState& mss); /// Returns the maximum size of a save state, considering the current configuration. size_t GetMaxSaveStateSize(); -bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state); - void IncrementFrameNumber(); void IncrementInternalFrameNumber(); void FrameDone();