Merge pull request #6321 from stenzek/efb-savestates

Support saving EFB and texture cache in save states
This commit is contained in:
Connor McLaughlin 2019-07-25 13:50:57 +10:00 committed by GitHub
commit ac9912bad3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 835 additions and 165 deletions

View File

@ -14,3 +14,8 @@
[Video_Stereoscopy] [Video_Stereoscopy]
StereoConvergence = 5000 StereoConvergence = 5000
[Video_Settings]
# This game creates a large number of EFB copies at different addresses, resulting
# in a large texture cache which takes considerable time to save.
SaveTextureCacheToState = False

View File

@ -0,0 +1,18 @@
# NATJ01, NATP01, NATE01 - Mario Tennis (Virtual Console)
[Core]
# Values set here will override the main Dolphin settings.
[OnLoad]
# Add memory patches to be loaded once on boot here.
[OnFrame]
# Add memory patches to be applied every frame here.
[ActionReplay]
# Add action replay cheats here.
[Video_Settings]
# This game creates a large number of EFB copies at different addresses, resulting
# in a large texture cache which takes considerable time to save.
SaveTextureCacheToState = False

View File

@ -91,6 +91,8 @@ const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS{
{System::GFX, "Settings", "ShaderCompilerThreads"}, 1}; {System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{ const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1}; {System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
const ConfigInfo<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
const ConfigInfo<bool> GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true}; const ConfigInfo<bool> GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true};
const ConfigInfo<bool> GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true}; const ConfigInfo<bool> GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true};

View File

@ -67,6 +67,7 @@ extern const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING;
extern const ConfigInfo<ShaderCompilationMode> GFX_SHADER_COMPILATION_MODE; extern const ConfigInfo<ShaderCompilationMode> GFX_SHADER_COMPILATION_MODE;
extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS; extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS;
extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS; extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const ConfigInfo<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
extern const ConfigInfo<bool> GFX_SW_ZCOMPLOC; extern const ConfigInfo<bool> GFX_SW_ZCOMPLOC;
extern const ConfigInfo<bool> GFX_SW_ZFREEZE; extern const ConfigInfo<bool> GFX_SW_ZFREEZE;

View File

@ -90,6 +90,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
Config::GFX_SHADER_COMPILATION_MODE.location, Config::GFX_SHADER_COMPILATION_MODE.location,
Config::GFX_SHADER_COMPILER_THREADS.location, Config::GFX_SHADER_COMPILER_THREADS.location,
Config::GFX_SHADER_PRECOMPILER_THREADS.location, Config::GFX_SHADER_PRECOMPILER_THREADS.location,
Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE.location,
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZCOMPLOC.location,
Config::GFX_SW_ZFREEZE.location, Config::GFX_SW_ZFREEZE.location,

View File

@ -21,6 +21,7 @@
#include "Common/CPUDetect.h" #include "Common/CPUDetect.h"
#include "Common/CommonPaths.h" #include "Common/CommonPaths.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Event.h"
#include "Common/FileUtil.h" #include "Common/FileUtil.h"
#include "Common/Flag.h" #include "Common/Flag.h"
#include "Common/Logging/LogManager.h" #include "Common/Logging/LogManager.h"
@ -110,6 +111,7 @@ struct HostJob
}; };
static std::mutex s_host_jobs_lock; static std::mutex s_host_jobs_lock;
static std::queue<HostJob> s_host_jobs_queue; static std::queue<HostJob> s_host_jobs_queue;
static Common::Event s_cpu_thread_job_finished;
static thread_local bool tls_is_cpu_thread = false; static thread_local bool tls_is_cpu_thread = false;
@ -433,6 +435,7 @@ static void EmuThread(std::unique_ptr<BootParameters> boot, WindowSystemInfo wsi
Common::ScopeGuard movie_guard{Movie::Shutdown}; Common::ScopeGuard movie_guard{Movie::Shutdown};
HW::Init(); HW::Init();
Common::ScopeGuard hw_guard{[] { Common::ScopeGuard hw_guard{[] {
// We must set up this flag before executing HW::Shutdown() // We must set up this flag before executing HW::Shutdown()
s_hardware_initialized = false; s_hardware_initialized = false;
@ -771,6 +774,45 @@ void RunAsCPUThread(std::function<void()> function)
PauseAndLock(false, was_unpaused); PauseAndLock(false, was_unpaused);
} }
// Executes `function` on the CPU thread.
//
// function:            the callback to run.
// wait_for_completion: when true, this call does not return until the callback has finished;
//                      when false, the callback is only queued.
//
// If the core is not running, or the caller already is the CPU thread, the callback is invoked
// directly on the calling thread instead of being queued.
void RunOnCPUThread(std::function<void()> function, bool wait_for_completion)
{
  // With no running CPU thread there is nothing to race against, and if we already are the CPU
  // thread there is no need to queue anything: just run the callback in place.
  if (!IsRunning() || IsCPUThread())
  {
    function();
    return;
  }

  // Put the CPU into stepping mode while the job is being queued.
  const bool was_running = PauseAndLock(true, true);

  if (!wait_for_completion)
  {
    // Fire-and-forget: hand ownership of the callback to the job queue and resume the CPU thread.
    CPU::AddCPUThreadJob(std::move(function));
    PauseAndLock(false, was_running);
    return;
  }

  // Blocking variant: wrap the callback so the completion event is signalled once it has run.
  // Capturing `function` by reference is fine here, because this frame stays alive until the
  // event has been observed below.
  s_cpu_thread_job_finished.Reset();
  CPU::AddCPUThreadJob([&function]() {
    function();
    s_cpu_thread_job_finished.Set();
  });

  // Let the CPU thread go again so it can pick up and execute the queued job.
  PauseAndLock(false, was_running);

  // Poll the completion event in 10ms slices, yielding to the UI in between so we don't deadlock.
  while (!s_cpu_thread_job_finished.WaitFor(std::chrono::milliseconds(10)))
    Host_YieldToUI();
}
// Display FPS info // Display FPS info
// This should only be called from VI // This should only be called from VI
void VideoThrottle() void VideoThrottle()

View File

@ -82,6 +82,10 @@ void UpdateTitle();
// This should only be called from the CPU thread or the host thread. // This should only be called from the CPU thread or the host thread.
void RunAsCPUThread(std::function<void()> function); void RunAsCPUThread(std::function<void()> function);
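// Run a function on the CPU thread. When wait_for_completion is set, the call blocks until the
// function has run; otherwise the function may execute asynchronously after this call returns.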
// This is only valid to call from the host thread, since it uses PauseAndLock() internally.
void RunOnCPUThread(std::function<void()> function, bool wait_for_completion);
// for calling back into UI code without introducing a dependency on it in core // for calling back into UI code without introducing a dependency on it in core
using StateChangedCallbackFunc = std::function<void(Core::State)>; using StateChangedCallbackFunc = std::function<void(Core::State)>;
void SetOnStateChangedCallback(StateChangedCallbackFunc callback); void SetOnStateChangedCallback(StateChangedCallbackFunc callback);

View File

@ -6,6 +6,7 @@
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include <queue>
#include "AudioCommon/AudioCommon.h" #include "AudioCommon/AudioCommon.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
@ -44,6 +45,7 @@ static bool s_state_paused_and_locked = false;
static bool s_state_system_request_stepping = false; static bool s_state_system_request_stepping = false;
static bool s_state_cpu_step_instruction = false; static bool s_state_cpu_step_instruction = false;
static Common::Event* s_state_cpu_step_instruction_sync = nullptr; static Common::Event* s_state_cpu_step_instruction_sync = nullptr;
static std::queue<std::function<void()>> s_pending_jobs;
void Init(PowerPC::CPUCore cpu_core) void Init(PowerPC::CPUCore cpu_core)
{ {
@ -60,6 +62,9 @@ void Shutdown()
// Requires holding s_state_change_lock // Requires holding s_state_change_lock
static void FlushStepSyncEventLocked() static void FlushStepSyncEventLocked()
{ {
if (!s_state_cpu_step_instruction)
return;
if (s_state_cpu_step_instruction_sync) if (s_state_cpu_step_instruction_sync)
{ {
s_state_cpu_step_instruction_sync->Set(); s_state_cpu_step_instruction_sync->Set();
@ -68,12 +73,25 @@ static void FlushStepSyncEventLocked()
s_state_cpu_step_instruction = false; s_state_cpu_step_instruction = false;
} }
// Drains s_pending_jobs, invoking every queued callback in FIFO order.
//
// state_lock must own s_state_change_lock on entry. The lock is released while each callback
// runs and re-acquired afterwards, so it is owned again when this function returns.
static void ExecutePendingJobs(std::unique_lock<std::mutex>& state_lock)
{
  while (!s_pending_jobs.empty())
  {
    // Move the job out of the queue rather than copying it: the slot is popped on the next line,
    // so duplicating the std::function (and everything it captured) would be wasted work done
    // while the state lock is held.
    std::function<void()> callback = std::move(s_pending_jobs.front());
    s_pending_jobs.pop();

    // The callback itself runs with the state lock released.
    state_lock.unlock();
    callback();
    state_lock.lock();
  }
}
void Run() void Run()
{ {
std::unique_lock<std::mutex> state_lock(s_state_change_lock); std::unique_lock<std::mutex> state_lock(s_state_change_lock);
while (s_state != State::PowerDown) while (s_state != State::PowerDown)
{ {
s_state_cpu_cvar.wait(state_lock, [] { return !s_state_paused_and_locked; }); s_state_cpu_cvar.wait(state_lock, [] { return !s_state_paused_and_locked; });
ExecutePendingJobs(state_lock);
switch (s_state) switch (s_state)
{ {
@ -108,8 +126,10 @@ void Run()
case State::Stepping: case State::Stepping:
// Wait for step command. // Wait for step command.
s_state_cpu_cvar.wait(state_lock, s_state_cpu_cvar.wait(state_lock, [&state_lock] {
[] { return s_state_cpu_step_instruction || !IsStepping(); }); ExecutePendingJobs(state_lock);
return s_state_cpu_step_instruction || !IsStepping();
});
if (!IsStepping()) if (!IsStepping())
{ {
// Signal event if the mode changes. // Signal event if the mode changes.
@ -330,4 +350,11 @@ bool PauseAndLock(bool do_lock, bool unpause_on_unlock, bool control_adjacent)
} }
return was_unpaused; return was_unpaused;
} }
// Appends `function` to the queue of pending CPU thread jobs.
// s_state_change_lock is held for the duration of the push.
void AddCPUThreadJob(std::function<void()> function)
{
  std::lock_guard<std::mutex> guard(s_state_change_lock);
  s_pending_jobs.push(std::move(function));
}
} // namespace CPU } // namespace CPU

View File

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <functional>
namespace Common namespace Common
{ {
@ -74,4 +75,8 @@ const State* GetStatePtr();
// "control_adjacent" causes PauseAndLock to behave like EnableStepping by modifying the // "control_adjacent" causes PauseAndLock to behave like EnableStepping by modifying the
// state of the Audio and FIFO subsystems as well. // state of the Audio and FIFO subsystems as well.
bool PauseAndLock(bool do_lock, bool unpause_on_unlock = true, bool control_adjacent = false); bool PauseAndLock(bool do_lock, bool unpause_on_unlock = true, bool control_adjacent = false);
// Adds a job to be executed on the CPU thread. This should be combined with PauseAndLock(),
// as while the CPU is in the run loop, it won't execute the function.
void AddCPUThreadJob(std::function<void()> function);
} // namespace CPU } // namespace CPU

View File

@ -63,7 +63,7 @@ static AfterLoadCallbackFunc s_on_after_load_callback;
// Temporary undo state buffer // Temporary undo state buffer
static std::vector<u8> g_undo_load_buffer; static std::vector<u8> g_undo_load_buffer;
static std::vector<u8> g_current_buffer; static std::vector<u8> g_current_buffer;
static int g_loadDepth = 0; static bool s_load_or_save_in_progress;
static std::mutex g_cs_undo_load_buffer; static std::mutex g_cs_undo_load_buffer;
static std::mutex g_cs_current_buffer; static std::mutex g_cs_current_buffer;
@ -72,7 +72,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread; static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system // Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 110; // Last changed in PR 8036 static const u32 STATE_VERSION = 111; // Last changed in PR 6321
// Maps savestate versions to Dolphin versions. // Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list, // Versions after 42 don't need to be added to this list,
@ -170,6 +170,11 @@ static void DoState(PointerWrap& p)
return; return;
} }
// Movie must be done before the video backend, because the window is redrawn in the video backend
// state load, and the frame number must be up-to-date.
Movie::DoState(p);
p.DoMarker("Movie");
// Begin with video backend, so that it gets a chance to clear its caches and writeback modified // Begin with video backend, so that it gets a chance to clear its caches and writeback modified
// things to RAM // things to RAM
g_video_backend->DoState(p); g_video_backend->DoState(p);
@ -186,8 +191,6 @@ static void DoState(PointerWrap& p)
if (SConfig::GetInstance().bWii) if (SConfig::GetInstance().bWii)
Wiimote::DoState(p); Wiimote::DoState(p);
p.DoMarker("Wiimote"); p.DoMarker("Wiimote");
Movie::DoState(p);
p.DoMarker("Movie");
Gecko::DoState(p); Gecko::DoState(p);
p.DoMarker("Gecko"); p.DoMarker("Gecko");
@ -204,27 +207,31 @@ void LoadFromBuffer(std::vector<u8>& buffer)
return; return;
} }
Core::RunAsCPUThread([&] { Core::RunOnCPUThread(
u8* ptr = &buffer[0]; [&] {
PointerWrap p(&ptr, PointerWrap::MODE_READ); u8* ptr = &buffer[0];
DoState(p); PointerWrap p(&ptr, PointerWrap::MODE_READ);
}); DoState(p);
},
true);
} }
void SaveToBuffer(std::vector<u8>& buffer) void SaveToBuffer(std::vector<u8>& buffer)
{ {
Core::RunAsCPUThread([&] { Core::RunOnCPUThread(
u8* ptr = nullptr; [&] {
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE); u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);
DoState(p); DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr); const size_t buffer_size = reinterpret_cast<size_t>(ptr);
buffer.resize(buffer_size); buffer.resize(buffer_size);
ptr = &buffer[0]; ptr = &buffer[0];
p.SetMode(PointerWrap::MODE_WRITE); p.SetMode(PointerWrap::MODE_WRITE);
DoState(p); DoState(p);
}); },
true);
} }
// return state number not in map // return state number not in map
@ -381,42 +388,51 @@ static void CompressAndDumpState(CompressAndDumpState_args save_args)
void SaveAs(const std::string& filename, bool wait) void SaveAs(const std::string& filename, bool wait)
{ {
Core::RunAsCPUThread([&] { if (s_load_or_save_in_progress)
// Measure the size of the buffer. return;
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);
DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);
// Then actually do the write. s_load_or_save_in_progress = true;
{
std::lock_guard<std::mutex> lk(g_cs_current_buffer);
g_current_buffer.resize(buffer_size);
ptr = &g_current_buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
}
if (p.GetMode() == PointerWrap::MODE_WRITE) Core::RunOnCPUThread(
{ [&] {
Core::DisplayMessage("Saving State...", 1000); // Measure the size of the buffer.
u8* ptr = nullptr;
PointerWrap p(&ptr, PointerWrap::MODE_MEASURE);
DoState(p);
const size_t buffer_size = reinterpret_cast<size_t>(ptr);
CompressAndDumpState_args save_args; // Then actually do the write.
save_args.buffer_vector = &g_current_buffer; {
save_args.buffer_mutex = &g_cs_current_buffer; std::lock_guard<std::mutex> lk(g_cs_current_buffer);
save_args.filename = filename; g_current_buffer.resize(buffer_size);
save_args.wait = wait; ptr = &g_current_buffer[0];
p.SetMode(PointerWrap::MODE_WRITE);
DoState(p);
}
Flush(); if (p.GetMode() == PointerWrap::MODE_WRITE)
g_save_thread = std::thread(CompressAndDumpState, save_args); {
g_compressAndDumpStateSyncEvent.Wait(); Core::DisplayMessage("Saving State...", 1000);
}
else CompressAndDumpState_args save_args;
{ save_args.buffer_vector = &g_current_buffer;
// someone aborted the save by changing the mode? save_args.buffer_mutex = &g_cs_current_buffer;
Core::DisplayMessage("Unable to save: Internal DoState Error", 4000); save_args.filename = filename;
} save_args.wait = wait;
});
Flush();
g_save_thread = std::thread(CompressAndDumpState, save_args);
g_compressAndDumpStateSyncEvent.Wait();
}
else
{
// someone aborted the save by changing the mode?
Core::DisplayMessage("Unable to save: Internal DoState Error", 4000);
}
},
true);
s_load_or_save_in_progress = false;
} }
bool ReadHeader(const std::string& filename, StateHeader& header) bool ReadHeader(const std::string& filename, StateHeader& header)
@ -515,7 +531,7 @@ static void LoadFileStateData(const std::string& filename, std::vector<u8>& ret_
void LoadAs(const std::string& filename) void LoadAs(const std::string& filename)
{ {
if (!Core::IsRunning()) if (!Core::IsRunning() || s_load_or_save_in_progress)
{ {
return; return;
} }
@ -525,64 +541,65 @@ void LoadAs(const std::string& filename)
return; return;
} }
Core::RunAsCPUThread([&] { s_load_or_save_in_progress = true;
g_loadDepth++;
// Save temp buffer for undo load state Core::RunOnCPUThread(
if (!Movie::IsJustStartingRecordingInputFromSaveState()) [&] {
{ // Save temp buffer for undo load state
std::lock_guard<std::mutex> lk(g_cs_undo_load_buffer); if (!Movie::IsJustStartingRecordingInputFromSaveState())
SaveToBuffer(g_undo_load_buffer); {
if (Movie::IsMovieActive()) std::lock_guard<std::mutex> lk(g_cs_undo_load_buffer);
Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); SaveToBuffer(g_undo_load_buffer);
else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm")) if (Movie::IsMovieActive())
File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"); Movie::SaveRecording(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
} else if (File::Exists(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm"))
File::Delete(File::GetUserPath(D_STATESAVES_IDX) + "undo.dtm");
}
bool loaded = false; bool loaded = false;
bool loadedSuccessfully = false; bool loadedSuccessfully = false;
// brackets here are so buffer gets freed ASAP // brackets here are so buffer gets freed ASAP
{ {
std::vector<u8> buffer; std::vector<u8> buffer;
LoadFileStateData(filename, buffer); LoadFileStateData(filename, buffer);
if (!buffer.empty()) if (!buffer.empty())
{ {
u8* ptr = &buffer[0]; u8* ptr = &buffer[0];
PointerWrap p(&ptr, PointerWrap::MODE_READ); PointerWrap p(&ptr, PointerWrap::MODE_READ);
DoState(p); DoState(p);
loaded = true; loaded = true;
loadedSuccessfully = (p.GetMode() == PointerWrap::MODE_READ); loadedSuccessfully = (p.GetMode() == PointerWrap::MODE_READ);
} }
} }
if (loaded) if (loaded)
{ {
if (loadedSuccessfully) if (loadedSuccessfully)
{ {
Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000); Core::DisplayMessage(StringFromFormat("Loaded state from %s", filename.c_str()), 2000);
if (File::Exists(filename + ".dtm")) if (File::Exists(filename + ".dtm"))
Movie::LoadInput(filename + ".dtm"); Movie::LoadInput(filename + ".dtm");
else if (!Movie::IsJustStartingRecordingInputFromSaveState() && else if (!Movie::IsJustStartingRecordingInputFromSaveState() &&
!Movie::IsJustStartingPlayingInputFromSaveState()) !Movie::IsJustStartingPlayingInputFromSaveState())
Movie::EndPlayInput(false); Movie::EndPlayInput(false);
} }
else else
{ {
Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL); Core::DisplayMessage("The savestate could not be loaded", OSD::Duration::NORMAL);
// since we could be in an inconsistent state now (and might crash or whatever), undo. // since we could be in an inconsistent state now (and might crash or whatever), undo.
if (g_loadDepth < 2) UndoLoadState();
UndoLoadState(); }
} }
}
if (s_on_after_load_callback) if (s_on_after_load_callback)
s_on_after_load_callback(); s_on_after_load_callback();
},
true);
g_loadDepth--; s_load_or_save_in_progress = false;
});
} }
void SetOnAfterLoadCallback(AfterLoadCallbackFunc callback) void SetOnAfterLoadCallback(AfterLoadCallbackFunc callback)

View File

@ -100,10 +100,13 @@ void HacksWidget::CreateWidgets()
m_disable_bounding_box = m_disable_bounding_box =
new GraphicsBool(tr("Disable Bounding Box"), Config::GFX_HACK_BBOX_ENABLE, true); new GraphicsBool(tr("Disable Bounding Box"), Config::GFX_HACK_BBOX_ENABLE, true);
m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING); m_vertex_rounding = new GraphicsBool(tr("Vertex Rounding"), Config::GFX_HACK_VERTEX_ROUDING);
m_save_texture_cache_state =
new GraphicsBool(tr("Save Texture Cache to State"), Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE);
other_layout->addWidget(m_fast_depth_calculation, 0, 0); other_layout->addWidget(m_fast_depth_calculation, 0, 0);
other_layout->addWidget(m_disable_bounding_box, 0, 1); other_layout->addWidget(m_disable_bounding_box, 0, 1);
other_layout->addWidget(m_vertex_rounding, 1, 0); other_layout->addWidget(m_vertex_rounding, 1, 0);
other_layout->addWidget(m_save_texture_cache_state, 1, 1);
main_layout->addWidget(efb_box); main_layout->addWidget(efb_box);
main_layout->addWidget(texture_cache_box); main_layout->addWidget(texture_cache_box);
@ -244,6 +247,10 @@ void HacksWidget::AddDescriptions()
static const char TR_DISABLE_BOUNDINGBOX_DESCRIPTION[] = static const char TR_DISABLE_BOUNDINGBOX_DESCRIPTION[] =
QT_TR_NOOP("Disables bounding box emulation.\n\nThis may improve GPU performance " QT_TR_NOOP("Disables bounding box emulation.\n\nThis may improve GPU performance "
"significantly, but some games will break.\n\nIf unsure, leave this checked."); "significantly, but some games will break.\n\nIf unsure, leave this checked.");
static const char TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION[] = QT_TR_NOOP(
"Includes the contents of the embedded frame buffer (EFB) and upscaled EFB copies "
"in save states. Fixes missing and/or non-upscaled textures/objects when loading "
"states at the cost of additional save/load time.\n\nIf unsure, leave this checked.");
static const char TR_VERTEX_ROUNDING_DESCRIPTION[] = static const char TR_VERTEX_ROUNDING_DESCRIPTION[] =
QT_TR_NOOP("Rounds 2D vertices to whole pixels.\n\nFixes graphical problems in some games at " QT_TR_NOOP("Rounds 2D vertices to whole pixels.\n\nFixes graphical problems in some games at "
"higher internal resolutions. This setting has no effect when native internal " "higher internal resolutions. This setting has no effect when native internal "
@ -259,6 +266,7 @@ void HacksWidget::AddDescriptions()
AddDescription(m_gpu_texture_decoding, TR_GPU_DECODING_DESCRIPTION); AddDescription(m_gpu_texture_decoding, TR_GPU_DECODING_DESCRIPTION);
AddDescription(m_fast_depth_calculation, TR_FAST_DEPTH_CALC_DESCRIPTION); AddDescription(m_fast_depth_calculation, TR_FAST_DEPTH_CALC_DESCRIPTION);
AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION); AddDescription(m_disable_bounding_box, TR_DISABLE_BOUNDINGBOX_DESCRIPTION);
AddDescription(m_save_texture_cache_state, TR_SAVE_TEXTURE_CACHE_TO_STATE_DESCRIPTION);
AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION); AddDescription(m_vertex_rounding, TR_VERTEX_ROUNDING_DESCRIPTION);
} }

View File

@ -42,6 +42,7 @@ private:
QCheckBox* m_fast_depth_calculation; QCheckBox* m_fast_depth_calculation;
QCheckBox* m_disable_bounding_box; QCheckBox* m_disable_bounding_box;
QCheckBox* m_vertex_rounding; QCheckBox* m_vertex_rounding;
QCheckBox* m_save_texture_cache_state;
QCheckBox* m_defer_efb_copies; QCheckBox* m_defer_efb_copies;
void CreateWidgets(); void CreateWidgets();

View File

@ -11,6 +11,7 @@
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoState.h"
AsyncRequests AsyncRequests::s_singleton; AsyncRequests AsyncRequests::s_singleton;
@ -154,6 +155,10 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
case Event::PERF_QUERY: case Event::PERF_QUERY:
g_perf_query->FlushResults(); g_perf_query->FlushResults();
break; break;
case Event::DO_SAVE_STATE:
VideoCommon_DoState(*e.do_save_state.p);
break;
} }
} }

View File

@ -13,6 +13,7 @@
#include "Common/Flag.h" #include "Common/Flag.h"
struct EfbPokeData; struct EfbPokeData;
class PointerWrap;
class AsyncRequests class AsyncRequests
{ {
@ -28,6 +29,7 @@ public:
SWAP_EVENT, SWAP_EVENT,
BBOX_READ, BBOX_READ,
PERF_QUERY, PERF_QUERY,
DO_SAVE_STATE,
} type; } type;
u64 time; u64 time;
@ -64,6 +66,11 @@ public:
struct struct
{ {
} perf_query; } perf_query;
struct
{
PointerWrap* p;
} do_save_state;
}; };
}; };

View File

@ -68,9 +68,6 @@ static void BPWritten(const BPCmd& bp)
---------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------------
*/ */
// check for invalid state, else unneeded configuration are built
g_video_backend->CheckInvalidState();
if (((s32*)&bpmem)[bp.address] == bp.newvalue) if (((s32*)&bpmem)[bp.address] == bp.newvalue)
{ {
if (!(bp.address == BPMEM_TRIGGER_EFB_COPY || bp.address == BPMEM_CLEARBBOX1 || if (!(bp.address == BPMEM_TRIGGER_EFB_COPY || bp.address == BPMEM_CLEARBBOX1 ||

View File

@ -299,14 +299,15 @@ void RunGpuLoop()
[] { [] {
const SConfig& param = SConfig::GetInstance(); const SConfig& param = SConfig::GetInstance();
// Run events from the CPU thread.
AsyncRequests::GetInstance()->PullEvents();
// Do nothing while paused // Do nothing while paused
if (!s_emu_running_state.IsSet()) if (!s_emu_running_state.IsSet())
return; return;
if (s_use_deterministic_gpu_thread) if (s_use_deterministic_gpu_thread)
{ {
AsyncRequests::GetInstance()->PullEvents();
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder. // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr; u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr; u8* write_ptr = s_video_buffer_write_ptr;
@ -321,9 +322,6 @@ void RunGpuLoop()
else else
{ {
CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo; CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
AsyncRequests::GetInstance()->PullEvents();
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
// check if we are able to run this buffer // check if we are able to run this buffer

View File

@ -7,8 +7,10 @@
#include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/FramebufferShaderGen.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "Common/ChunkFile.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Core/Config/GraphicsSettings.h"
#include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractFramebuffer.h"
#include "VideoCommon/AbstractPipeline.h" #include "VideoCommon/AbstractPipeline.h"
#include "VideoCommon/AbstractShader.h" #include "VideoCommon/AbstractShader.h"
@ -464,6 +466,20 @@ bool FramebufferManager::CompileReadbackPipelines()
return false; return false;
} }
// EFB restore pipeline
auto restore_shader = g_renderer->CreateShaderFromSource(
ShaderStage::Pixel, FramebufferShaderGen::GenerateEFBRestorePixelShader());
if (!restore_shader)
return false;
config.framebuffer_state = GetEFBFramebufferState();
config.framebuffer_state.per_sample_shading = false;
config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader();
config.pixel_shader = restore_shader.get();
m_efb_restore_pipeline = g_renderer->CreatePipeline(config);
if (!m_efb_restore_pipeline)
return false;
return true; return true;
} }
@ -842,3 +858,107 @@ void FramebufferManager::DestroyPokePipelines()
m_color_poke_pipeline.reset(); m_color_poke_pipeline.reset();
m_poke_vertex_format.reset(); m_poke_vertex_format.reset();
} }
// Save state entry point for the EFB.
//
// A boolean taken from the SaveTextureCacheToState setting is pushed through the state stream
// first. Only when that flag ends up true are the EFB contents saved (write/measure modes) or
// restored (any other mode).
void FramebufferManager::DoState(PointerWrap& p)
{
  // Apply any queued EFB pokes before the EFB is serialized or overwritten.
  FlushEFBPokes();

  // NOTE(review): when loading, p.Do() presumably replaces this with the value stored in the
  // state, so the decision below follows the state file rather than the current setting - confirm
  // against PointerWrap::Do().
  bool efb_in_state = Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE);
  p.Do(efb_in_state);

  if (efb_in_state)
  {
    const bool is_saving =
        p.GetMode() == PointerWrap::MODE_WRITE || p.GetMode() == PointerWrap::MODE_MEASURE;
    if (is_saving)
      DoSaveState(p);
    else
      DoLoadState(p);
  }
}
// Writes the EFB color and depth contents to the state stream as two serialized textures
// (color first, then depth).
void FramebufferManager::DoSaveState(PointerWrap& p)
{
  // For multisampling, we need to resolve first before we can save.
  // This won't be bit-exact when loading, which could cause interesting rendering side-effects for
  // a frame. But whatever, MSAA doesn't exactly behave that well anyway.
  AbstractTexture* color_texture = ResolveEFBColorTexture(m_efb_color_texture->GetRect());
  AbstractTexture* depth_texture = ResolveEFBDepthTexture(m_efb_depth_texture->GetRect());

  // We don't want to save these as rendertarget textures, just the data itself when deserializing.
  const TextureConfig color_texture_config(color_texture->GetWidth(), color_texture->GetHeight(),
                                           color_texture->GetLevels(), color_texture->GetLayers(),
                                           1, GetEFBColorFormat(), 0);
  g_texture_cache->SerializeTexture(color_texture, color_texture_config, p);

  if (GetEFBDepthFormat() == AbstractTextureFormat::D32F)
  {
    // D32F depth is serialized directly, using the matching color format for the saved data.
    const TextureConfig depth_texture_config(
        depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(),
        depth_texture->GetLayers(), 1,
        AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), 0);
    g_texture_cache->SerializeTexture(depth_texture, depth_texture_config, p);
  }
  else
  {
    // If the EFB is backed by a D24S8 texture, we first have to convert it to R32F.
    const TextureConfig temp_texture_config(depth_texture->GetWidth(), depth_texture->GetHeight(),
                                            depth_texture->GetLevels(), depth_texture->GetLayers(),
                                            1, AbstractTextureFormat::R32F,
                                            AbstractTextureFlag_RenderTarget);
    std::unique_ptr<AbstractTexture> temp_texture = g_renderer->CreateTexture(temp_texture_config);

    // Only build a framebuffer when the texture exists, so a failed texture allocation does not
    // hand a null attachment to CreateFramebuffer().
    std::unique_ptr<AbstractFramebuffer> temp_fb;
    if (temp_texture)
      temp_fb = g_renderer->CreateFramebuffer(temp_texture.get(), nullptr);

    if (temp_texture && temp_fb)
    {
      g_renderer->ScaleTexture(temp_fb.get(), temp_texture->GetRect(), depth_texture,
                               depth_texture->GetRect());

      // Serialize the converted R32F copy. Serializing depth_texture here would throw away the
      // conversion performed above and pair the D24S8 source with an R32F config.
      const TextureConfig depth_texture_config(
          depth_texture->GetWidth(), depth_texture->GetHeight(), depth_texture->GetLevels(),
          depth_texture->GetLayers(), 1, temp_texture->GetFormat(), 0);
      g_texture_cache->SerializeTexture(temp_texture.get(), depth_texture_config, p);
    }
    else
    {
      PanicAlert("Failed to create temp texture for depth saving");

      // The color texture is written a second time in place of depth, so the stream still holds
      // two serialized textures.
      g_texture_cache->SerializeTexture(color_texture, color_texture_config, p);
    }
  }
}
// Restores the EFB from two serialized textures (color, then depth) read from the state stream.
// If either texture cannot be deserialized, or the saved layer count differs from the current
// EFB, the EFB is cleared instead.
void FramebufferManager::DoLoadState(PointerWrap& p)
{
  // Any cached peek tiles are about to become stale.
  InvalidatePeekCache(true);

  // Pull both textures out of the stream up front; either of these may fail.
  auto saved_color = g_texture_cache->DeserializeTexture(p);
  auto saved_depth = g_texture_cache->DeserializeTexture(p);

  // A differing layer count means the state was made with a different stereo mode, in which case
  // the saved contents are thrown away.
  const bool contents_usable =
      saved_color && saved_depth &&
      saved_color->texture->GetLayers() == m_efb_color_texture->GetLayers();
  if (!contents_usable)
  {
    WARN_LOG(VIDEO, "Failed to deserialize EFB contents. Clearing instead.");
    g_renderer->SetAndClearFramebuffer(
        m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}},
        g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f);
    return;
  }

  // A size mismatch is tolerated: color is resampled with linear filtering in that case.
  // Depth is always point sampled, since depth values must not be interpolated.
  const bool same_size = saved_color->texture->GetWidth() == m_efb_color_texture->GetWidth() &&
                         saved_color->texture->GetHeight() == m_efb_color_texture->GetHeight();

  // Draw the deserialized textures over the EFB with the restore pipeline.
  g_renderer->BeginUtilityDrawing();
  g_renderer->SetAndDiscardFramebuffer(m_efb_framebuffer.get());
  g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect());
  g_renderer->SetPipeline(m_efb_restore_pipeline.get());
  g_renderer->SetTexture(0, saved_color->texture.get());
  g_renderer->SetTexture(1, saved_depth->texture.get());
  g_renderer->SetSamplerState(0, same_size ? RenderState::GetPointSamplerState() :
                                             RenderState::GetLinearSamplerState());
  g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState());
  g_renderer->Draw(0, 3);
  g_renderer->EndUtilityDrawing();
}

View File

@ -17,6 +17,7 @@
#include "VideoCommon/TextureConfig.h" #include "VideoCommon/TextureConfig.h"
class NativeVertexFormat; class NativeVertexFormat;
class PointerWrap;
enum class EFBReinterpretType enum class EFBReinterpretType
{ {
@ -95,6 +96,9 @@ public:
void PokeEFBDepth(u32 x, u32 y, float depth); void PokeEFBDepth(u32 x, u32 y, float depth);
void FlushEFBPokes(); void FlushEFBPokes();
// Save state load/save.
void DoState(PointerWrap& p);
protected: protected:
struct EFBPokeVertex struct EFBPokeVertex
{ {
@ -145,6 +149,9 @@ protected:
void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count,
const AbstractPipeline* pipeline); const AbstractPipeline* pipeline);
void DoLoadState(PointerWrap& p);
void DoSaveState(PointerWrap& p);
std::unique_ptr<AbstractTexture> m_efb_color_texture; std::unique_ptr<AbstractTexture> m_efb_color_texture;
std::unique_ptr<AbstractTexture> m_efb_convert_color_texture; std::unique_ptr<AbstractTexture> m_efb_convert_color_texture;
std::unique_ptr<AbstractTexture> m_efb_depth_texture; std::unique_ptr<AbstractTexture> m_efb_depth_texture;
@ -156,6 +163,9 @@ protected:
std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer; std::unique_ptr<AbstractFramebuffer> m_efb_depth_resolve_framebuffer;
std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline; std::unique_ptr<AbstractPipeline> m_efb_depth_resolve_pipeline;
// Pipeline for restoring the contents of the EFB from a save state
std::unique_ptr<AbstractPipeline> m_efb_restore_pipeline;
// Format conversion shaders // Format conversion shaders
std::array<std::unique_ptr<AbstractPipeline>, 6> m_format_conversion_pipelines; std::array<std::unique_ptr<AbstractPipeline>, 6> m_format_conversion_pipelines;

View File

@ -644,4 +644,24 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
return ss.str(); return ss.str();
} }
std::string GenerateEFBRestorePixelShader()
{
  // Builds the pixel shader source used to restore the EFB: sampler 0 is written
  // to ocol0 and the red channel of sampler 1 is written to the depth output.
  // The depth output is an explicit SV_Depth parameter on D3D and gl_FragDepth
  // otherwise.
  const bool is_d3d = GetAPIType() == APIType::D3D;
  const char* const extra_main_params = is_d3d ? "out float depth : SV_Depth, " : "";
  const char* const depth_target = is_d3d ? "depth" : "gl_FragDepth";

  // Backends with a lower-left origin need the Y coordinate flipped.
  const char* const y_prefix = g_ActiveConfig.backend_info.bUsesLowerLeftOrigin ? "1.0 - " : "";

  std::stringstream ss;
  EmitSamplerDeclarations(ss, 0, 2, false);
  EmitPixelMainDeclaration(ss, 1, 0, "float4", extra_main_params);

  ss << "{\n"
     << " float3 coords = float3(v_tex0.x, " << y_prefix << "v_tex0.y, v_tex0.z);\n"
     << " ocol0 = ";
  EmitSampleTexture(ss, 0, "coords");
  ss << ";\n"
     << " " << depth_target << " = ";
  EmitSampleTexture(ss, 1, "coords");
  ss << ".r;\n"
     << "}\n";

  return ss.str();
}
} // namespace FramebufferShaderGen } // namespace FramebufferShaderGen

View File

@ -30,5 +30,6 @@ std::string GenerateEFBPokeVertexShader();
std::string GenerateColorPixelShader(); std::string GenerateColorPixelShader();
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples); std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format); std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);
std::string GenerateEFBRestorePixelShader();
} // namespace FramebufferShaderGen } // namespace FramebufferShaderGen

View File

@ -25,6 +25,7 @@
#include <imgui.h> #include <imgui.h>
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Config/Config.h" #include "Common/Config/Config.h"
#include "Common/Event.h" #include "Common/Event.h"
@ -1324,8 +1325,11 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
} }
// Update our last xfb values // Update our last xfb values
m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width; m_last_xfb_addr = xfb_addr;
m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height; m_last_xfb_ticks = ticks;
m_last_xfb_width = fb_width;
m_last_xfb_stride = fb_stride;
m_last_xfb_height = fb_height;
} }
else else
{ {
@ -1681,6 +1685,27 @@ bool Renderer::UseVertexDepthRange() const
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f; return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
} }
void Renderer::DoState(PointerWrap& p)
{
  // Field order defines the layout in the save state; do not reorder.
  p.Do(m_aspect_wide);
  p.Do(m_frame_count);
  p.Do(m_prev_efb_format);
  p.Do(m_last_xfb_ticks);
  p.Do(m_last_xfb_addr);
  p.Do(m_last_xfb_width);
  p.Do(m_last_xfb_stride);
  p.Do(m_last_xfb_height);

  // Everything below only applies when a state has just been loaded.
  if (p.GetMode() != PointerWrap::MODE_READ)
    return;

  // Reset the last XFB id so the next swap is not skipped as a duplicate frame...
  m_last_xfb_id = std::numeric_limits<u64>::max();

  // ...then present the XFB described by the values that were just read.
  Swap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height, m_last_xfb_ticks);
}
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler() std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{ {
return std::make_unique<VideoCommon::AsyncShaderCompiler>(); return std::make_unique<VideoCommon::AsyncShaderCompiler>();

View File

@ -41,6 +41,7 @@ class AbstractTexture;
class AbstractStagingTexture; class AbstractStagingTexture;
class NativeVertexFormat; class NativeVertexFormat;
class NetPlayChatUI; class NetPlayChatUI;
class PointerWrap;
struct TextureConfig; struct TextureConfig;
struct ComputePipelineConfig; struct ComputePipelineConfig;
struct AbstractPipelineConfig; struct AbstractPipelineConfig;
@ -237,6 +238,7 @@ public:
void ChangeSurface(void* new_surface_handle); void ChangeSurface(void* new_surface_handle);
void ResizeSurface(); void ResizeSurface();
bool UseVertexDepthRange() const; bool UseVertexDepthRange() const;
void DoState(PointerWrap& p);
virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler(); virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
@ -356,9 +358,10 @@ private:
// Tracking of XFB textures so we don't render duplicate frames. // Tracking of XFB textures so we don't render duplicate frames.
u64 m_last_xfb_id = std::numeric_limits<u64>::max(); u64 m_last_xfb_id = std::numeric_limits<u64>::max();
u64 m_last_xfb_ticks = 0;
// Note: Only used for auto-ir u32 m_last_xfb_addr = 0;
u32 m_last_xfb_width = 0; u32 m_last_xfb_width = 0;
u32 m_last_xfb_stride = 0;
u32 m_last_xfb_height = 0; u32 m_last_xfb_height = 0;
// NOTE: The methods below are called on the framedumping thread. // NOTE: The methods below are called on the framedumping thread.

View File

@ -15,6 +15,7 @@
#include "Common/Align.h" #include "Common/Align.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/FileUtil.h" #include "Common/FileUtil.h"
#include "Common/Hash.h" #include "Common/Hash.h"
@ -23,6 +24,7 @@
#include "Common/MemoryUtil.h" #include "Common/MemoryUtil.h"
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
#include "Core/Config/GraphicsSettings.h"
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/FifoPlayer/FifoPlayer.h" #include "Core/FifoPlayer/FifoPlayer.h"
#include "Core/FifoPlayer/FifoRecorder.h" #include "Core/FifoPlayer/FifoRecorder.h"
@ -404,6 +406,329 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer))); config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
} }
bool TextureCacheBase::CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format)
{
  // Keep the current staging texture when it is at least as large as requested
  // and has the requested format.
  if (m_readback_texture)
  {
    const auto& current = m_readback_texture->GetConfig();
    const bool large_enough = current.width >= width && current.height >= height;
    if (large_enough && current.format == format)
      return true;
  }

  // Otherwise replace it, never going below 128x128. The old texture is released
  // before the new one is requested.
  TextureConfig staging_config(std::max(width, 128u), std::max(height, 128u), 1, 1, 1, format, 0);
  m_readback_texture.reset();
  m_readback_texture =
      g_renderer->CreateStagingTexture(StagingTextureType::Readback, staging_config);

  // Reports whether creation succeeded.
  return static_cast<bool>(m_readback_texture);
}
void TextureCacheBase::SerializeTexture(AbstractTexture* tex, const TextureConfig& config,
                                        PointerWrap& p)
{
  // Measure mode only needs the byte count, so the actual readback is skipped there.
  const bool measure_only = p.GetMode() == PointerWrap::MODE_MEASURE;

  // The config is written first, followed by one vector holding every sub-image.
  p.DoPOD(config);

  std::vector<u8> texture_data;
  const bool can_fill =
      measure_only || CheckReadbackTexture(config.width, config.height, config.format);
  if (!can_fill)
  {
    // The vector stays empty in this case and is still written out below.
    PanicAlert("Failed to create staging texture for serialization");
  }
  else
  {
    // Every mip level of every layer is appended back to back, layer-major.
    for (u32 layer = 0; layer < config.layers; layer++)
    {
      for (u32 level = 0; level < config.levels; level++)
      {
        const u32 mip_width = std::max(config.width >> level, 1u);
        const u32 mip_height = std::max(config.height >> level, 1u);
        const auto mip_rect = tex->GetConfig().GetMipRect(level);

        // Grow the buffer by exactly one sub-image.
        const size_t row_pitch = AbstractTexture::CalculateStrideForFormat(config.format, mip_width);
        const size_t offset = texture_data.size();
        texture_data.resize(offset + row_pitch * mip_height);

        if (measure_only)
          continue;

        // Copy the sub-image into the staging texture, then read it into the buffer.
        m_readback_texture->CopyFromTexture(tex, mip_rect, layer, level, mip_rect);
        m_readback_texture->ReadTexels(mip_rect, &texture_data[offset],
                                       static_cast<u32>(row_pitch));
      }
    }
  }

  p.Do(texture_data);
}
std::optional<TextureCacheBase::TexPoolEntry> TextureCacheBase::DeserializeTexture(PointerWrap& p)
{
  // Both the config and the pixel data are always pulled through the PointerWrap,
  // whether or not a texture ends up being created from them.
  TextureConfig config;
  p.Do(config);

  std::vector<u8> texture_data;
  p.Do(texture_data);

  // A texture is only built when actually reading a state that contains pixel data.
  const bool reading = p.GetMode() == PointerWrap::MODE_READ;
  if (!reading || texture_data.empty())
    return std::nullopt;

  auto result = AllocateTexture(config);
  if (!result)
  {
    PanicAlert("Failed to create texture for deserialization");
    return std::nullopt;
  }

  // Sub-images are stored back to back, layer-major, one per mip level.
  size_t offset = 0;
  for (u32 layer = 0; layer < config.layers; layer++)
  {
    for (u32 level = 0; level < config.levels; level++)
    {
      const u32 mip_width = std::max(config.width >> level, 1u);
      const u32 mip_height = std::max(config.height >> level, 1u);
      const size_t row_pitch = AbstractTexture::CalculateStrideForFormat(config.format, mip_width);
      const size_t mip_bytes = row_pitch * mip_height;

      // Truncated data: stop uploading and return whatever has been loaded so far.
      const size_t end = offset + mip_bytes;
      if (end > texture_data.size())
      {
        ERROR_LOG(VIDEO, "Insufficient texture data for layer %u level %u", layer, level);
        return result;
      }

      result->texture->Load(level, mip_width, mip_height, mip_width, &texture_data[offset],
                            mip_bytes);
      offset = end;
    }
  }

  return result;
}
void TextureCacheBase::DoState(PointerWrap& p)
{
  // Pending EFB copies are flushed first, whether we are saving or loading.
  FlushEFBCopies();

  p.Do(last_entry_id);

  // Write and measure passes take the save path; every other mode takes the load path.
  switch (p.GetMode())
  {
  case PointerWrap::MODE_WRITE:
  case PointerWrap::MODE_MEASURE:
    DoSaveState(p);
    break;

  default:
    DoLoadState(p);
    break;
  }
}
// Writes the texture cache to a save state (also used for the measure pass).
//
// Layout produced, in order:
//   1. entry count, then for each entry its texture data followed by its metadata
//   2. "TextureCacheEntries" marker
//   3. reference pair count, then (id, id) pairs linking entries
//   4. address map count, then (address, id) pairs
//   5. hash map count, then (hash, id) pairs
// Entries are identified by sequential ids assigned in the order they are first seen.
void TextureCacheBase::DoSaveState(PointerWrap& p)
{
  std::map<const TCacheEntry*, u32> entry_map;
  std::vector<TCacheEntry*> entries_to_save;

  auto ShouldSaveEntry = [](const TCacheEntry* entry) {
    // We skip non-copies as they can be decoded from RAM when the state is loaded.
    // Storing them would duplicate data in the save state file, adding to decompression time.
    return entry->IsCopy();
  };

  // Returns the id for an entry, assigning the next sequential id on first sight.
  auto AddCacheEntryToMap = [&entry_map, &entries_to_save](TCacheEntry* entry) -> u32 {
    auto iter = entry_map.find(entry);
    if (iter != entry_map.end())
      return iter->second;

    // Since we are sequentially allocating texture entries, we need to save the textures in the
    // same order they were collected. This is because of iterating both the address and hash maps.
    // Therefore, the map is used for fast lookup, and the vector for ordering.
    u32 id = static_cast<u32>(entry_map.size());
    entry_map.emplace(entry, id);
    entries_to_save.push_back(entry);
    return id;
  };

  // Looks up an already-assigned id; empty when the entry is not being saved.
  auto GetCacheEntryId = [&entry_map](const TCacheEntry* entry) -> std::optional<u32> {
    auto iter = entry_map.find(entry);
    return iter != entry_map.end() ? std::make_optional(iter->second) : std::nullopt;
  };

  // Transform the textures_by_address and textures_by_hash maps to a mapping
  // of address/hash to entry ID.
  std::vector<std::pair<u32, u32>> textures_by_address_list;
  std::vector<std::pair<u64, u32>> textures_by_hash_list;
  if (Config::Get(Config::GFX_SAVE_TEXTURE_CACHE_TO_STATE))
  {
    for (const auto& it : textures_by_address)
    {
      if (ShouldSaveEntry(it.second))
      {
        u32 id = AddCacheEntryToMap(it.second);
        textures_by_address_list.push_back(std::make_pair(it.first, id));
      }
    }
    for (const auto& it : textures_by_hash)
    {
      if (ShouldSaveEntry(it.second))
      {
        u32 id = AddCacheEntryToMap(it.second);
        textures_by_hash_list.push_back(std::make_pair(it.first, id));
      }
    }
  }

  // Save the texture cache entries out in the order they were referenced.
  u32 size = static_cast<u32>(entries_to_save.size());
  p.Do(size);
  for (TCacheEntry* entry : entries_to_save)
  {
    g_texture_cache->SerializeTexture(entry->texture.get(), entry->texture->GetConfig(), p);
    entry->DoState(p);
  }
  p.DoMarker("TextureCacheEntries");

  // Save references for each cache entry.
  // As references are circular, we need to have everything created before linking entries.
  std::set<std::pair<u32, u32>> reference_pairs;
  for (const auto& it : entry_map)
  {
    const TCacheEntry* entry = it.first;
    // The map value already is this entry's id, so no second lookup is needed.
    const u32 id1 = it.second;
    for (const TCacheEntry* referenced_entry : entry->references)
    {
      // References to entries that are not being saved are dropped.
      auto id2 = GetCacheEntryId(referenced_entry);
      if (!id2)
        continue;

      // Each link is stored once, whichever direction it is encountered in first.
      auto refpair1 = std::make_pair(id1, *id2);
      auto refpair2 = std::make_pair(*id2, id1);
      if (reference_pairs.count(refpair1) == 0 && reference_pairs.count(refpair2) == 0)
        reference_pairs.insert(refpair1);
    }
  }

  size = static_cast<u32>(reference_pairs.size());
  p.Do(size);
  for (const auto& it : reference_pairs)
  {
    p.Do(it.first);
    p.Do(it.second);
  }

  size = static_cast<u32>(textures_by_address_list.size());
  p.Do(size);
  for (const auto& it : textures_by_address_list)
  {
    p.Do(it.first);
    p.Do(it.second);
  }

  size = static_cast<u32>(textures_by_hash_list.size());
  p.Do(size);
  for (const auto& it : textures_by_hash_list)
  {
    p.Do(it.first);
    p.Do(it.second);
  }

  // Free the readback texture to potentially save host-mapped GPU memory, depending on where
  // the driver mapped the staging buffer.
  m_readback_texture.reset();
}
// Reads the texture cache section of a save state.
//
// The stream is always consumed in full so the read position stays in sync, but the
// cache is only invalidated and repopulated when the PointerWrap is in MODE_READ.
// In any other mode every entry that is read is discarded again.
void TextureCacheBase::DoLoadState(PointerWrap& p)
{
  // Helper for getting a cache entry from an ID.
  std::map<u32, TCacheEntry*> id_map;
  auto GetEntry = [&id_map](u32 id) {
    auto iter = id_map.find(id);
    return iter == id_map.end() ? nullptr : iter->second;
  };

  // Only clear out state when actually restoring/loading.
  // Since we throw away entries when not in loading mode now, we don't need to check
  // before inserting entries into the cache, as GetEntry will always return null.
  const bool commit_state = p.GetMode() == PointerWrap::MODE_READ;
  if (commit_state)
    Invalidate();

  // Preload all cache entries.
  u32 size = 0;
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    // Even if the texture isn't valid, we still need to create the cache entry object
    // to update the point in the save state. We'll just throw it away if it's invalid.
    auto tex = g_texture_cache->DeserializeTexture(p);

    // DeserializeTexture returns an empty optional when not in read mode, when the
    // stored data is empty, or when allocation fails. Dereferencing it in that case
    // is undefined behaviour, so build the placeholder entry from null pointers.
    // NOTE(review): assumes TCacheEntry accepts a null texture/framebuffer; the
    // `entry->texture` check below already anticipates a null texture.
    TCacheEntry* entry =
        tex ? new TCacheEntry(std::move(tex->texture), std::move(tex->framebuffer)) :
              new TCacheEntry(nullptr, nullptr);
    entry->textures_by_hash_iter = g_texture_cache->textures_by_hash.end();
    entry->DoState(p);
    if (entry->texture && commit_state)
      id_map.emplace(i, entry);
    else
      delete entry;
  }
  p.DoMarker("TextureCacheEntries");

  // Link all cache entry references.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u32 id1 = 0, id2 = 0;
    p.Do(id1);
    p.Do(id2);
    // Links are skipped when either side was discarded above.
    TCacheEntry* e1 = GetEntry(id1);
    TCacheEntry* e2 = GetEntry(id2);
    if (e1 && e2)
      e1->CreateReference(e2);
  }

  // Fill in address map.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u32 addr = 0;
    u32 id = 0;
    p.Do(addr);
    p.Do(id);
    TCacheEntry* entry = GetEntry(id);
    if (entry)
      textures_by_address.emplace(addr, entry);
  }

  // Fill in hash map.
  p.Do(size);
  for (u32 i = 0; i < size; i++)
  {
    u64 hash = 0;
    u32 id = 0;
    p.Do(hash);
    p.Do(id);
    TCacheEntry* entry = GetEntry(id);
    if (entry)
      entry->textures_by_hash_iter = textures_by_hash.emplace(hash, entry);
  }
}
// Passes this entry's metadata fields through the PointerWrap, one field at a time.
// Called from both TextureCacheBase::DoSaveState and TextureCacheBase::DoLoadState,
// right after the entry's texture has been serialized/deserialized, so the same
// field order is used in both directions. Changing the order or the set of fields
// changes the save state layout.
// The texture contents themselves are not handled here.
void TextureCacheBase::TCacheEntry::DoState(PointerWrap& p)
{
// Location and identity of the entry.
p.Do(addr);
p.Do(size_in_bytes);
p.Do(base_hash);
p.Do(hash);
p.Do(format);
p.Do(memory_stride);
// Per-entry flags.
p.Do(is_efb_copy);
p.Do(is_custom_tex);
p.Do(may_have_overlapping_textures);
p.Do(tmem_only);
p.Do(has_arbitrary_mips);
p.Do(should_force_safe_hashing);
p.Do(is_xfb_copy);
p.Do(is_xfb_container);
p.Do(id);
p.Do(reference_changed);
// Native dimensions and the frame counter stored on the entry.
p.Do(native_width);
p.Do(native_height);
p.Do(native_levels);
p.Do(frameCount);
}
TextureCacheBase::TCacheEntry* TextureCacheBase::TCacheEntry*
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt) TLUTFormat tlutfmt)

View File

@ -24,6 +24,7 @@
class AbstractFramebuffer; class AbstractFramebuffer;
class AbstractStagingTexture; class AbstractStagingTexture;
class PointerWrap;
struct VideoConfig; struct VideoConfig;
struct TextureAndTLUTFormat struct TextureAndTLUTFormat
@ -185,6 +186,17 @@ public:
u32 GetNumLevels() const { return texture->GetConfig().levels; } u32 GetNumLevels() const { return texture->GetConfig().levels; }
u32 GetNumLayers() const { return texture->GetConfig().layers; } u32 GetNumLayers() const { return texture->GetConfig().layers; }
AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; } AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; }
void DoState(PointerWrap& p);
};
// Minimal version of TCacheEntry just for TexPool
struct TexPoolEntry
{
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
int frameCount = FRAMECOUNT_INVALID;
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
}; };
TextureCacheBase(); TextureCacheBase();
@ -224,6 +236,13 @@ public:
// Flushes all pending EFB copies to emulated RAM. // Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies(); void FlushEFBCopies();
// Texture Serialization
void SerializeTexture(AbstractTexture* tex, const TextureConfig& config, PointerWrap& p);
std::optional<TexPoolEntry> DeserializeTexture(PointerWrap& p);
// Save States
void DoState(PointerWrap& p);
// Returns false if the top/bottom row coefficients are zero. // Returns false if the top/bottom row coefficients are zero.
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
@ -256,15 +275,6 @@ protected:
static std::bitset<8> valid_bind_points; static std::bitset<8> valid_bind_points;
private: private:
// Minimal version of TCacheEntry just for TexPool
struct TexPoolEntry
{
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
int frameCount = FRAMECOUNT_INVALID;
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};
using TexAddrCache = std::multimap<u32, TCacheEntry*>; using TexAddrCache = std::multimap<u32, TCacheEntry*>;
using TexHashCache = std::multimap<u64, TCacheEntry*>; using TexHashCache = std::multimap<u64, TCacheEntry*>;
using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>; using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>;
@ -319,6 +329,10 @@ private:
// Returns an EFB copy staging texture to the pool, so it can be re-used. // Returns an EFB copy staging texture to the pool, so it can be re-used.
void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex); void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);
bool CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format);
void DoSaveState(PointerWrap& p);
void DoLoadState(PointerWrap& p);
TexAddrCache textures_by_address; TexAddrCache textures_by_address;
TexHashCache textures_by_hash; TexHashCache textures_by_hash;
TexPool texture_pool; TexPool texture_pool;
@ -354,6 +368,11 @@ private:
// List of pending EFB copies. It is important that the order is preserved for these, // List of pending EFB copies. It is important that the order is preserved for these,
// so that overlapping textures are written to guest RAM in the order they are issued. // so that overlapping textures are written to guest RAM in the order they are issued.
std::vector<TCacheEntry*> m_pending_efb_copies; std::vector<TCacheEntry*> m_pending_efb_copies;
// Staging texture used for readbacks.
// We store this in the class so that the same staging texture can be used for multiple
// readbacks, saving the overhead of allocating a new buffer every time.
std::unique_ptr<AbstractStagingTexture> m_readback_texture;
}; };
extern std::unique_ptr<TextureCacheBase> g_texture_cache; extern std::unique_ptr<TextureCacheBase> g_texture_cache;

View File

@ -338,9 +338,6 @@ void VertexManagerBase::Flush()
m_is_flushed = true; m_is_flushed = true;
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u", PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u",
g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans, g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
@ -464,6 +461,16 @@ void VertexManagerBase::Flush()
void VertexManagerBase::DoState(PointerWrap& p) void VertexManagerBase::DoState(PointerWrap& p)
{ {
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Flush old vertex data before loading state.
Flush();
// Clear all caches that touch RAM
// (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
VertexLoaderManager::MarkAllDirty();
}
p.Do(m_zslope); p.Do(m_zslope);
} }

View File

@ -40,6 +40,7 @@
#include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
@ -236,41 +237,22 @@ void VideoBackendBase::PopulateBackendInfo()
g_Config.Refresh(); g_Config.Refresh();
} }
// Run from the CPU thread
void VideoBackendBase::DoState(PointerWrap& p) void VideoBackendBase::DoState(PointerWrap& p)
{ {
bool software = false; if (!SConfig::GetInstance().bCPUThread)
p.Do(software);
if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{ {
// change mode to abort load of incompatible save state. VideoCommon_DoState(p);
p.SetMode(PointerWrap::MODE_VERIFY); return;
} }
VideoCommon_DoState(p); AsyncRequests::Event ev = {};
p.DoMarker("VideoCommon"); ev.do_save_state.p = &p;
ev.type = AsyncRequests::Event::DO_SAVE_STATE;
AsyncRequests::GetInstance()->PushEvent(ev, true);
// Refresh state. // Let the GPU thread sleep after loading the state, so we're not spinning if paused after loading
if (p.GetMode() == PointerWrap::MODE_READ) // a state. The next GP burst will wake it up again.
{ Fifo::GpuMaySleep();
m_invalid = true;
// Clear all caches that touch RAM
// (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
VertexLoaderManager::MarkAllDirty();
}
}
void VideoBackendBase::CheckInvalidState()
{
if (m_invalid)
{
m_invalid = false;
BPReload();
g_texture_cache->Invalidate();
}
} }
void VideoBackendBase::InitializeShared() void VideoBackendBase::InitializeShared()
@ -282,8 +264,6 @@ void VideoBackendBase::InitializeShared()
// do not initialize again for the config window // do not initialize again for the config window
m_initialized = true; m_initialized = true;
m_invalid = false;
CommandProcessor::Init(); CommandProcessor::Init();
Fifo::Init(); Fifo::Init();
OpcodeDecoder::Init(); OpcodeDecoder::Init();

View File

@ -63,18 +63,14 @@ public:
// Called by the UI thread when the graphics config is opened. // Called by the UI thread when the graphics config is opened.
static void PopulateBackendInfo(); static void PopulateBackendInfo();
// the implementation needs not do synchronization logic, because calls to it are surrounded by // Wrapper function which pushes the event to the GPU thread.
// PauseAndLock now
void DoState(PointerWrap& p); void DoState(PointerWrap& p);
void CheckInvalidState();
protected: protected:
void InitializeShared(); void InitializeShared();
void ShutdownShared(); void ShutdownShared();
bool m_initialized = false; bool m_initialized = false;
bool m_invalid = false;
}; };
extern std::vector<std::unique_ptr<VideoBackendBase>> g_available_video_backends; extern std::vector<std::unique_ptr<VideoBackendBase>> g_available_video_backends;

View File

@ -10,9 +10,12 @@
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h" #include "VideoCommon/Fifo.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h" #include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VertexShaderManager.h"
@ -21,6 +24,15 @@
void VideoCommon_DoState(PointerWrap& p) void VideoCommon_DoState(PointerWrap& p)
{ {
bool software = false;
p.Do(software);
if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{
// change mode to abort load of incompatible save state.
p.SetMode(PointerWrap::MODE_VERIFY);
}
// BP Memory // BP Memory
p.Do(bpmem); p.Do(bpmem);
p.DoMarker("BP Memory"); p.DoMarker("BP Memory");
@ -63,5 +75,19 @@ void VideoCommon_DoState(PointerWrap& p)
BoundingBox::DoState(p); BoundingBox::DoState(p);
p.DoMarker("BoundingBox"); p.DoMarker("BoundingBox");
// TODO: search for more data that should be saved and add it here g_framebuffer_manager->DoState(p);
p.DoMarker("FramebufferManager");
g_texture_cache->DoState(p);
p.DoMarker("TextureCache");
g_renderer->DoState(p);
p.DoMarker("Renderer");
// Refresh state.
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Inform backend of new state from registers.
BPReload();
}
} }