TextureCache: Support saving cache entries, including EFB copies

Stenzek 2019-06-29 19:27:53 +10:00
parent b26bb0605b
commit 1082468133
10 changed files with 418 additions and 48 deletions

Source/Core/Core/State.cpp

@@ -72,7 +72,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 110; // Last changed in PR 6321
static const u32 STATE_VERSION = 111; // Last changed in PR 8036
// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,
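
For context: every DoState function in this commit is built on Dolphin's PointerWrap (Common/ChunkFile.h), whose mode decides whether a single Do() call writes, reads, or merely measures, so saving and loading share one code path and a measuring pass can size the buffer first. A rough self-contained sketch of that pattern (simplified; the real class also handles verify mode, containers, and markers):

  #include <cstdint>
  #include <cstring>
  #include <vector>

  enum class Mode { Read, Write, Measure };

  struct Wrap
  {
    uint8_t* buf = nullptr;  // state buffer (unused while measuring)
    Mode mode = Mode::Measure;
    size_t offset = 0;  // after a Measure pass, this is the required size

    template <typename T>
    void Do(T& value)  // one function handles save, load and sizing
    {
      if (mode == Mode::Write)
        std::memcpy(buf + offset, &value, sizeof(T));
      else if (mode == Mode::Read)
        std::memcpy(&value, buf + offset, sizeof(T));
      offset += sizeof(T);  // Measure only advances the cursor
    }
  };

  int main()
  {
    uint32_t version = 111;
    Wrap measure;  // pass 1: measure to size the buffer
    measure.Do(version);
    std::vector<uint8_t> buffer(measure.offset);
    Wrap write{buffer.data(), Mode::Write, 0};  // pass 2: actually write
    write.Do(version);
    uint32_t loaded = 0;
    Wrap read{buffer.data(), Mode::Read, 0};  // loading reuses the same path
    read.Do(loaded);
    return loaded == version ? 0 : 1;
  }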

Source/Core/VideoCommon/AsyncRequests.cpp

@@ -11,6 +11,7 @@
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VideoBackendBase.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoState.h"
AsyncRequests AsyncRequests::s_singleton;
@@ -156,7 +157,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
break;
case Event::DO_SAVE_STATE:
g_video_backend->DoStateGPUThread(*e.do_save_state.p);
VideoCommon_DoState(*e.do_save_state.p);
break;
}
}
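
In dual core mode the savestate must be handled on the GPU thread that owns the video state, which is why the CPU thread pushes a DO_SAVE_STATE event carrying the PointerWrap and blocks until it completes. A self-contained sketch of that hand-off pattern (a simplified stand-in for AsyncRequests, not the actual implementation):

  #include <condition_variable>
  #include <cstdio>
  #include <functional>
  #include <mutex>
  #include <queue>
  #include <thread>

  static std::mutex s_mutex;
  static std::condition_variable s_cond;
  static std::queue<std::function<void()>> s_work;
  static bool s_quit = false;

  // "CPU thread": queue a task and block until the worker has run it.
  static void PushEventAndWait(std::function<void()> fn)
  {
    bool finished = false;
    {
      std::lock_guard<std::mutex> lock(s_mutex);
      s_work.push([&finished, fn] {
        fn();
        std::lock_guard<std::mutex> inner(s_mutex);
        finished = true;
        s_cond.notify_all();
      });
    }
    s_cond.notify_all();
    std::unique_lock<std::mutex> lock(s_mutex);
    s_cond.wait(lock, [&] { return finished; });
  }

  // "GPU thread": drain queued tasks until told to quit.
  static void WorkerLoop()
  {
    std::unique_lock<std::mutex> lock(s_mutex);
    while (!s_quit || !s_work.empty())
    {
      s_cond.wait(lock, [] { return s_quit || !s_work.empty(); });
      while (!s_work.empty())
      {
        auto fn = std::move(s_work.front());
        s_work.pop();
        lock.unlock();  // run the task without holding the queue lock
        fn();
        lock.lock();
      }
    }
  }

  int main()
  {
    std::thread gpu(WorkerLoop);
    int state = 0;
    PushEventAndWait([&] { state = 42; });  // e.g. "serialize the state now"
    std::printf("state=%d\n", state);
    {
      std::lock_guard<std::mutex> lock(s_mutex);
      s_quit = true;
    }
    s_cond.notify_all();
    gpu.join();
    return 0;
  }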

Source/Core/VideoCommon/RenderBase.cpp

@@ -25,6 +25,7 @@
#include <imgui.h>
#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/Config/Config.h"
#include "Common/Event.h"
@@ -1324,8 +1325,11 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks)
}
// Update our last xfb values
m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width;
m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height;
m_last_xfb_addr = xfb_addr;
m_last_xfb_ticks = ticks;
m_last_xfb_width = fb_width;
m_last_xfb_stride = fb_stride;
m_last_xfb_height = fb_height;
}
else
{
@@ -1681,6 +1685,27 @@
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
}
void Renderer::DoState(PointerWrap& p)
{
p.Do(m_aspect_wide);
p.Do(m_frame_count);
p.Do(m_prev_efb_format);
p.Do(m_last_xfb_ticks);
p.Do(m_last_xfb_addr);
p.Do(m_last_xfb_width);
p.Do(m_last_xfb_stride);
p.Do(m_last_xfb_height);
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Force the next xfb to be displayed.
m_last_xfb_id = std::numeric_limits<u64>::max();
// And actually display it.
Swap(m_last_xfb_addr, m_last_xfb_width, m_last_xfb_stride, m_last_xfb_height, m_last_xfb_ticks);
}
}
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
return std::make_unique<VideoCommon::AsyncShaderCompiler>();
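
On load, Renderer::DoState resets m_last_xfb_id to the maximum value before re-issuing Swap: this defeats the duplicate-frame check, so the restored XFB is guaranteed to be presented even if it matches what was last on screen. A toy illustration of that idiom (names here are hypothetical, not the actual RenderBase logic):

  #include <cstdint>
  #include <cstdio>
  #include <limits>

  static uint64_t s_last_xfb_id = std::numeric_limits<uint64_t>::max();

  // Present the frame unless it is the same XFB we showed last time.
  static void Present(uint64_t xfb_id)
  {
    if (xfb_id == s_last_xfb_id)
      return;  // duplicate frame, skip
    s_last_xfb_id = xfb_id;
    std::printf("presenting xfb %llu\n", static_cast<unsigned long long>(xfb_id));
  }

  int main()
  {
    Present(7);  // presented
    Present(7);  // skipped as a duplicate
    // After a state load, poison the comparison so the next Swap always presents.
    s_last_xfb_id = std::numeric_limits<uint64_t>::max();
    Present(7);  // presented again
    return 0;
  }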

Source/Core/VideoCommon/RenderBase.h

@@ -41,6 +41,7 @@ class AbstractTexture;
class AbstractStagingTexture;
class NativeVertexFormat;
class NetPlayChatUI;
class PointerWrap;
struct TextureConfig;
struct ComputePipelineConfig;
struct AbstractPipelineConfig;
@@ -237,6 +238,7 @@ public:
void ChangeSurface(void* new_surface_handle);
void ResizeSurface();
bool UseVertexDepthRange() const;
void DoState(PointerWrap& p);
virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
@@ -356,9 +358,10 @@ private:
// Tracking of XFB textures so we don't render duplicate frames.
u64 m_last_xfb_id = std::numeric_limits<u64>::max();
// Note: Only used for auto-ir
u64 m_last_xfb_ticks = 0;
u32 m_last_xfb_addr = 0;
u32 m_last_xfb_width = 0;
u32 m_last_xfb_stride = 0;
u32 m_last_xfb_height = 0;
// NOTE: The methods below are called on the framedumping thread.

Source/Core/VideoCommon/TextureCacheBase.cpp

@@ -15,6 +15,7 @@
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/Hash.h"
@@ -404,6 +405,326 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* entry, u32 new_width, u32 new_height)
config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer)));
}
bool TextureCacheBase::CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format)
{
if (m_readback_texture && m_readback_texture->GetConfig().width >= width &&
m_readback_texture->GetConfig().height >= height &&
m_readback_texture->GetConfig().format == format)
{
return true;
}
TextureConfig staging_config(std::max(width, 128u), std::max(height, 128u), 1, 1, 1, format, 0);
m_readback_texture.reset();
m_readback_texture =
g_renderer->CreateStagingTexture(StagingTextureType::Readback, staging_config);
return m_readback_texture != nullptr;
}
void TextureCacheBase::SerializeTexture(AbstractTexture* tex, const TextureConfig& config,
PointerWrap& p)
{
// If we're in measure mode, skip the actual readback to save some time.
const bool skip_readback = p.GetMode() == PointerWrap::MODE_MEASURE;
p.DoPOD(config);
std::vector<u8> texture_data;
if (skip_readback || CheckReadbackTexture(config.width, config.height, config.format))
{
// Save out each layer of the texture to the staging texture, and then
// append it onto the end of the vector. This gives us all the sub-images
// in one single buffer which can be written out to the save state.
for (u32 layer = 0; layer < config.layers; layer++)
{
for (u32 level = 0; level < config.levels; level++)
{
u32 level_width = std::max(config.width >> level, 1u);
u32 level_height = std::max(config.height >> level, 1u);
auto rect = tex->GetConfig().GetMipRect(level);
if (!skip_readback)
m_readback_texture->CopyFromTexture(tex, rect, layer, level, rect);
size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width);
size_t size = stride * level_height;
size_t start = texture_data.size();
texture_data.resize(texture_data.size() + size);
if (!skip_readback)
m_readback_texture->ReadTexels(rect, &texture_data[start], static_cast<u32>(stride));
}
}
}
else
{
PanicAlert("Failed to create staging texture for serialization");
}
p.Do(texture_data);
}
std::optional<TextureCacheBase::TexPoolEntry> TextureCacheBase::DeserializeTexture(PointerWrap& p)
{
TextureConfig config;
p.Do(config);
std::vector<u8> texture_data;
p.Do(texture_data);
if (p.GetMode() != PointerWrap::MODE_READ)
return std::nullopt;
auto tex = AllocateTexture(config);
if (!tex)
{
PanicAlert("Failed to create texture for deserialization");
return std::nullopt;
}
size_t start = 0;
for (u32 layer = 0; layer < config.layers; layer++)
{
for (u32 level = 0; level < config.levels; level++)
{
u32 level_width = std::max(config.width >> level, 1u);
u32 level_height = std::max(config.height >> level, 1u);
size_t stride = AbstractTexture::CalculateStrideForFormat(config.format, level_width);
size_t size = stride * level_height;
if ((start + size) > texture_data.size())
{
ERROR_LOG(VIDEO, "Insufficient texture data for layer %u level %u", layer, level);
return tex;
}
tex->texture->Load(level, level_width, level_height, level_width, &texture_data[start], size);
start += size;
}
}
return tex;
}
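
Note how SerializeTexture and DeserializeTexture walk layers and levels in the same nested order: the flat texture_data vector needs no per-image headers because both sides recompute each level's offset and size from the config alone. A standalone sketch of that size walk (assuming a simple 4-bytes-per-texel format in place of the real AbstractTexture::CalculateStrideForFormat):

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  int main()
  {
    // Layer-major, then mip level; every level tightly packed at its own stride.
    const uint32_t width = 640, height = 528, levels = 3, layers = 1;
    const size_t bytes_per_texel = 4;  // assumption; the real code derives this from the format
    size_t offset = 0;
    for (uint32_t layer = 0; layer < layers; layer++)
    {
      for (uint32_t level = 0; level < levels; level++)
      {
        const uint32_t w = std::max(width >> level, 1u);
        const uint32_t h = std::max(height >> level, 1u);
        const size_t stride = w * bytes_per_texel;
        std::printf("layer %u level %u: offset %zu, %zu bytes\n", layer, level, offset, stride * h);
        offset += stride * h;
      }
    }
    std::printf("total texture_data size: %zu bytes\n", offset);
    return 0;
  }
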
void TextureCacheBase::DoState(PointerWrap& p)
{
// Flush all pending XFB copies before either loading or saving.
FlushEFBCopies();
p.Do(last_entry_id);
if (p.GetMode() == PointerWrap::MODE_WRITE || p.GetMode() == PointerWrap::MODE_MEASURE)
DoSaveState(p);
else
DoLoadState(p);
}
void TextureCacheBase::DoSaveState(PointerWrap& p)
{
std::map<const TCacheEntry*, u32> entry_map;
std::vector<TCacheEntry*> entries_to_save;
auto ShouldSaveEntry = [](const TCacheEntry* entry) {
// We skip non-copies as they can be decoded from RAM when the state is loaded.
// Storing them would duplicate data in the save state file, adding to decompression time.
return entry->IsCopy();
};
auto AddCacheEntryToMap = [&entry_map, &entries_to_save, &p](TCacheEntry* entry) -> u32 {
auto iter = entry_map.find(entry);
if (iter != entry_map.end())
return iter->second;
// Since we are sequentially allocating texture entries, we need to save the textures in the
// same order they were collected. This is because of iterating both the address and hash maps.
// Therefore, the map is used for fast lookup, and the vector for ordering.
u32 id = static_cast<u32>(entry_map.size());
entry_map.emplace(entry, id);
entries_to_save.push_back(entry);
return id;
};
auto GetCacheEntryId = [&entry_map](const TCacheEntry* entry) -> std::optional<u32> {
auto iter = entry_map.find(entry);
return iter != entry_map.end() ? std::make_optional(iter->second) : std::nullopt;
};
// Transform the textures_by_address and textures_by_hash maps to a mapping
// of address/hash to entry ID.
std::vector<std::pair<u32, u32>> textures_by_address_list;
std::vector<std::pair<u64, u32>> textures_by_hash_list;
for (const auto& it : textures_by_address)
{
if (ShouldSaveEntry(it.second))
{
u32 id = AddCacheEntryToMap(it.second);
textures_by_address_list.push_back(std::make_pair(it.first, id));
}
}
for (const auto& it : textures_by_hash)
{
if (ShouldSaveEntry(it.second))
{
u32 id = AddCacheEntryToMap(it.second);
textures_by_hash_list.push_back(std::make_pair(it.first, id));
}
}
// Save the texture cache entries out in the order they were referenced.
u32 size = static_cast<u32>(entries_to_save.size());
p.Do(size);
for (TCacheEntry* entry : entries_to_save)
{
g_texture_cache->SerializeTexture(entry->texture.get(), entry->texture->GetConfig(), p);
entry->DoState(p);
}
p.DoMarker("TextureCacheEntries");
// Save references for each cache entry.
// As references are circular, we need to have everything created before linking entries.
std::set<std::pair<u32, u32>> reference_pairs;
for (const auto& it : entry_map)
{
const TCacheEntry* entry = it.first;
auto id1 = GetCacheEntryId(entry);
if (!id1)
continue;
for (const TCacheEntry* referenced_entry : entry->references)
{
auto id2 = GetCacheEntryId(referenced_entry);
if (!id2)
continue;
auto refpair1 = std::make_pair(*id1, *id2);
auto refpair2 = std::make_pair(*id2, *id1);
if (reference_pairs.count(refpair1) == 0 && reference_pairs.count(refpair2) == 0)
reference_pairs.insert(refpair1);
}
}
size = static_cast<u32>(reference_pairs.size());
p.Do(size);
for (const auto& it : reference_pairs)
{
p.Do(it.first);
p.Do(it.second);
}
size = static_cast<u32>(textures_by_address_list.size());
p.Do(size);
for (const auto& it : textures_by_address_list)
{
p.Do(it.first);
p.Do(it.second);
}
size = static_cast<u32>(textures_by_hash_list.size());
p.Do(size);
for (const auto& it : textures_by_hash_list)
{
p.Do(it.first);
p.Do(it.second);
}
// Free the readback texture to potentially save host-mapped GPU memory, depending on where
// the driver mapped the staging buffer.
m_readback_texture.reset();
}
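
The ID scheme above exists because a single TCacheEntry can be reachable from both textures_by_address and textures_by_hash: the map deduplicates entries while the vector preserves first-seen order, which lets the load side reassign identical IDs purely by position. A self-contained sketch of the idiom (plain strings standing in for cache entries):

  #include <cstdio>
  #include <map>
  #include <string>
  #include <vector>

  int main()
  {
    // The same object may be referenced from several indexes.
    const std::string a = "efb_copy_a", b = "efb_copy_b";
    const std::string* referenced[] = {&a, &b, &a};  // 'a' is referenced twice

    std::map<const std::string*, unsigned> entry_map;  // fast lookup: object -> id
    std::vector<const std::string*> entries_to_save;   // ordering: id -> object

    for (const std::string* e : referenced)
    {
      unsigned id;
      auto iter = entry_map.find(e);
      if (iter != entry_map.end())
      {
        id = iter->second;  // already assigned: reuse the id, don't store twice
      }
      else
      {
        id = static_cast<unsigned>(entry_map.size());
        entry_map.emplace(e, id);
        entries_to_save.push_back(e);
      }
      std::printf("reference -> id %u (%s)\n", id, e->c_str());
    }
    // Serialize entries_to_save in order; each id is implicit in its position.
    std::printf("%zu unique entries saved\n", entries_to_save.size());
    return 0;
  }
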
void TextureCacheBase::DoLoadState(PointerWrap& p)
{
// Helper for getting a cache entry from an ID.
std::map<u32, TCacheEntry*> id_map;
auto GetEntry = [&id_map](u32 id) {
auto iter = id_map.find(id);
return iter == id_map.end() ? nullptr : iter->second;
};
// Only clear out state when actually restoring/loading.
// Since we throw away entries when not in loading mode now, we don't need to check
// before inserting entries into the cache, as GetEntry will always return null.
const bool commit_state = p.GetMode() == PointerWrap::MODE_READ;
if (commit_state)
Invalidate();
// Preload all cache entries.
u32 size = 0;
p.Do(size);
for (u32 i = 0; i < size; i++)
{
// Even if the texture isn't valid, we still need to create the cache entry object
// to advance our position in the save state. We'll just throw it away if it's invalid.
auto tex = g_texture_cache->DeserializeTexture(p);
TCacheEntry* entry = new TCacheEntry(tex ? std::move(tex->texture) : nullptr,
tex ? std::move(tex->framebuffer) : nullptr);
entry->textures_by_hash_iter = g_texture_cache->textures_by_hash.end();
entry->DoState(p);
if (entry->texture && commit_state)
id_map.emplace(i, entry);
else
delete entry;
}
p.DoMarker("TextureCacheEntries");
// Link all cache entry references.
p.Do(size);
for (u32 i = 0; i < size; i++)
{
u32 id1 = 0, id2 = 0;
p.Do(id1);
p.Do(id2);
TCacheEntry* e1 = GetEntry(id1);
TCacheEntry* e2 = GetEntry(id2);
if (e1 && e2)
e1->CreateReference(e2);
}
// Fill in address map.
p.Do(size);
for (u32 i = 0; i < size; i++)
{
u32 addr = 0;
u32 id = 0;
p.Do(addr);
p.Do(id);
TCacheEntry* entry = GetEntry(id);
if (entry)
textures_by_address.emplace(addr, entry);
}
// Fill in hash map.
p.Do(size);
for (u32 i = 0; i < size; i++)
{
u64 hash = 0;
u32 id = 0;
p.Do(hash);
p.Do(id);
TCacheEntry* entry = GetEntry(id);
if (entry)
entry->textures_by_hash_iter = textures_by_hash.emplace(hash, entry);
}
}
void TextureCacheBase::TCacheEntry::DoState(PointerWrap& p)
{
p.Do(addr);
p.Do(size_in_bytes);
p.Do(base_hash);
p.Do(hash);
p.Do(format);
p.Do(memory_stride);
p.Do(is_efb_copy);
p.Do(is_custom_tex);
p.Do(may_have_overlapping_textures);
p.Do(tmem_only);
p.Do(has_arbitrary_mips);
p.Do(should_force_safe_hashing);
p.Do(is_xfb_copy);
p.Do(is_xfb_container);
p.Do(id);
p.Do(reference_changed);
p.Do(native_width);
p.Do(native_height);
p.Do(native_levels);
p.Do(frameCount);
}
TextureCacheBase::TCacheEntry*
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt)

Source/Core/VideoCommon/TextureCacheBase.h

@@ -24,6 +24,7 @@
class AbstractFramebuffer;
class AbstractStagingTexture;
class PointerWrap;
struct VideoConfig;
struct TextureAndTLUTFormat
@@ -185,6 +186,17 @@ public:
u32 GetNumLevels() const { return texture->GetConfig().levels; }
u32 GetNumLayers() const { return texture->GetConfig().layers; }
AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; }
void DoState(PointerWrap& p);
};
// Minimal version of TCacheEntry just for TexPool
struct TexPoolEntry
{
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
int frameCount = FRAMECOUNT_INVALID;
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};
TextureCacheBase();
@@ -224,6 +236,13 @@ public:
// Flushes all pending EFB copies to emulated RAM.
void FlushEFBCopies();
// Texture Serialization
void SerializeTexture(AbstractTexture* tex, const TextureConfig& config, PointerWrap& p);
std::optional<TexPoolEntry> DeserializeTexture(PointerWrap& p);
// Save States
void DoState(PointerWrap& p);
// Returns false if the top/bottom row coefficients are zero.
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
@@ -256,15 +275,6 @@ protected:
static std::bitset<8> valid_bind_points;
private:
// Minimal version of TCacheEntry just for TexPool
struct TexPoolEntry
{
std::unique_ptr<AbstractTexture> texture;
std::unique_ptr<AbstractFramebuffer> framebuffer;
int frameCount = FRAMECOUNT_INVALID;
TexPoolEntry(std::unique_ptr<AbstractTexture> tex, std::unique_ptr<AbstractFramebuffer> fb);
};
using TexAddrCache = std::multimap<u32, TCacheEntry*>;
using TexHashCache = std::multimap<u64, TCacheEntry*>;
using TexPool = std::unordered_multimap<TextureConfig, TexPoolEntry>;
@@ -319,6 +329,10 @@ private:
// Returns an EFB copy staging texture to the pool, so it can be re-used.
void ReleaseEFBCopyStagingTexture(std::unique_ptr<AbstractStagingTexture> tex);
bool CheckReadbackTexture(u32 width, u32 height, AbstractTextureFormat format);
void DoSaveState(PointerWrap& p);
void DoLoadState(PointerWrap& p);
TexAddrCache textures_by_address;
TexHashCache textures_by_hash;
TexPool texture_pool;
@@ -354,6 +368,11 @@ private:
// List of pending EFB copies. It is important that the order is preserved for these,
// so that overlapping textures are written to guest RAM in the order they are issued.
std::vector<TCacheEntry*> m_pending_efb_copies;
// Staging texture used for readbacks.
// We store this in the class so that the same staging texture can be used for multiple
// readbacks, saving the overhead of allocating a new buffer every time.
std::unique_ptr<AbstractStagingTexture> m_readback_texture;
};
extern std::unique_ptr<TextureCacheBase> g_texture_cache;

Source/Core/VideoCommon/VertexManagerBase.cpp

@@ -461,6 +461,16 @@ void VertexManagerBase::Flush()
void VertexManagerBase::DoState(PointerWrap& p)
{
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Flush old vertex data before loading state.
Flush();
// Clear all caches that touch RAM
// (these don't appear to touch any emulation state that gets saved, so this is only done on load).
VertexLoaderManager::MarkAllDirty();
}
p.Do(m_zslope);
}

Source/Core/VideoCommon/VideoBackendBase.cpp

@@ -241,7 +241,7 @@ void VideoBackendBase::DoState(PointerWrap& p)
{
if (!SConfig::GetInstance().bCPUThread)
{
DoStateGPUThread(p);
VideoCommon_DoState(p);
return;
}
@@ -255,34 +255,6 @@
Fifo::GpuMaySleep();
}
void VideoBackendBase::DoStateGPUThread(PointerWrap& p)
{
bool software = false;
p.Do(software);
if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{
// change mode to abort load of incompatible save state.
p.SetMode(PointerWrap::MODE_VERIFY);
}
VideoCommon_DoState(p);
p.DoMarker("VideoCommon");
// Refresh state.
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Inform backend of new state from registers.
g_vertex_manager->Flush();
g_texture_cache->Invalidate();
BPReload();
// Clear all caches that touch RAM
// (? these don't appear to touch any emulation state that gets saved. moved to on load only.)
VertexLoaderManager::MarkAllDirty();
}
}
void VideoBackendBase::InitializeShared()
{
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));

Source/Core/VideoCommon/VideoBackendBase.h

@@ -66,9 +66,6 @@ public:
// Wrapper function which pushes the event to the GPU thread.
void DoState(PointerWrap& p);
// Function which handles the real state load/save logic.
void DoStateGPUThread(PointerWrap& p);
protected:
void InitializeShared();
void ShutdownShared();

Source/Core/VideoCommon/VideoState.cpp

@@ -13,6 +13,8 @@
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
@@ -21,6 +23,15 @@
void VideoCommon_DoState(PointerWrap& p)
{
bool software = false;
p.Do(software);
if (p.GetMode() == PointerWrap::MODE_READ && software == true)
{
// change mode to abort load of incompatible save state.
p.SetMode(PointerWrap::MODE_VERIFY);
}
// BP Memory
p.Do(bpmem);
p.DoMarker("BP Memory");
@@ -63,5 +74,16 @@ void VideoCommon_DoState(PointerWrap& p)
BoundingBox::DoState(p);
p.DoMarker("BoundingBox");
// TODO: search for more data that should be saved and add it here
g_texture_cache->DoState(p);
p.DoMarker("TextureCache");
g_renderer->DoState(p);
p.DoMarker("Renderer");
// Refresh state.
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Inform backend of new state from registers.
BPReload();
}
}
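
The DoMarker calls throughout VideoCommon_DoState act as sync cookies: the save writes a known marker, and on load a mismatch means an earlier field was read with the wrong size, so the load can bail out instead of silently misinterpreting everything that follows. A simplified sketch of the idea (the real PointerWrap::DoMarker uses a numeric cookie, raises an alert, and aborts by switching the wrap's mode):

  #include <cstdio>
  #include <cstring>
  #include <vector>

  // Append (when writing) or verify (when reading) a cookie at the current position.
  static bool DoMarker(std::vector<unsigned char>& buf, size_t& pos, bool writing,
                       const char* cookie)
  {
    const size_t len = std::strlen(cookie);
    if (writing)
    {
      buf.insert(buf.end(), cookie, cookie + len);
      pos += len;
      return true;
    }
    const bool ok = pos + len <= buf.size() && std::memcmp(&buf[pos], cookie, len) == 0;
    pos += len;
    if (!ok)
      std::fprintf(stderr, "Bad marker '%s': stream is desynced, aborting load\n", cookie);
    return ok;
  }

  int main()
  {
    std::vector<unsigned char> state;
    size_t pos = 0;
    DoMarker(state, pos, true, "TextureCache");
    pos = 1;  // simulate a reader that consumed the wrong number of bytes earlier
    return DoMarker(state, pos, false, "TextureCache") ? 0 : 1;
  }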