From bf389b94d6d65c7985c98a1f0ac76a11c5ed7c44 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 19 Dec 2021 19:40:55 +1000 Subject: [PATCH] GS: Add texture preloading option --- pcsx2/Config.h | 3 +- pcsx2/GS/GS.cpp | 1 + pcsx2/GS/GSExtra.h | 3 + pcsx2/GS/GSState.cpp | 9 ++ pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 120 ++++++++++++++++++++++- pcsx2/GS/Renderers/HW/GSTextureCache.h | 9 +- pcsx2/GS/Window/GSwxDialog.cpp | 1 + pcsx2/Pcsx2Config.cpp | 1 + 8 files changed, 140 insertions(+), 7 deletions(-) diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 0540a226bb..a9cffb57b3 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -439,7 +439,8 @@ struct Pcsx2Config SaveRT : 1, SaveFrame : 1, SaveTexture : 1, - SaveDepth : 1; + SaveDepth : 1, + PreloadTexture : 1; }; }; diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index bcaf1bdde1..84fbee36a5 100644 --- a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -1303,6 +1303,7 @@ void GSApp::Init() m_default_configuration["paltex"] = "0"; m_default_configuration["png_compression_level"] = std::to_string(Z_BEST_SPEED); m_default_configuration["preload_frame_with_gs_data"] = "0"; + m_default_configuration["preload_texture"] = "0"; m_default_configuration["Renderer"] = std::to_string(static_cast(GSRendererType::Auto)); m_default_configuration["resx"] = "1024"; m_default_configuration["resy"] = "1024"; diff --git a/pcsx2/GS/GSExtra.h b/pcsx2/GS/GSExtra.h index 0899b9c10a..742b4ee555 100644 --- a/pcsx2/GS/GSExtra.h +++ b/pcsx2/GS/GSExtra.h @@ -110,6 +110,9 @@ static const GSVector2i default_rt_size(2048, 2048); static const GSVector2i default_rt_size(1280, 1024); #endif +// Largest texture width/height for which the preload/hash path is used; larger textures skip it unless GPU palette conversion is enabled. 
+static constexpr int MAXIMUM_PRELOAD_TEXTURE_SIZE = 512; + // Helper path to dump texture extern const std::string root_sw; extern const std::string root_hw; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 06a30cc8d0..061b1203bb 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -2556,6 +2556,15 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR GSVector4i tr(0, 0, w, h); + // don't bother checking when preload is on, since we're going to test the whole thing anyway + if (GSConfig.PreloadTexture && GSConfig.UseHardwareRenderer() && + (GSConfig.GPUPaletteConversion || + (w <= MAXIMUM_PRELOAD_TEXTURE_SIZE && h <= MAXIMUM_PRELOAD_TEXTURE_SIZE))) + { + r = tr; + return; + } + const int wms = CLAMP.WMS; const int wmt = CLAMP.WMT; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index e41372917d..4ca5869dea 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -20,6 +20,11 @@ #include "GS/GSIntrin.h" #include "GS/GSUtil.h" +#define XXH_STATIC_LINKING_ONLY 1 +#define XXH_INLINE_ALL 1 +#include "xxhash.h" + +bool GSTextureCache::m_paltex = false; bool GSTextureCache::m_disable_partial_invalidation = false; bool GSTextureCache::m_wrap_gs_mem = false; @@ -1210,7 +1215,7 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt) void GSTextureCache::IncAge() { - int maxage = m_src.m_used ? 3 : 30; + int maxage = GSConfig.PreloadTexture ? (m_src.m_used ? 30 : 60) : (m_src.m_used ? 3 : 6); // You can't use m_map[page] because Source* are duplicated on several pages. for (auto i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end();) @@ -1802,7 +1807,7 @@ GSTextureCache::Surface::~Surface() { // Shared textures are pointers copy. Therefore no allocation // to recycle. 
- if (!m_shared_texture) + if (!m_shared_texture && m_texture) g_gs_device->Recycle(m_texture); } @@ -1854,6 +1859,7 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR else { memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0)); + memset(m_layer_hash, 0, sizeof(m_layer_hash)); memset(m_valid, 0, sizeof(m_valid)); @@ -1887,8 +1893,14 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; - int tw = std::max(1 << m_TEX0.TW, bs.x); - int th = std::max(1 << m_TEX0.TH, bs.y); + const int tw = 1 << m_TEX0.TW; + const int th = 1 << m_TEX0.TH; + const bool preload = (GSConfig.PreloadTexture && (GSConfig.GPUPaletteConversion || (tw <= MAXIMUM_PRELOAD_TEXTURE_SIZE && th <= MAXIMUM_PRELOAD_TEXTURE_SIZE))); + if (preload) + { + PreloadUpdate(tw, th, layer); + return; + } GSVector4i r = rect.ralign(bs); @@ -1961,7 +1973,6 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer) if (blocks > 0) { g_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 
2 : 0)); - Flush(m_write.count, layer); } } @@ -2088,6 +2099,105 @@ void GSTextureCache::Source::Flush(u32 count, int layer) m_write.count -= count; } +GSTextureCache::Source::HashType GSTextureCache::Source::HashTexture(u8* buff, u32 row_size, u32 pitch, u32 height) +{ + if (row_size == pitch) + { + // fast path since it's all packed + return XXH3_64bits(buff, row_size * height); + } + + // slow path where we have to process rows-at-a-time + XXH3_state_t st; + XXH3_64bits_reset(&st); + for (u32 row = 0; row < height; row++) + { + XXH3_64bits_update(&st, buff, row_size); + buff += pitch; + } + return XXH3_64bits_digest(&st); +} + +void GSTextureCache::Source::PreloadUpdate(int tw, int th, int layer) +{ + const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs; + const GSOffset& off = m_renderer->m_context->offset.tex; + const GSVector4i rect(0, 0, tw, th); + const GSVector4i block_rect(rect.ralign(bs)); + GSOffset::BNHelper bn = off.bnMulti(0, 0); + + // flag everything as valid + if (m_repeating) + { + for (int y = block_rect.top; y < block_rect.bottom; y += bs.y, bn.nextBlockY()) + { + for (int x = block_rect.left; x < block_rect.right; bn.nextBlockX(), x += bs.x) + { + const u32 i = static_cast((bn.blkY() << 7) + bn.blkX()); + u32 block = bn.valueNoWrap(); + + if (block < MAX_BLOCKS || m_wrap_gs_mem) + { + u32 addr = i % MAX_BLOCKS; + + u32 row = addr >> 5u; + u32 col = 1 << (addr & 31u); + m_valid[row] |= col; + } + } + } + } + else + { + for (int y = block_rect.top; y < block_rect.bottom; y += bs.y, bn.nextBlockY()) + { + for (int x = block_rect.left; x < block_rect.right; x += bs.x, bn.nextBlockX()) + { + u32 block = bn.valueNoWrap(); + + if (block < MAX_BLOCKS || m_wrap_gs_mem) + { + block %= MAX_BLOCKS; + + u32 row = block >> 5u; + u32 col = 1 << (block & 31u); + m_valid[row] |= col; + } + } + } + } + + if (layer == 0) + m_complete = true; + + // decode texture to temporary memory + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM]; + 
const int read_width = std::max(tw, psm.bs.x); + u32 pitch = static_cast(read_width) * sizeof(u32); + u32 row_size = static_cast(tw) * sizeof(u32); + GSLocalMemory::readTexture rtx = psm.rtx; + if (m_palette) + { + pitch >>= 2; + row_size >>= 2; + rtx = psm.rtxP; + } + + u8* buff = m_temp; + (m_renderer->m_mem.*rtx)(off, block_rect, buff, pitch, m_TEXA); + + // hash the texture + const HashType hash = HashTexture(buff, row_size, pitch, static_cast(th)); + const u8 layer_bit = static_cast(1) << layer; + if ((m_valid_hashes & layer_bit) && m_layer_hash[layer] == hash) + return; + + // reupload + m_valid_hashes |= layer_bit; + m_layer_hash[layer] = hash; + m_texture->Update(rect, buff, pitch, layer); +} + bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key) { return PaletteKeyEqual()(palette_key, m_palette_obj->GetPaletteKey()); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index c37c9eb1d2..b5c3168ab3 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -106,6 +106,11 @@ public: u32 count; } m_write; + using HashType = u64; + + HashType HashTexture(u8* buff, u32 row_size, u32 pitch, u32 height); + void PreloadUpdate(int tw, int th, int layer); + void Write(const GSVector4i& r, int layer); void Flush(u32 count, int layer); @@ -114,6 +119,7 @@ public: GSTexture* m_palette; u32 m_valid[MAX_PAGES]; // each u32 bits map to the 32 blocks of that page GSVector4i m_valid_rect; + u8 m_valid_hashes = 0; bool m_target; bool m_complete; bool m_repeating; @@ -124,6 +130,7 @@ public: GSTexture* m_from_target; GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0 GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value + HashType m_layer_hash[7]; // Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase std::array m_erase_it; GSOffset::PageLooper m_pages; @@ -214,7 +221,7 @@ protected: PaletteMap m_palette_map; SourceMap 
m_src; FastList m_dst[2]; - bool m_paltex; + static bool m_paltex; bool m_preload_frame; u8* m_temp; bool m_can_convert_depth; diff --git a/pcsx2/GS/Window/GSwxDialog.cpp b/pcsx2/GS/Window/GSwxDialog.cpp index 8c0f6970c6..826a359989 100644 --- a/pcsx2/GS/Window/GSwxDialog.cpp +++ b/pcsx2/GS/Window/GSwxDialog.cpp @@ -285,6 +285,7 @@ RendererTab::RendererTab(wxWindow* parent) auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq); auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; }; + m_ui.addCheckBox(hw_checks_box, "Preload Textures", "preload_texture", -1, hw_prereq); auto* hw_choice_grid = new wxFlexGridSizer(2, space, space); diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 008e7b421a..c2e2023636 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -506,6 +506,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingBoolEx(SaveFrame, "savef"); GSSettingBoolEx(SaveTexture, "savet"); GSSettingBoolEx(SaveDepth, "savez"); + GSSettingBoolEx(PreloadTexture, "preload_texture"); GSSettingIntEnumEx(InterlaceMode, "interlace");