From e06f1f100204a60b28d34c380cc91a406b8e3e6a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 31 Dec 2023 19:40:10 +1000 Subject: [PATCH] GPU: Add hardware texture cache --- src/common/gsvector_neon.h | 4 + src/common/gsvector_nosimd.h | 4 + src/common/gsvector_sse.h | 4 + src/core/CMakeLists.txt | 4 +- src/core/core.vcxproj | 4 +- src/core/core.vcxproj.filters | 4 +- src/core/fullscreen_ui.cpp | 60 +- src/core/gpu.cpp | 4 +- src/core/gpu_commands.cpp | 8 +- src/core/gpu_hw.cpp | 351 +- src/core/gpu_hw.h | 29 +- src/core/gpu_hw_shadergen.cpp | 118 +- src/core/gpu_hw_shadergen.h | 6 +- src/core/gpu_hw_texture_cache.cpp | 3261 +++++++++++++++++ src/core/gpu_hw_texture_cache.h | 147 + src/core/gpu_sw.cpp | 7 +- src/core/gpu_types.h | 156 +- src/core/hotkeys.cpp | 9 +- src/core/save_state_version.h | 2 +- src/core/settings.cpp | 215 +- src/core/settings.h | 45 +- src/core/system.cpp | 109 +- src/core/texture_replacements.cpp | 335 -- src/core/texture_replacements.h | 30 - src/duckstation-qt/CMakeLists.txt | 1 + src/duckstation-qt/duckstation-qt.vcxproj | 3 + .../duckstation-qt.vcxproj.filters | 1 + src/duckstation-qt/graphicssettingswidget.cpp | 204 +- src/duckstation-qt/graphicssettingswidget.h | 3 +- src/duckstation-qt/graphicssettingswidget.ui | 162 +- src/duckstation-qt/mainwindow.cpp | 12 + src/duckstation-qt/mainwindow.h | 1 + src/duckstation-qt/mainwindow.ui | 15 +- src/duckstation-qt/qthost.cpp | 13 + src/duckstation-qt/qthost.h | 1 + .../texturereplacementsettingsdialog.ui | 380 ++ 36 files changed, 5020 insertions(+), 692 deletions(-) create mode 100644 src/core/gpu_hw_texture_cache.cpp create mode 100644 src/core/gpu_hw_texture_cache.h delete mode 100644 src/core/texture_replacements.cpp delete mode 100644 src/core/texture_replacements.h create mode 100644 src/duckstation-qt/texturereplacementsettingsdialog.ui diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index 4c7cecd61..9a558269f 100644 --- a/src/common/gsvector_neon.h +++ 
b/src/common/gsvector_neon.h @@ -2998,6 +2998,10 @@ public: return GSVector4i(vsetq_lane_s32(high, vsetq_lane_s32(low, vdupq_n_s32(0), 0), 1)); } + ALWAYS_INLINE GSVector2 xy() const { return GSVector2(vget_low_s32(v4s)); } + + ALWAYS_INLINE GSVector2 zw() const { return GSVector2(vget_high_s32(v4s)); } + #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \ { \ diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index abe055730..b7a719858 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -2289,6 +2289,10 @@ public: return GSVector4i(static_cast(F64[0]), static_cast(F64[1]), 0, 0); } + ALWAYS_INLINE GSVector2 xy() const { return GSVector2(x, y); } + + ALWAYS_INLINE GSVector2 zw() const { return GSVector2(z, w); } + #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); } diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index e62246ace..ae203d07d 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -2365,6 +2365,10 @@ public: ALWAYS_INLINE GSVector4i f64toi32() const { return GSVector4i(_mm_cvttpd_epi32(_mm_castps_pd(m))); } + ALWAYS_INLINE GSVector2 xy() const { return GSVector2(m); } + + ALWAYS_INLINE GSVector2 zw() const { return GSVector2(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 2, 3, 2))); } + #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \ { \ diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 5524cf2f2..3bd9a7029 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -51,6 +51,8 @@ add_library(core gpu_hw.h gpu_hw_shadergen.cpp gpu_hw_shadergen.h + gpu_hw_texture_cache.cpp + gpu_hw_texture_cache.h gpu_shadergen.cpp gpu_shadergen.h gpu_sw.cpp @@ -108,8 +110,6 @@ add_library(core spu.h system.cpp system.h - 
texture_replacements.cpp - texture_replacements.h timers.cpp timers.h timing_event.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index addb7a1a8..e05bab768 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -47,6 +47,7 @@ + @@ -81,7 +82,6 @@ - @@ -124,6 +124,7 @@ + @@ -161,7 +162,6 @@ - diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index ec0c37da1..4672c9bba 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -46,7 +46,6 @@ - @@ -67,6 +66,7 @@ + @@ -116,7 +116,6 @@ - @@ -140,6 +139,7 @@ + diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index be83f63f0..7dd6c25df 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -4502,23 +4502,45 @@ void FullscreenUI::DrawDisplaySettingsPage() MenuHeading(FSUI_CSTR("Texture Replacements")); - DrawToggleSetting(bsi, FSUI_CSTR("Enable VRAM Write Texture Replacement"), - FSUI_CSTR("Enables the replacement of background textures in supported games."), - "TextureReplacements", "EnableVRAMWriteReplacements", false); - DrawToggleSetting(bsi, FSUI_CSTR("Preload Replacement Textures"), - FSUI_CSTR("Loads all replacement texture to RAM, reducing stuttering at runtime."), - "TextureReplacements", "PreloadTextures", false); + ActiveButton(FSUI_CSTR("The texture cache is currently experimental, and may cause rendering errors in some games."), + false, false, ImGuiFullscreen::LAYOUT_MENU_BUTTON_HEIGHT_NO_SUMMARY, g_large_font); + + DrawToggleSetting(bsi, FSUI_CSTR("Enable Texture Cache"), + FSUI_CSTR("Enables caching of guest textures, required for texture replacement."), "GPU", + "EnableTextureCache", false); DrawToggleSetting(bsi, FSUI_CSTR("Use Old MDEC Routines"), FSUI_CSTR("Enables the older, less accurate MDEC decoding routines. 
May be required for old " "replacement backgrounds to match/load."), "Hacks", "UseOldMDECRoutines", false); - DrawToggleSetting(bsi, FSUI_CSTR("Dump Replaceable VRAM Writes"), - FSUI_CSTR("Writes textures which can be replaced to the dump directory."), "TextureReplacements", + const bool texture_cache_enabled = GetEffectiveBoolSetting(bsi, "GPU", "EnableTextureCache", false); + DrawToggleSetting(bsi, FSUI_CSTR("Enable Texture Replacements"), + FSUI_CSTR("Enables loading of replacement textures. Not compatible with all games."), + "TextureReplacements", "EnableTextureReplacements", false, texture_cache_enabled); + DrawToggleSetting( + bsi, FSUI_CSTR("Enable Texture Dumping"), + FSUI_CSTR("Enables dumping of textures to image files, which can be replaced. Not compatible with all games."), + "TextureReplacements", "DumpTextures", false, texture_cache_enabled); + DrawToggleSetting(bsi, FSUI_CSTR("Dump Replaced Textures"), + FSUI_CSTR("Dumps textures that have replacements already loaded."), "TextureReplacements", + "DumpReplacedTextures", false, texture_cache_enabled); + + DrawToggleSetting(bsi, FSUI_CSTR("Enable VRAM Write Texture Replacement"), + FSUI_CSTR("Enables the replacement of background textures in supported games."), + "TextureReplacements", "EnableVRAMWriteReplacements", false); + + DrawToggleSetting(bsi, FSUI_CSTR("Enable VRAM Write Dumping"), + FSUI_CSTR("Writes backgrounds that can be replaced to the dump directory."), "TextureReplacements", "DumpVRAMWrites", false); - DrawToggleSetting(bsi, FSUI_CSTR("Set VRAM Write Dump Alpha Channel"), - FSUI_CSTR("Clears the mask/transparency bit in VRAM write dumps."), "TextureReplacements", - "DumpVRAMWriteForceAlphaChannel", true); + + DrawToggleSetting(bsi, FSUI_CSTR("Preload Replacement Textures"), + FSUI_CSTR("Loads all replacement texture to RAM, reducing stuttering at runtime."), + "TextureReplacements", "PreloadTextures", false, + ((texture_cache_enabled && + GetEffectiveBoolSetting(bsi, 
"TextureReplacements", "EnableTextureReplacements", false)) || + GetEffectiveBoolSetting(bsi, "TextureReplacements", "EnableVRAMWriteReplacements", false))); + + DrawFolderSetting(bsi, FSUI_CSTR("Textures Directory"), "Folders", "Textures", EmuFolders::Textures); EndMenuButtons(); } @@ -7239,7 +7261,6 @@ TRANSLATE_NOOP("FullscreenUI", "Clear Settings"); TRANSLATE_NOOP("FullscreenUI", "Clear Shaders"); TRANSLATE_NOOP("FullscreenUI", "Clears a shader from the chain."); TRANSLATE_NOOP("FullscreenUI", "Clears all settings set for this game."); -TRANSLATE_NOOP("FullscreenUI", "Clears the mask/transparency bit in VRAM write dumps."); TRANSLATE_NOOP("FullscreenUI", "Close"); TRANSLATE_NOOP("FullscreenUI", "Close Game"); TRANSLATE_NOOP("FullscreenUI", "Close Menu"); @@ -7319,7 +7340,8 @@ TRANSLATE_NOOP("FullscreenUI", "Downsampling"); TRANSLATE_NOOP("FullscreenUI", "Downsampling Display Scale"); TRANSLATE_NOOP("FullscreenUI", "Duck icon by icons8 (https://icons8.com/icon/74847/platforms.undefined.short-title)"); TRANSLATE_NOOP("FullscreenUI", "DuckStation is a free simulator/emulator of the Sony PlayStation(TM) console, focusing on playability, speed, and long-term maintainability."); -TRANSLATE_NOOP("FullscreenUI", "Dump Replaceable VRAM Writes"); +TRANSLATE_NOOP("FullscreenUI", "Dump Replaced Textures"); +TRANSLATE_NOOP("FullscreenUI", "Dumps textures that have replacements already loaded."); TRANSLATE_NOOP("FullscreenUI", "Emulation Settings"); TRANSLATE_NOOP("FullscreenUI", "Emulation Speed"); TRANSLATE_NOOP("FullscreenUI", "Enable 8MB RAM"); @@ -7338,6 +7360,10 @@ TRANSLATE_NOOP("FullscreenUI", "Enable Rewinding"); TRANSLATE_NOOP("FullscreenUI", "Enable SDL Input Source"); TRANSLATE_NOOP("FullscreenUI", "Enable Subdirectory Scanning"); TRANSLATE_NOOP("FullscreenUI", "Enable TTY Logging"); +TRANSLATE_NOOP("FullscreenUI", "Enable Texture Cache"); +TRANSLATE_NOOP("FullscreenUI", "Enable Texture Dumping"); +TRANSLATE_NOOP("FullscreenUI", "Enable Texture Replacements"); 
+TRANSLATE_NOOP("FullscreenUI", "Enable VRAM Write Dumping"); TRANSLATE_NOOP("FullscreenUI", "Enable VRAM Write Texture Replacement"); TRANSLATE_NOOP("FullscreenUI", "Enable XInput Input Source"); TRANSLATE_NOOP("FullscreenUI", "Enable debugging when supported by the host's renderer API. Only for developer use."); @@ -7345,6 +7371,9 @@ TRANSLATE_NOOP("FullscreenUI", "Enable/Disable the Player LED on DualSense contr TRANSLATE_NOOP("FullscreenUI", "Enables alignment and bus exceptions. Not needed for any known games."); TRANSLATE_NOOP("FullscreenUI", "Enables an additional 6MB of RAM to obtain a total of 2+6 = 8MB, usually present on dev consoles."); TRANSLATE_NOOP("FullscreenUI", "Enables an additional three controller slots on each port. Not supported in all games."); +TRANSLATE_NOOP("FullscreenUI", "Enables caching of guest textures, required for texture replacement."); +TRANSLATE_NOOP("FullscreenUI", "Enables dumping of textures to image files, which can be replaced. Not compatible with all games."); +TRANSLATE_NOOP("FullscreenUI", "Enables loading of replacement textures. Not compatible with all games."); TRANSLATE_NOOP("FullscreenUI", "Enables smooth scrolling of menus in Big Picture UI."); TRANSLATE_NOOP("FullscreenUI", "Enables the older, less accurate MDEC decoding routines. May be required for old replacement backgrounds to match/load."); TRANSLATE_NOOP("FullscreenUI", "Enables the replacement of background textures in supported games."); @@ -7626,7 +7655,6 @@ TRANSLATE_NOOP("FullscreenUI", "Selects the view that the game list will open to TRANSLATE_NOOP("FullscreenUI", "Serial"); TRANSLATE_NOOP("FullscreenUI", "Session: {}"); TRANSLATE_NOOP("FullscreenUI", "Set Input Binding"); -TRANSLATE_NOOP("FullscreenUI", "Set VRAM Write Dump Alpha Channel"); TRANSLATE_NOOP("FullscreenUI", "Sets a threshold for discarding precise values when exceeded. 
May help with glitches in some games."); TRANSLATE_NOOP("FullscreenUI", "Sets a threshold for discarding the emulated depth buffer. May help in some games."); TRANSLATE_NOOP("FullscreenUI", "Sets the fast forward speed. It is not guaranteed that this speed will be reached on all systems."); @@ -7705,10 +7733,12 @@ TRANSLATE_NOOP("FullscreenUI", "Temporarily disables all enhancements, useful wh TRANSLATE_NOOP("FullscreenUI", "Test Unofficial Achievements"); TRANSLATE_NOOP("FullscreenUI", "Texture Filtering"); TRANSLATE_NOOP("FullscreenUI", "Texture Replacements"); +TRANSLATE_NOOP("FullscreenUI", "Textures Directory"); TRANSLATE_NOOP("FullscreenUI", "The SDL input source supports most controllers."); TRANSLATE_NOOP("FullscreenUI", "The XInput source provides support for XBox 360/XBox One/XBox Series controllers."); TRANSLATE_NOOP("FullscreenUI", "The audio backend determines how frames produced by the emulator are submitted to the host."); TRANSLATE_NOOP("FullscreenUI", "The selected memory card image will be used in shared mode for this slot."); +TRANSLATE_NOOP("FullscreenUI", "The texture cache is currently experimental, and may cause rendering errors in some games."); TRANSLATE_NOOP("FullscreenUI", "This game has no achievements."); TRANSLATE_NOOP("FullscreenUI", "This game has no leaderboards."); TRANSLATE_NOOP("FullscreenUI", "Threaded Rendering"); @@ -7763,7 +7793,7 @@ TRANSLATE_NOOP("FullscreenUI", "When playing a multi-disc game and using per-gam TRANSLATE_NOOP("FullscreenUI", "When this option is chosen, the clock speed set below will be used."); TRANSLATE_NOOP("FullscreenUI", "Widescreen Rendering"); TRANSLATE_NOOP("FullscreenUI", "Wireframe Rendering"); -TRANSLATE_NOOP("FullscreenUI", "Writes textures which can be replaced to the dump directory."); +TRANSLATE_NOOP("FullscreenUI", "Writes backgrounds that can be replaced to the dump directory."); TRANSLATE_NOOP("FullscreenUI", "Yes, {} now and risk memory card corruption."); TRANSLATE_NOOP("FullscreenUI", 
"\"Challenge\" mode for achievements, including leaderboard tracking. Disables save state, cheats, and slowdown functions."); TRANSLATE_NOOP("FullscreenUI", "\"PlayStation\" and \"PSX\" are registered trademarks of Sony Interactive Entertainment Europe Limited. This software is not affiliated in any way with Sony Interactive Entertainment."); diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 7c3723105..35dfb8b95 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1555,8 +1555,8 @@ void GPU::SetDrawMode(u16 value) if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) return; - m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK)); + m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != + (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 60f97a15b..0f6f408cd 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -2,9 +2,9 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu.h" +#include "gpu_hw_texture_cache.h" #include "interrupt_controller.h" #include "system.h" -#include "texture_replacements.h" #include "common/assert.h" #include "common/log.h" @@ -532,10 +532,10 @@ void GPU::FinishVRAMWrite() m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, m_blit_buffer.data(), true); } - if (g_settings.texture_replacements.ShouldDumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height)) + if (GPUTextureCache::ShouldDumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height)) { - TextureReplacements::DumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height, - reinterpret_cast(m_blit_buffer.data())); + 
GPUTextureCache::DumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height, + reinterpret_cast(m_blit_buffer.data())); } UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index ea0f24d48..a5d4ca122 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -194,6 +194,8 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { + GPUTextureCache::Shutdown(); + if (m_sw_renderer) { m_sw_renderer->Shutdown(); @@ -262,6 +264,8 @@ bool GPU_HW::Initialize() m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); + m_use_texture_cache = g_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; CheckSettings(); @@ -282,13 +286,27 @@ bool GPU_HW::Initialize() return false; } + if (m_use_texture_cache) + { + if (!GPUTextureCache::Initialize()) + { + ERROR_LOG("Failed to initialize texture cache, disabling."); + m_use_texture_cache = false; + } + } + UpdateDownsamplingLevels(); + RestoreDeviceContext(); return true; } void GPU_HW::Reset(bool clear_vram) { + // Texture cache needs to be invalidated before we load, otherwise we dump black. + if (m_use_texture_cache) + GPUTextureCache::Invalidate(); + if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); @@ -364,6 +382,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di else if (sw.IsReading()) { // Need to update the VRAM copy on the GPU with the state data. + // Would invalidate the TC, but base DoState() calls Reset(). 
UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); } @@ -373,10 +392,12 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); ClearVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle(); + UpdateVRAMReadTexture(true, false); + ClearVRAMDirtyRectangle(); ResetBatchVertexDepth(); } - return true; + return GPUTextureCache::DoState(sw, !m_use_texture_cache); } void GPU_HW::RestoreDeviceContext() @@ -401,7 +422,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || - m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); + m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || + (!old_settings.gpu_texture_cache && g_settings.gpu_texture_cache)); const bool shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || prev_force_progressive_scan != m_force_progressive_scan || @@ -468,6 +490,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); + m_use_texture_cache = g_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); @@ -521,6 +545,23 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDepthBufferFromMaskBit(); } + if 
(m_use_texture_cache && !old_settings.gpu_texture_cache) + { + if (!GPUTextureCache::Initialize()) + { + ERROR_LOG("Failed to initialize texture cache, disabling."); + m_use_texture_cache = false; + } + } + else if (!m_use_texture_cache && old_settings.gpu_texture_cache) + { + GPUTextureCache::Shutdown(); + } + else if (m_use_texture_cache) + { + GPUTextureCache::UpdateSettings(old_settings); + } + if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) @@ -728,6 +769,9 @@ void GPU_HW::AddWrittenRectangle(const GSVector4i rect) { m_vram_dirty_write_rect = m_vram_dirty_write_rect.runion(rect); SetTexPageChangedOnOverlap(m_vram_dirty_write_rect); + + if (m_use_texture_cache) + GPUTextureCache::AddWrittenRectangle(rect); } void GPU_HW::AddDrawnRectangle(const GSVector4i rect) @@ -735,13 +779,22 @@ void GPU_HW::AddDrawnRectangle(const GSVector4i rect) // Normally, we would check for overlap here. But the GPU's texture cache won't actually reload until the page // changes, or it samples a larger region, so we can get away without doing so. This reduces copies considerably in // games like Mega Man Legends 2. 
- m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); + if (m_current_draw_rect.rcontains(rect)) + return; + + m_current_draw_rect = m_current_draw_rect.runion(rect); + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(m_current_draw_rect); + + if (m_use_texture_cache) + GPUTextureCache::AddDrawnRectangle(m_current_draw_rect, m_clamped_drawing_area); } void GPU_HW::AddUnclampedDrawnRectangle(const GSVector4i rect) { m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); SetTexPageChangedOnOverlap(m_vram_dirty_draw_rect); + if (m_use_texture_cache) + GPUTextureCache::AddDrawnRectangle(rect, rect); } void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) @@ -749,9 +802,9 @@ void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // shadow texture is updated if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && - (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(update_rect) || + (GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && - m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) + GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) { m_draw_mode.SetTexturePageChanged(); } @@ -882,6 +935,8 @@ void GPU_HW::ClearFramebuffer() g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); } ClearVRAMDirtyRectangle(); + if (m_use_texture_cache) + GPUTextureCache::Invalidate(); m_last_depth_z = 1.0f; } @@ -986,7 +1041,7 @@ bool GPU_HW::CompilePipelines(Error* error) const u32 active_texture_modes = m_allow_sprite_mode ? 
NUM_TEXTURE_MODES : (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast(BatchTextureMode::SpriteStart))); - const u32 total_vertex_shaders = (m_allow_sprite_mode ? 5 : 3); + const u32 total_vertex_shaders = (m_allow_sprite_mode ? 7 : 3); const u32 total_fragment_shaders = ((needs_rov_depth ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * (1 + BoolToUInt32(!true_color)) * (1 + BoolToUInt32(!m_force_progressive_scan)) * (1 + BoolToUInt32(needs_rov_depth))); @@ -1013,7 +1068,7 @@ bool GPU_HW::CompilePipelines(Error* error) // vertex shaders - [textured/palette/sprite] // fragment shaders - [depth_test][render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; - DimensionalArray, 2, 2, 2> batch_vertex_shaders{}; + DimensionalArray, 2, 3, 2> batch_vertex_shaders{}; DimensionalArray, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); @@ -1022,7 +1077,7 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 textured = 0; textured < 2; textured++) { - for (u8 palette = 0; palette < 2; palette++) + for (u8 palette = 0; palette < 3; palette++) { if (palette && !textured) continue; @@ -1034,7 +1089,7 @@ bool GPU_HW::CompilePipelines(Error* error) const bool uv_limits = ShouldClampUVs(sprite ? 
m_sprite_texture_filtering : m_texture_filtering); const std::string vs = shadergen.GenerateBatchVertexShader( - textured != 0, palette != 0, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer); + textured != 0, palette == 1, palette == 2, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer); if (!(batch_vertex_shaders[textured][palette][sprite] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error))) { @@ -1203,6 +1258,8 @@ bool GPU_HW::CompilePipelines(Error* error) static_cast(texture_mode) == BatchTextureMode::Palette8Bit || static_cast(texture_mode) == BatchTextureMode::SpritePalette4Bit || static_cast(texture_mode) == BatchTextureMode::SpritePalette8Bit); + const bool page_texture = + (static_cast(texture_mode) == BatchTextureMode::PageTexture); const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); const bool use_shader_blending = (render_mode == static_cast(BatchRenderMode::ShaderBlend)); @@ -1216,7 +1273,9 @@ bool GPU_HW::CompilePipelines(Error* error) std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); plconfig.vertex_shader = - batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get(); + batch_vertex_shaders[BoolToUInt8(textured)][page_texture ? 2 : BoolToUInt8(palette)] + [BoolToUInt8(sprite)] + .get(); plconfig.fragment_shader = batch_fragment_shaders[BoolToUInt8(depth_test && needs_rov_depth)][render_mode] [use_shader_blending ? 
transparency_mode : @@ -1836,19 +1895,26 @@ void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices) } ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, - u32 base_vertex) + u32 base_vertex, const GPUTextureCache::Source* texture) { // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] - const u8 texture_mode = static_cast(m_batch.texture_mode) + - ((m_batch.texture_mode != BatchTextureMode::Disabled && m_batch.sprite_mode) ? - static_cast(BatchTextureMode::SpriteStart) : - 0); + const u8 texture_mode = texture ? static_cast(BatchTextureMode::PageTexture) : + (static_cast(m_batch.texture_mode) + + ((m_batch.texture_mode < BatchTextureMode::PageTexture && m_batch.sprite_mode) ? + static_cast(BatchTextureMode::SpriteStart) : + 0)); const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer); const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw); g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast(m_batch.transparency_mode)][static_cast( render_mode)][texture_mode][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)][check_mask] .get()); + if (m_use_texture_cache && texture_mode != static_cast(BatchTextureMode::Disabled)) + { + g_gpu_device->SetTextureSampler(0, texture ? 
texture->texture : m_vram_read_texture.get(), + g_gpu_device->GetNearestSampler()); + } + GL_INS_FMT("Texture mode: {}", s_batch_texture_modes[texture_mode]); GL_INS_FMT("Transparency mode: {}", s_transparency_modes[static_cast(m_batch.transparency_mode)]); GL_INS_FMT("Render mode: {}", s_batch_render_modes[static_cast(render_mode)]); @@ -2199,7 +2265,7 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) for (u32 i = 0; i < num_vertices; i++) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); - if (m_texpage_dirty != 0) + if (ShouldCheckForTexPageOverlap()) CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32()); } @@ -2620,7 +2686,7 @@ void GPU_HW::LoadVertices() const u32 tex_right = tex_left + quad_width; const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - if (rc.texture_enable && m_texpage_dirty != 0) + if (rc.texture_enable && ShouldCheckForTexPageOverlap()) { CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), static_cast(tex_right), static_cast(tex_bottom))); @@ -2801,7 +2867,7 @@ void GPU_HW::LoadVertices() } } -bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacements::ReplacementImage* tex, u32 dst_x, u32 dst_y, +bool GPU_HW::BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, u32 height) { if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() || @@ -2844,7 +2910,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacements::ReplacementIm ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) { - DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != BatchTextureMode::Disabled); + DebugAssert((m_texpage_dirty != 0 || m_texture_dumping) && m_batch.texture_mode != BatchTextureMode::Disabled); if (m_texture_window_active) { @@ -2871,6 +2937,34 @@ ALWAYS_INLINE_RELEASE void 
GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) m_current_uv_rect = new_uv_rect; bool update_drawn = false, update_written = false; + if (m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) + { + DebugAssert(!(m_texpage_dirty & (TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT))); + DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture && + m_batch.texture_cache_key.page < NUM_VRAM_PAGES); + + if (GPUTextureCache::AreSourcePagesDrawn(m_batch.texture_cache_key, m_current_uv_rect)) + { + // UVs intersect with drawn area, can't use TC + if (m_batch_index_count > 0) + { + FlushRender(); + EnsureVertexBufferSpaceForCurrentCommand(); + } + + // We need to swap the dirty tracking over to drawn/written. + const GSVector4i page_rect = GetTextureRect(m_batch.texture_cache_key.page, m_batch.texture_cache_key.mode); + m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) | + (m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0); + m_compute_uv_range = (ShouldCheckForTexPageOverlap() || m_clamp_uvs); + m_batch.texture_mode = static_cast(m_draw_mode.mode_reg.texture_mode.GetValue()); + } + else + { + // Page isn't drawn, we're done. 
+ return; + } + } if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) { DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); @@ -2905,6 +2999,11 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) } } +bool GPU_HW::ShouldCheckForTexPageOverlap() const +{ + return (m_texpage_dirty != 0); +} + ALWAYS_INLINE bool GPU_HW::IsFlushed() const { return (m_batch_index_count == 0); @@ -3003,8 +3102,13 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) { const bool current_enabled = (m_sw_renderer != nullptr); const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; + const bool use_thread = !g_settings.gpu_texture_cache; if (current_enabled == new_enabled) + { + if (m_sw_renderer) + m_sw_renderer->SetThreadEnabled(use_thread); return; + } if (!new_enabled) { @@ -3015,7 +3119,7 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) } std::unique_ptr sw_renderer = std::make_unique(); - if (!sw_renderer->Initialize(true)) + if (!sw_renderer->Initialize(use_thread)) return; // We need to fill in the SW renderer's VRAM with the current state for hot toggles. @@ -3080,7 +3184,17 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); - AddUnclampedDrawnRectangle(bounds); + + // If TC is enabled, we have to update local memory. + if (m_use_texture_cache && !IsInterlacedRenderingEnabled()) + { + AddWrittenRectangle(bounds); + GPU_SW_Rasterizer::FillVRAM(x, y, width, height, color, false, 0); + } + else + { + AddUnclampedDrawnRectangle(bounds); + } GL_INS_FMT("Dirty draw area after: {}", m_vram_dirty_draw_rect); @@ -3126,6 +3240,8 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) return; } + // TODO: Only read if it's in the drawn area + // Get bounds with wrap-around handled. 
GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -3177,7 +3293,17 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b { GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) + // TODO: Handle wrapped transfers... break them up or something + const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); + DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); + AddWrittenRectangle(bounds); + + // We want to dump *before* the write goes through, otherwise we dump bad data. + if (m_use_texture_cache) + { + GPUTextureCache::WriteVRAM(x, y, width, height, data, set_mask, check_mask, bounds); + } + else if (m_sw_renderer) { const u32 num_words = width * height; GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); @@ -3192,10 +3318,6 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b m_sw_renderer->PushCommand(cmd); } - const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); - DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); - AddWrittenRectangle(bounds); - if (check_mask) { // set new vertex counter since we want this to take into consideration previous masked pixels @@ -3203,7 +3325,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } else { - const TextureReplacements::ReplacementImage* rtex = TextureReplacements::GetVRAMReplacement(width, height, data); + const GPUTextureCache::TextureReplacementImage* rtex = GPUTextureCache::GetVRAMReplacement(width, height, data); if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale)) { @@ -3283,7 +3405,26 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 { 
GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y); - if (m_sw_renderer) + // masking enabled, oversized, or overlapping + const bool use_shader = + (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); + const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + + // If we're copying a region that hasn't been drawn to, and we're using the TC, we can do it in local memory. + if (m_use_texture_cache && !GPUTextureCache::IsRectDrawn(src_bounds)) + { + GL_INS("Performed in local memory."); + GPUTextureCache::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, m_GPUSTAT.set_mask_while_drawing, + m_GPUSTAT.check_mask_before_draw, src_bounds, dst_bounds); + UpdateVRAMOnGPU(dst_bounds.left, dst_bounds.top, dst_bounds.width(), dst_bounds.height(), + &g_vram[dst_bounds.top * VRAM_WIDTH + dst_bounds.left], VRAM_WIDTH * sizeof(u16), false, false, + dst_bounds); + return; + } + else if (m_sw_renderer) { GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); FillBackendCommandParameters(cmd); @@ -3296,16 +3437,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_sw_renderer->PushCommand(cmd); } - // masking enabled, oversized, or overlapping - const bool use_shader = - (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); - const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); const bool intersect_with_draw = 
m_vram_dirty_draw_rect.rintersects(src_bounds); const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); - if (use_shader || IsUsingMultisampling()) { if (intersect_with_draw || intersect_with_write) @@ -3343,6 +3476,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); + g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -3362,7 +3496,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); } - if (intersect_with_draw) + // We don't have it in local memory, so TC can't read it. + if (intersect_with_draw || m_use_texture_cache) { AddUnclampedDrawnRectangle(dst_bounds); } @@ -3398,77 +3533,112 @@ void GPU_HW::DispatchRenderCommand() { const GPURenderCommand rc{m_render_command.bits}; - BatchTextureMode texture_mode = BatchTextureMode::Disabled; + // TODO: avoid all this for vertex loading, only do when the type of draw changes + BatchTextureMode texture_mode = rc.IsTexturingEnabled() ? 
m_batch.texture_mode : BatchTextureMode::Disabled; + GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key; if (rc.IsTexturingEnabled()) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged()) + if (m_draw_mode.IsTexturePageChanged() || texture_mode == BatchTextureMode::Disabled) { m_draw_mode.ClearTexturePageChangedFlag(); -#if 0 - if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) - { - GL_INS_FMT("VRAM DIRTY: {} {}", m_vram_dirty_draw_rect, m_vram_dirty_write_rect); - GL_INS_FMT("PAGE RECT: {}", m_draw_mode.mode_reg.GetTexturePageRectangle()); - if (m_draw_mode.mode_reg.IsUsingPalette()) - GL_INS_FMT("PALETTE RECT: {}", m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode)); - } -#endif + // start by assuming we can use the TC + bool use_texture_cache = m_use_texture_cache; + // check that the palette isn't in a drawn area if (m_draw_mode.mode_reg.IsUsingPalette()) { - const GSVector4i palette_rect = m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); - const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); - const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); - if (update_drawn || update_written) + const GSVector4i palette_rect = + GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode, use_texture_cache); + if (!use_texture_cache || GPUTextureCache::IsRectDrawn(palette_rect)) { - GL_INS("Palette in VRAM dirty area, flushing cache"); - if (!IsFlushed()) - FlushRender(); + if (use_texture_cache) + GL_INS_FMT("Palette at {} is in drawn area, can't use TC", palette_rect); + use_texture_cache = false; - UpdateVRAMReadTexture(update_drawn, update_written); + const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); + const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); + if (update_drawn 
|| update_written) + { + GL_INS("Palette in VRAM dirty area, flushing cache"); + if (!IsFlushed()) + FlushRender(); + + UpdateVRAMReadTexture(update_drawn, update_written); + } } } - const GSVector4i page_rect = m_draw_mode.mode_reg.GetTexturePageRectangle(); - GSVector4i::storel(m_current_texture_page_offset, page_rect); + m_compute_uv_range = (m_clamp_uvs || m_texture_dumping); - u8 new_texpage_dirty = m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0; - new_texpage_dirty |= m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0; + const GPUTextureMode gpu_texture_mode = + (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? GPUTextureMode::Direct16Bit : + m_draw_mode.mode_reg.texture_mode; + const GSVector4i page_rect = GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode); - if (new_texpage_dirty != 0) + // TODO: This will result in incorrect global-space UVs when the texture page wraps around. + // Need to deal with it if it becomes a problem. + m_current_texture_page_offset[0] = static_cast(m_draw_mode.mode_reg.GetTexturePageBaseX()); + m_current_texture_page_offset[1] = static_cast(m_draw_mode.mode_reg.GetTexturePageBaseY()); + + if (use_texture_cache) { - GL_INS("Texpage is in dirty area, checking UV ranges"); - m_texpage_dirty = new_texpage_dirty; - m_compute_uv_range = true; - m_current_uv_rect = INVALID_RECT; + texture_mode = BatchTextureMode::PageTexture; + texture_cache_key = + GPUTextureCache::SourceKey(m_draw_mode.mode_reg.texture_page, m_draw_mode.palette_reg, gpu_texture_mode); + + const bool is_drawn = GPUTextureCache::IsRectDrawn(page_rect); + if (is_drawn) + GL_INS_FMT("Texpage [{}] {} is drawn in TC, checking UV ranges", texture_cache_key.page, page_rect); + + m_texpage_dirty = + (is_drawn ? TEXPAGE_DIRTY_PAGE_RECT : 0) | (m_texture_dumping ? 
TEXPAGE_DIRTY_ONLY_UV_RECT : 0); + m_compute_uv_range |= ShouldCheckForTexPageOverlap(); + } else { - m_compute_uv_range = m_clamp_uvs; - if (m_texpage_dirty) - GL_INS("Texpage is no longer dirty"); - m_texpage_dirty = 0; + texture_mode = static_cast(gpu_texture_mode); + m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) | + (m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0); + if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) + GL_INS_FMT("Texpage {} is in dirty DRAWN area {}", page_rect, m_vram_dirty_draw_rect); + if (m_texpage_dirty & TEXPAGE_DIRTY_WRITTEN_RECT) + GL_INS_FMT("Texpage {} is in dirty WRITTEN area {}", page_rect, m_vram_dirty_write_rect); + + // Current UV rect _must_ be cleared here, because we only check for texpage intersection when it grows in + // size, a switch from a non-contained page to a contained page would go undetected otherwise. + if (m_texpage_dirty != 0) + { + m_compute_uv_range = true; + m_current_uv_rect = INVALID_RECT; + } } } - - texture_mode = (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? - BatchTextureMode::Direct16Bit : - static_cast(m_draw_mode.mode_reg.texture_mode.GetValue()); } + DebugAssert((rc.IsTexturingEnabled() && (texture_mode == BatchTextureMode::PageTexture && + texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || + texture_mode == static_cast(m_draw_mode.mode_reg.texture_mode.GetValue())) || + (!rc.IsTexturingEnabled() && texture_mode == BatchTextureMode::Disabled)); + DebugAssert(!(m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) || texture_mode == BatchTextureMode::PageTexture || + !rc.IsTexturingEnabled()); + // has any state changed which requires a new batch? // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows.
const GPUTransparencyMode transparency_mode = rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; - if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || - (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || - dithering_enable != m_batch.dithering) + if (!IsFlushed()) { - FlushRender(); + if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || + (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || + dithering_enable != m_batch.dithering || + (texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key)) + { + FlushRender(); + } } EnsureVertexBufferSpaceForCurrentCommand(); @@ -3512,6 +3682,7 @@ void GPU_HW::DispatchRenderCommand() m_batch.texture_mode = texture_mode; m_batch.transparency_mode = transparency_mode; m_batch.dithering = dithering_enable; + m_batch.texture_cache_key = texture_cache_key; if (m_draw_mode.IsTextureWindowChanged()) { @@ -3577,10 +3748,21 @@ void GPU_HW::FlushRender() return; #ifdef _DEBUG - GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number); + GL_SCOPE_FMT("Hardware Draw {}: {}", ++s_draw_number, m_current_draw_rect); #endif GL_INS_FMT("Dirty draw area: {}", m_vram_dirty_draw_rect); + if (m_compute_uv_range) + GL_INS_FMT("UV rect: {}", m_current_uv_rect); + + const GPUTextureCache::Source* texture = nullptr; + if (m_batch.texture_mode == BatchTextureMode::PageTexture) + { + texture = LookupSource(m_batch.texture_cache_key, m_current_uv_rect, + m_batch.transparency_mode != GPUTransparencyMode::Disabled ? 
+ GPUTextureCache::PaletteRecordFlags::HasSemiTransparentDraws : + GPUTextureCache::PaletteRecordFlags::None); + } if (m_batch_ubo_dirty) { @@ -3589,21 +3771,24 @@ void GPU_HW::FlushRender() m_batch_ubo_dirty = false; } + m_current_draw_rect = INVALID_RECT; + m_current_uv_rect = INVALID_RECT; + if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) { if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.texture_mode, m_batch.check_mask_before_draw) || m_rov_active || (m_use_rov_for_shader_blend && m_pgxp_depth_buffer)) { - DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); + DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex, texture); } else if (NeedsTwoPassRendering()) { - DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); - DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); + DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex, texture); + DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex, texture); } else { - DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex); + DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex, texture); } } @@ -3623,6 +3808,8 @@ void GPU_HW::UpdateDisplay() GL_SCOPE("UpdateDisplay()"); + GPUTextureCache::Compact(); + if (g_settings.debugging.show_vram) { if (IsUsingMultisampling()) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index b2fd939a8..db2f564ed 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -4,7 +4,7 @@ #pragma once #include "gpu.h" -#include "texture_replacements.h" +#include "gpu_hw_texture_cache.h" #include "util/gpu_device.h" @@ -38,6 +38,7 @@ public: Palette4Bit, Palette8Bit, Direct16Bit, + PageTexture, Disabled, SpritePalette4Bit, @@ -52,6 +53,11 @@ public: static_cast(BatchTextureMode::Palette8Bit) == 
static_cast(GPUTextureMode::Palette8Bit) && static_cast(BatchTextureMode::Direct16Bit) == static_cast(GPUTextureMode::Direct16Bit)); + static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + static constexpr GSVector4i INVALID_RECT = + GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), + std::numeric_limits::min()); + GPU_HW(); ~GPU_HW() override; @@ -83,6 +89,8 @@ private: { TEXPAGE_DIRTY_DRAWN_RECT = (1 << 0), TEXPAGE_DIRTY_WRITTEN_RECT = (1 << 1), + TEXPAGE_DIRTY_PAGE_RECT = (1 << 2), + TEXPAGE_DIRTY_ONLY_UV_RECT = (1 << 3), }; static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT)); @@ -116,6 +124,8 @@ private: bool use_depth_buffer = false; bool sprite_mode = false; + GPUTextureCache::SourceKey texture_cache_key = {}; + // Returns the render mode for this batch. BatchRenderMode GetRenderMode() const; }; @@ -140,11 +150,6 @@ private: u32 num_uniform_buffer_updates; }; - static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - static constexpr GSVector4i INVALID_RECT = - GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), - std::numeric_limits::min()); - /// Returns true if a depth buffer should be created. 
GPUTexture::Format GetDepthBufferFormat() const; @@ -169,7 +174,8 @@ private: void DeactivateROV(); void MapGPUBuffer(u32 required_vertices, u32 required_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); - void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); + void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex, + const GPUTextureCache::Source* texture); u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; @@ -186,6 +192,7 @@ private: void SetTexPageChangedOnOverlap(const GSVector4i update_rect); void CheckForTexPageOverlap(GSVector4i uv_rect); + bool ShouldCheckForTexPageOverlap() const; bool IsFlushed() const; void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); @@ -217,7 +224,7 @@ private: void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); - bool BlitVRAMReplacementTexture(const TextureReplacements::ReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, + bool BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. @@ -290,6 +297,9 @@ private: bool m_texture_window_active : 1 = false; bool m_rov_active : 1 = false; + bool m_use_texture_cache : 1 = false; + bool m_texture_dumping : 1 = false; + u8 m_texpage_dirty = 0; BatchConfig m_batch; @@ -300,8 +310,9 @@ private: // Bounding box of VRAM area that the GPU has drawn into. GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; - GSVector4i m_vram_dirty_write_rect = INVALID_RECT; + GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero. 
GSVector4i m_current_uv_rect = INVALID_RECT; + GSVector4i m_current_draw_rect = INVALID_RECT; s32 m_current_texture_page_offset[2] = {}; std::unique_ptr m_wireframe_pipeline; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 1c4ca21a4..36e5b9055 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -55,13 +55,14 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) false); } -std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits, +std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette, bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", palette); + DefineMacro(ss, "PAGE_TEXTURE", page_texture); DefineMacro(ss, "UV_LIMITS", uv_limits); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); @@ -69,7 +70,22 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale WriteBatchUniformBuffer(ss); - if (textured) + if (textured && page_texture) + { + if (uv_limits) + { + DeclareVertexEntryPoint( + ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1, + {{"nointerpolation", "float4 v_uv_limits"}}, false, "", UsingMSAA(), UsingPerSampleShading(), + m_disable_color_perspective); + } + else + { + DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, {}, + false, "", UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective); + } + } + else if (textured) { if (uv_limits) { @@ -127,16 +143,18 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale v_col0 = a_col0; #if TEXTURED v_tex0 = float2(uint2(a_texcoord & 0xFFFFu, a_texcoord >> 16)); - #if !PALETTE + #if !PALETTE && 
!PAGE_TEXTURE v_tex0 *= u_resolution_scale; #endif - // base_x,base_y,palette_x,palette_y - v_texpage.x = (a_texpage & 15u) * 64u; - v_texpage.y = ((a_texpage >> 4) & 1u) * 256u; - #if PALETTE - v_texpage.z = ((a_texpage >> 16) & 63u) * 16u; - v_texpage.w = ((a_texpage >> 22) & 511u); + #if !PAGE_TEXTURE + // base_x,base_y,palette_x,palette_y + v_texpage.x = (a_texpage & 15u) * 64u; + v_texpage.y = ((a_texpage >> 4) & 1u) * 256u; + #if PALETTE + v_texpage.z = ((a_texpage >> 16) & 63u) * 16u; + v_texpage.w = ((a_texpage >> 22) & 511u); + #endif #endif #if UV_LIMITS @@ -146,7 +164,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale // Add 0.5 to the upper bounds when upscaling, to work around interpolation differences. // Limited to force-round-texcoord hack, to avoid breaking other games. v_uv_limits.zw += 0.5; - #elif !PALETTE + #elif !PAGE_TEXTURE && !PALETTE // Treat coordinates as being in upscaled space, and extend the UV range to all "upscaled" // pixels. This means 1-pixel-high polygon-based framebuffer effects won't be downsampled. // (e.g. 
Mega Man Legends 2 haze effect) @@ -707,6 +725,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader( const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled); const bool palette = (texture_mode == GPU_HW::BatchTextureMode::Palette4Bit || texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); + const bool page_texture = (texture_mode == GPU_HW::BatchTextureMode::PageTexture); const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend); const bool use_dual_source = (!shader_blending && !use_rov && m_supports_dual_source_blend && ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && @@ -725,6 +744,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader( DefineMacro(ss, "PALETTE", palette); DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette4Bit); DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); + DefineMacro(ss, "PAGE_TEXTURE", page_texture); DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering); DefineMacro(ss, "INTERLACING", interlacing); @@ -804,6 +824,8 @@ uint2 FloatToIntegerCoords(float2 coords) return uint2((UPSCALED == 0 || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords)); } +#if !PAGE_TEXTURE + float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) { #if PALETTE @@ -855,11 +877,43 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) #endif } +#else + +float4 SampleFromPageTexture(float2 coords) +{ + // Cached textures. +#if UPSCALED == 0 || FORCE_ROUND_TEXCOORDS != 0 + float2 fpart = coords - roundEven(coords); +#else + float2 fpart = frac(coords); #endif + uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); + coords = (float2(icoord) + fpart) * (1.0f / 256.0f); + return SAMPLE_TEXTURE(samp0, coords); +} + +#endif + +#endif // TEXTURED )"; const u32 num_fragment_outputs = use_rov ? 0 : (use_dual_source ? 
2 : 1); - if (textured) + if (textured && page_texture) + { + if (uv_limits) + { + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "float4 v_uv_limits"}}, true, num_fragment_outputs, + use_dual_source, m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false, + m_disable_color_perspective, shader_blending && !use_rov, use_rov); + } + else + { + DeclareFragmentEntryPoint(ss, 1, 1, {}, true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth, + UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, + shader_blending && !use_rov, use_rov); + } + } + else if (textured) { if (texture_filtering != GPUTextureFilter::Nearest) WriteBatchTextureFilter(ss, texture_filtering); @@ -905,7 +959,17 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) #if TEXTURED float4 texcol; - #if TEXTURE_FILTERING + #if PAGE_TEXTURE + #if UV_LIMITS + texcol = SampleFromPageTexture(clamp(v_tex0, v_uv_limits.xy, v_uv_limits.zw)); + #else + texcol = SampleFromPageTexture(v_tex0); + #endif + if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR)) + discard; + + ialpha = 1.0; + #elif TEXTURE_FILTERING FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha); if (ialpha < 0.5) discard; @@ -1687,3 +1751,33 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact return ss.str(); } + +std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent) +{ + std::stringstream ss; + WriteHeader(ss); + DefineMacro(ss, "SEMITRANSPARENT", semitransparent); + DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 0, 1); + + ss << R"( +{ + float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; + float4 color = SAMPLE_TEXTURE(samp0, coords); + o_col0.rgb = color.rgb; + + // Alpha processing. + #if SEMITRANSPARENT + // Map anything not 255 to 1 for semitransparent, otherwise zero for opaque. + o_col0.a = (color.a <= 0.95f) ? 
1.0f : 0.0f; + o_col0.a = VECTOR_EQ(color, float4(0.0, 0.0, 0.0, 0.0)) ? 0.0f : o_col0.a; + #else + // Leave (0,0,0,0) as 0000 for opaque replacements for cutout alpha. + o_col0.a = color.a; + #endif +} +)"; + + return ss.str(); +} \ No newline at end of file diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 69b390494..9ebbbc193 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -15,8 +15,8 @@ public: bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~GPU_HW_ShaderGen(); - std::string GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits, bool force_round_texcoords, - bool pgxp_depth); + std::string GenerateBatchVertexShader(bool textured, bool palette, bool page_texture, bool uv_limits, + bool force_round_texcoords, bool pgxp_depth); std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing, @@ -36,6 +36,8 @@ public: std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor); + std::string GenerateReplacementMergeFragmentShader(bool semitransparent); + private: ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; } ALWAYS_INLINE bool UsingPerSampleShading() const { return m_multisamples > 1 && m_per_sample_shading; } diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp new file mode 100644 index 000000000..db245109b --- /dev/null +++ b/src/core/gpu_hw_texture_cache.cpp @@ -0,0 +1,3261 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#include "gpu_hw_texture_cache.h" +#include "gpu_hw.h" +#include "gpu_hw_shadergen.h" +#include "gpu_sw_rasterizer.h" +#include "host.h" +#include "settings.h" +#include 
"system.h" + +#include "util/gpu_device.h" +#include "util/state_wrapper.h" + +#include "common/error.h" +#include "common/file_system.h" +#include "common/gsvector_formatter.h" +#include "common/log.h" +#include "common/path.h" +#include "common/string_util.h" +#include "common/timer.h" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" +#ifdef CPU_ARCH_SSE +#include "xxh_x86dispatch.h" +#endif + +#include +#include +#include +#include + +LOG_CHANNEL(GPUTextureCache); + +#include "common/ryml_helpers.h" + +// #define ALWAYS_TRACK_VRAM_WRITES 1 + +namespace GPUTextureCache { +static constexpr u32 MAX_CLUT_SIZE = 256; +static constexpr u32 NUM_PAGE_DRAW_RECTS = 4; +static constexpr const GSVector4i& INVALID_RECT = GPU_HW::INVALID_RECT; +static constexpr const GPUTexture::Format REPLACEMENT_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; +static constexpr const char LOCAL_CONFIG_FILENAME[] = "config.yaml"; + +// Has to be public because it's referenced in Source. +struct HashCacheEntry +{ + std::unique_ptr texture; + u32 ref_count; + u32 last_used_frame; + TList sources; +}; + +namespace { +struct VRAMWrite +{ + GSVector4i active_rect; + GSVector4i write_rect; + HashType hash; + + struct PaletteRecord + { + // TODO: Texture window, for sub texture dumping. + GSVector4i rect; + SourceKey key; + PaletteRecordFlags flags; + + // Awkward to store, but we need to keep a backup copy of each CLUT, because if the CLUT gets overwritten + // before the VRAM write, when we go to dump the texture, it'll be incorrect. + HashType palette_hash; + u16 palette[MAX_CLUT_SIZE]; + }; + + // List of palettes and rectangles drawn for dumping. + // TODO: Keep these in texel-local space, not global space, that way texture sizes aren't aligned to 4 pixels. + // But realistically, that probably isn't super common, and also requires modifying the renderer side of things. 
+ std::vector palette_records; + + u32 num_splits; + u32 num_page_refs; + std::array, MAX_PAGE_REFS_PER_WRITE> page_refs; +}; + +struct PageEntry +{ + TList sources; + TList writes; // TODO: Split to own list + u32 num_draw_rects; + GSVector4i total_draw_rect; // NOTE: In global VRAM space. + std::array draw_rects; +}; + +struct HashCacheKey +{ + HashType texture_hash; + HashType palette_hash; + HashType mode; + + ALWAYS_INLINE bool operator==(const HashCacheKey& k) const + { + return (std::memcmp(&k, this, sizeof(HashCacheKey)) == 0); + } + ALWAYS_INLINE bool operator!=(const HashCacheKey& k) const + { + return (std::memcmp(&k, this, sizeof(HashCacheKey)) != 0); + } +}; +struct HashCacheKeyHash +{ + size_t operator()(const HashCacheKey& k) const; +}; + +enum class TextureReplacementType : u8 +{ + VRAMReplacement, + TextureFromVRAMWrite, + TextureFromPage, +}; + +struct TextureReplacementSubImage +{ + GSVector4i dst_rect; + GSVector4i src_rect; + const TextureReplacementImage& image; + float scale_x; + float scale_y; + bool invert_alpha; +}; + +struct VRAMReplacementName +{ + u64 low; + u64 high; + + TinyString ToString() const; + bool Parse(const std::string_view file_title); + + bool operator<(const VRAMReplacementName& rhs) const { return std::tie(low, high) < std::tie(rhs.low, rhs.high); } + bool operator==(const VRAMReplacementName& rhs) const { return low == rhs.low && high == rhs.high; } + bool operator!=(const VRAMReplacementName& rhs) const { return low != rhs.low || high != rhs.high; } +}; + +struct VRAMReplacementNameHash +{ + size_t operator()(const VRAMReplacementName& hash) const; +}; + +struct TextureReplacementIndex +{ + u64 src_hash; + GPUTextureMode mode; + + bool operator<(const TextureReplacementIndex& rhs) const + { + return std::tie(src_hash, mode) < std::tie(rhs.src_hash, rhs.mode); // lexicographic (src_hash, mode) order against rhs + } + bool operator==(const TextureReplacementIndex& rhs) const { return src_hash == rhs.src_hash && mode == rhs.mode; } + bool operator!=(const TextureReplacementIndex&
rhs) const { return src_hash != rhs.src_hash || mode != rhs.mode; } +}; + +struct TextureReplacementIndexHash +{ + size_t operator()(const TextureReplacementIndex& hash) const; +}; + +struct TextureReplacementName +{ + u64 src_hash; + u64 pal_hash; + u16 src_width; + u16 src_height; + TextureReplacementType type; + u8 texture_mode; + u16 offset_x; + u16 offset_y; + u16 width; + u16 height; + u8 pal_min; + u8 pal_max; + + TinyString ToString() const; + bool Parse(const std::string_view file_title); + TextureReplacementIndex GetIndex() const; + GPUTextureMode GetTextureMode() const; + bool IsSemitransparent() const; + + bool operator<(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) < 0); } + bool operator==(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) == 0); } + bool operator!=(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) != 0); } + + ALWAYS_INLINE GSVector2i GetSizeVec() const { return GSVector2i(width, height); } + ALWAYS_INLINE GSVector2i GetOffsetVec() const { return GSVector2i(offset_x, offset_y); } + ALWAYS_INLINE GSVector4i GetDestRect() const + { + return GSVector4i(GSVector4i(GetOffsetVec()).xyxy().add32(GSVector4i(GetSizeVec()).zwxy())); + } +}; + +struct DumpedTextureKey +{ + HashType tex_hash; + HashType pal_hash; + u16 offset_x, offset_y; + u16 width, height; + TextureReplacementType type; + u8 texture_mode; + u8 pad[6]; + + ALWAYS_INLINE bool operator==(const DumpedTextureKey& k) const + { + return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) == 0); + } + ALWAYS_INLINE bool operator!=(const DumpedTextureKey& k) const + { + return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) != 0); + } +}; +struct DumpedTextureKeyHash +{ + size_t operator()(const DumpedTextureKey& k) const; +}; +} // namespace + +using HashCache = std::unordered_map; +using TextureCache = std::unordered_map; + +using VRAMReplacementMap = 
std::unordered_map; +using TextureReplacementMap = + std::unordered_multimap, + TextureReplacementIndexHash>; + +static bool ShouldTrackVRAMWrites(); +static bool IsDumpingVRAMWriteTextures(); + +static bool CompilePipelines(); +static void DestroyPipelines(); + +static const Source* ReturnSource(Source* source, const GSVector4i uv_rect, PaletteRecordFlags flags); +static Source* CreateSource(SourceKey key); + +static HashCacheEntry* LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash); +static void ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash, HashCacheEntry* entry); +static void RemoveFromHashCache(HashCache::iterator it); +static void ClearHashCache(); + +static HashType HashPage(u8 page, GPUTextureMode mode); +static HashType HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode); +static HashType HashPartialPalette(const u16* palette, u32 min, u32 max); + +static std::pair ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode, + GPUTexturePaletteReg palette); +static void SyncVRAMWritePaletteRecords(VRAMWrite* entry); +static void InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key, + const GSVector4i rect, PaletteRecordFlags flags); +static void UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect, + PaletteRecordFlags flags); +static void SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect); +static bool TryMergeVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect); +static void RemoveVRAMWrite(VRAMWrite* entry); +static void DumpTexturesFromVRAMWrite(VRAMWrite* entry); +static void DumpTextureFromPage(const Source* src); + +static void DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture16(const u16* page, u32 
width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture); + +static std::optional GetTextureReplacementTypeFromFileTitle(const std::string_view file_title); +static bool HasValidReplacementExtension(const std::string_view path); + +static bool EnsureGameDirectoryExists(); +static std::string GetTextureReplacementDirectory(); +static std::string GetTextureDumpDirectory(); + +static VRAMReplacementName GetVRAMWriteHash(u32 width, u32 height, const void* pixels); +static std::string GetVRAMWriteDumpFilename(const VRAMReplacementName& name); + +static bool IsMatchingReplacementPalette(HashType full_palette_hash, GPUTextureMode mode, GPUTexturePaletteReg palette, + const TextureReplacementName& name); +static bool LoadLocalConfiguration(bool load_vram_write_replacement_aliases, bool load_texture_replacement_aliases); + +static void FindTextureReplacements(bool load_vram_write_replacements, bool load_texture_replacements); +static void LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases, + bool load_texture_replacement_aliases); + +static const TextureReplacementImage* GetTextureReplacementImage(const std::string& filename); +static void PreloadReplacementTextures(); +static void PurgeUnreferencedTexturesFromCache(); + +static void DumpTexture(TextureReplacementType type, u32 offset_x, u32 offset_y, u32 src_width, u32 src_height, + GPUTextureMode mode, HashType src_hash, HashType pal_hash, u32 pal_min, u32 pal_max, + const u16* palette, const GSVector4i rect, PaletteRecordFlags flags); + +static bool HasVRAMWriteTextureReplacements(); +static void GetVRAMWriteTextureReplacements(std::vector& replacements, + HashType vram_write_hash, HashType palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette, const GSVector2i& offset_to_page); + +static bool HasTexturePageTextureReplacements(); +static void 
GetTexturePageTextureReplacements(std::vector& replacements, + u32 start_page_number, HashType page_hash, HashType palette_hash, + GPUTextureMode mode, GPUTexturePaletteReg palette); + +template +ALWAYS_INLINE_RELEASE static void ListPrepend(TList* list, T* item, TListNode* item_node) +{ + item_node->ref = item; + item_node->list = list; + item_node->prev = nullptr; + if (list->tail) + { + item_node->next = list->head; + list->head->prev = item_node; + list->head = item_node; + } + else + { + item_node->next = nullptr; + list->head = item_node; + list->tail = item_node; + } +} + +template +ALWAYS_INLINE_RELEASE static void ListAppend(TList* list, T* item, TListNode* item_node) +{ + item_node->ref = item; + item_node->list = list; + item_node->next = nullptr; + if (list->tail) + { + item_node->prev = list->tail; + list->tail->next = item_node; + list->tail = item_node; + } + else + { + item_node->prev = nullptr; + list->head = item_node; + list->tail = item_node; + } +} + +template +ALWAYS_INLINE_RELEASE static void ListMoveToFront(TList* list, TListNode* item_node) +{ + DebugAssert(list->head); + if (!item_node->prev) + return; + + item_node->prev->next = item_node->next; + if (item_node->next) + item_node->next->prev = item_node->prev; + else + list->tail = item_node->prev; + + item_node->prev = nullptr; + list->head->prev = item_node; + item_node->next = list->head; + list->head = item_node; +} + +template +ALWAYS_INLINE_RELEASE static void ListUnlink(const TListNode& node) +{ + if (node.prev) + node.prev->next = node.next; + else + node.list->head = node.next; + if (node.next) + node.next->prev = node.prev; + else + node.list->tail = node.prev; +} + +template +ALWAYS_INLINE_RELEASE static void ListIterate(const TList& list, const F& f) +{ + for (const GPUTextureCache::TListNode* n = list.head; n;) + { + const GPUTextureCache::TListNode* tn = n; + n = n->next; + f(tn->ref); + } +} + +template +ALWAYS_INLINE_RELEASE static bool ListIterateWithEarlyExit(const 
TList& list, const F& f) +{ + for (const GPUTextureCache::TListNode* n = list.head; n; n = n->next) + { + if (!f(n->ref)) + return false; + } + + return true; +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPages(u32 left, u32 top, u32 right, u32 bottom, const F& f) +{ + DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT); + DebugAssert((right - left) > 0 && (bottom - top) > 0); + + const u32 start_x = left / VRAM_PAGE_WIDTH; + const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH; + const u32 start_y = top / VRAM_PAGE_HEIGHT; + const u32 end_y = (bottom - 1) / VRAM_PAGE_HEIGHT; + + u32 page_number = VRAMPageIndex(start_x, start_y); + for (u32 page_y = start_y; page_y <= end_y; page_y++) + { + u32 y_page_number = page_number; + + for (u32 page_x = start_x; page_x <= end_x; page_x++) + f(y_page_number++); + + page_number += VRAM_PAGES_WIDE; + } +} + +template +ALWAYS_INLINE_RELEASE static bool LoopRectPagesWithEarlyExit(u32 left, u32 top, u32 right, u32 bottom, const F& f) +{ + DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT); + DebugAssert((right - left) > 0 && (bottom - top) > 0); + + const u32 start_x = left / VRAM_PAGE_WIDTH; + const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH; + const u32 start_y = top / VRAM_PAGE_HEIGHT; + const u32 end_y = (bottom - 1) / VRAM_PAGE_HEIGHT; + + u32 page_number = VRAMPageIndex(start_x, start_y); + for (u32 page_y = start_y; page_y <= end_y; page_y++) + { + u32 y_page_number = page_number; + + for (u32 page_x = start_x; page_x <= end_x; page_x++) + { + if (!f(y_page_number++)) + return false; + } + + page_number += VRAM_PAGES_WIDE; + } + + return true; +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPages(const GSVector4i& rc, const F& f) +{ + LoopRectPages(rc.left, rc.top, rc.right, rc.bottom, f); +} + +template +ALWAYS_INLINE_RELEASE static bool LoopRectPagesWithEarlyExit(const GSVector4i& rc, const F& f) +{ + return LoopRectPagesWithEarlyExit(rc.left, rc.top, rc.right, rc.bottom, f); +} + +template 
+ALWAYS_INLINE_RELEASE static void LoopXWrappedPages(u32 page, u32 num_pages, const F& f)
+{
+  for (u32 i = 0; i < num_pages; i++)
+    f((page & VRAM_PAGE_Y_MASK) | ((page + i) & VRAM_PAGE_X_MASK));
+}
+
+ALWAYS_INLINE static void DoStateVector(StateWrapper& sw, GSVector4i* vec)
+{
+  sw.DoBytes(vec->S32, sizeof(vec->S32));
+}
+
+/// Returns the squared distance between the centres of two rectangles.
+/// The square root is omitted: the value is only meaningful for comparing candidates against
+/// each other (smaller means closer), and squaring preserves that ordering.
+ALWAYS_INLINE static float RectDistance(const GSVector4i& lhs, const GSVector4i& rhs)
+{
+  const GSVector4 flhs(lhs);
+  const GSVector4 frhs(rhs);
+  // centre = top-left + half of (bottom-right - top-left), each computed from its own rectangle
+  const GSVector2 clhs = flhs.xy() + ((flhs.zw() - flhs.xy()) * 0.5f);
+  const GSVector2 crhs = frhs.xy() + ((frhs.zw() - frhs.xy()) * 0.5f);
+  const GSVector2 delta = clhs - crhs;
+  return delta.dot(delta);
+}
+
+// TODO: Pack in struct
+
+static HashCache s_hash_cache;
+static size_t s_hash_cache_memory_usage = 0;
+static size_t s_max_hash_cache_memory_usage = 2ULL * 1024ULL * 1024ULL * 1024ULL; // 2GB
+
+static std::array s_pages = {};
+static VRAMWrite* s_last_vram_write = nullptr;
+
+/// List of candidates for purging when the hash cache gets too large.
+static std::vector> s_hash_cache_purge_list;
+
+/// List of VRAM writes collected when saving state.
+static std::vector s_temp_vram_write_list;
+
+static std::unique_ptr s_replacement_texture_render_target;
+static std::unique_ptr s_replacement_init_pipeline;
+static std::unique_ptr s_replacement_draw_pipeline; // copies alpha as-is
+static std::unique_ptr s_replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent)
+
+static bool s_track_vram_writes = false;
+
+static std::string s_game_id;
+static Settings::TextureReplacementSettings::Configuration s_config;
+
+// TODO: Check the size, purge some when it gets too large.
+static TextureCache s_replacement_image_cache;
+
+static VRAMReplacementMap s_vram_replacements;
+
+// TODO: Combine these into one map?
+static TextureReplacementMap s_vram_write_texture_replacements; +static TextureReplacementMap s_texture_page_texture_replacements; + +static std::unordered_set s_dumped_vram_writes; +static std::unordered_set s_dumped_textures; + +} // namespace GPUTextureCache + +bool GPUTextureCache::ShouldTrackVRAMWrites() +{ +#ifdef ALWAYS_TRACK_VRAM_WRITES + return true; +#else + return (IsDumpingVRAMWriteTextures() || + (g_settings.texture_replacements.enable_texture_replacements && HasVRAMWriteTextureReplacements())); +#endif +} + +bool GPUTextureCache::IsDumpingVRAMWriteTextures() +{ + return (g_settings.texture_replacements.dump_textures && !s_config.dump_texture_pages); +} + +bool GPUTextureCache::Initialize() +{ + UpdateVRAMTrackingState(); + if (!CompilePipelines()) + return false; + + return true; +} + +void GPUTextureCache::UpdateSettings(const Settings& old_settings) +{ + UpdateVRAMTrackingState(); + + if (g_settings.texture_replacements.enable_texture_replacements != + old_settings.texture_replacements.enable_texture_replacements) + { + Invalidate(); + + DestroyPipelines(); + if (!CompilePipelines()) + Panic("Failed to compile pipelines on TC settings change"); + } + + // Reload textures if configuration changes. 
+ if (LoadLocalConfiguration(false, false)) + ReloadTextureReplacements(); +} + +bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) +{ + if (sw.GetVersion() < 73) + { + if (!skip) + WARNING_LOG("Texture cache not in save state due to old version."); + + Invalidate(); + return true; + } + + if (!sw.DoMarker("GPUTextureCache")) + return false; + + if (sw.IsReading()) + { + if (!skip) + Invalidate(); + + u32 num_vram_writes = 0; + sw.Do(&num_vram_writes); + + const bool skip_writes = (skip || !s_track_vram_writes); + + for (u32 i = 0; i < num_vram_writes; i++) + { + static constexpr u32 PALETTE_RECORD_SIZE = sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + + sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE; + + if (skip_writes) + { + sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); + + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); + } + else + { + VRAMWrite* vrw = new VRAMWrite(); + DoStateVector(sw, &vrw->active_rect); + DoStateVector(sw, &vrw->write_rect); + sw.Do(&vrw->hash); + + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + + // Skip palette records if we're not dumping now. 
+ if (g_settings.texture_replacements.dump_textures) + { + vrw->palette_records.reserve(num_palette_records); + for (u32 j = 0; j < num_palette_records; j++) + { + VRAMWrite::PaletteRecord& rec = vrw->palette_records.emplace_back(); + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } + } + else + { + sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); + } + + if (sw.HasError()) + { + delete vrw; + Invalidate(); + return false; + } + + vrw->num_page_refs = 0; + LoopRectPages(vrw->active_rect, [vrw](u32 pn) { + DebugAssert(vrw->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, vrw, &vrw->page_refs[vrw->num_page_refs++]); + return true; + }); + } + } + } + else + { + s_temp_vram_write_list.clear(); + + if (!skip && s_track_vram_writes) + { + for (PageEntry& page : s_pages) + { + ListIterate(page.writes, [](VRAMWrite* vrw) { + if (std::find(s_temp_vram_write_list.begin(), s_temp_vram_write_list.end(), vrw) != + s_temp_vram_write_list.end()) + { + return; + } + + // try not to lose data... 
pull it from the sources + if (g_settings.texture_replacements.dump_textures) + SyncVRAMWritePaletteRecords(vrw); + + s_temp_vram_write_list.push_back(vrw); + }); + } + } + + u32 num_vram_writes = static_cast(s_temp_vram_write_list.size()); + sw.Do(&num_vram_writes); + for (VRAMWrite* vrw : s_temp_vram_write_list) + { + DoStateVector(sw, &vrw->active_rect); + DoStateVector(sw, &vrw->write_rect); + sw.Do(&vrw->hash); + + u32 num_palette_records = static_cast(vrw->palette_records.size()); + sw.Do(&num_palette_records); + for (VRAMWrite::PaletteRecord& rec : vrw->palette_records) + { + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } + } + } + + return !sw.HasError(); +} + +void GPUTextureCache::Shutdown() +{ + Invalidate(); + ClearHashCache(); + DestroyPipelines(); + s_replacement_texture_render_target.reset(); + s_hash_cache_purge_list = {}; + s_temp_vram_write_list = {}; + s_track_vram_writes = false; + + s_replacement_image_cache.clear(); + s_vram_replacements.clear(); + s_vram_write_texture_replacements.clear(); + s_texture_page_texture_replacements.clear(); + s_dumped_textures.clear(); + s_game_id = {}; +} + +bool GPUTextureCache::CompilePipelines() +{ + if (!g_settings.texture_replacements.enable_texture_replacements) + return true; + + GPUPipeline::GraphicsConfig plconfig = {}; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.input_layout.vertex_attributes = {}; + plconfig.input_layout.vertex_stride = 0; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.geometry_shader = nullptr; + plconfig.SetTargetFormats(REPLACEMENT_TEXTURE_FORMAT); + + // Most flags don't matter here. 
+  const GPUDevice::Features features = g_gpu_device->GetFeatures();
+  GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), 1, 1, false, false, false, false, false,
+                             features.dual_source_blend, features.framebuffer_fetch);
+  std::unique_ptr fullscreen_quad_vertex_shader = g_gpu_device->CreateShader(
+    GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateScreenQuadVertexShader());
+  if (!fullscreen_quad_vertex_shader)
+    return false;
+
+  plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
+
+  // Init pipeline: plain copy.
+  std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
+                                                  shadergen.GenerateCopyFragmentShader());
+  if (!fs)
+    return false;
+  plconfig.fragment_shader = fs.get();
+  if (!(s_replacement_init_pipeline = g_gpu_device->CreatePipeline(plconfig)))
+    return false;
+
+  // Draw pipeline: the merge shader must be stored in fs. Otherwise the null check below tests
+  // the previous (copy) shader and this pipeline is built with the wrong fragment shader.
+  fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
+                                  shadergen.GenerateReplacementMergeFragmentShader(false));
+  if (!fs)
+    return false;
+  plconfig.fragment_shader = fs.get();
+  if (!(s_replacement_draw_pipeline = g_gpu_device->CreatePipeline(plconfig)))
+    return false;
+
+  // Semitransparent draw pipeline: merge shader variant generated with the flag set to true.
+  fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
+                                  shadergen.GenerateReplacementMergeFragmentShader(true));
+  if (!fs)
+    return false;
+  plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
+  plconfig.fragment_shader = fs.get();
+  if (!(s_replacement_semitransparent_draw_pipeline = g_gpu_device->CreatePipeline(plconfig)))
+    return false;
+
+  return true;
+}
+
+void GPUTextureCache::DestroyPipelines()
+{
+  s_replacement_init_pipeline.reset();
+  s_replacement_draw_pipeline.reset();
+  s_replacement_semitransparent_draw_pipeline.reset();
+}
+
+void GPUTextureCache::AddDrawnRectangle(const GSVector4i rect, const GSVector4i clip_rect)
+{
+  // TODO: This might be a bit slow...
+ LoopRectPages(rect, [&rect, &clip_rect](u32 pn) { + PageEntry& page = s_pages[pn]; + + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + if (it->active_rect.rintersects(rect)) + RemoveVRAMWrite(it); + } + + const GSVector4i rc = rect.rintersect(VRAMPageRect(pn)); + if (page.num_draw_rects > 0) + { + u32 candidate = page.num_draw_rects; + for (u32 i = 0; i < page.num_draw_rects; i++) + { + const GSVector4i page_draw_rect = page.draw_rects[i]; + if (page_draw_rect.rcontains(rc)) + { + // already contained + return; + } + else if (clip_rect.rintersects(page_draw_rect)) + { + // this one's probably for the draw rect, so use it + candidate = i; + } + } + if (candidate == NUM_PAGE_DRAW_RECTS) + { + // we're out of draw rects.. pick the one that's the closest, and hope for the best + GL_INS_FMT("Out of draw rects for page {}", pn); + candidate = 0; + float closest_dist = RectDistance(rc, page.draw_rects[0]); + for (u32 i = 1; i < NUM_PAGE_DRAW_RECTS; i++) + { + const float dist = RectDistance(rc, page.draw_rects[i]); + candidate = (dist < closest_dist) ? i : candidate; + closest_dist = (dist < closest_dist) ? 
dist : closest_dist; + } + } + + if (candidate != page.num_draw_rects) + { + const GSVector4i new_draw_rect = page.draw_rects[candidate].runion(rc); + page.draw_rects[candidate] = new_draw_rect; + InvalidatePageSources(pn, new_draw_rect); + } + else + { + DebugAssert(page.num_draw_rects < NUM_PAGE_DRAW_RECTS); + page.draw_rects[candidate] = rc; + page.num_draw_rects++; + InvalidatePageSources(pn, rc); + } + + page.total_draw_rect = page.total_draw_rect.runion(rc); + GL_INS_FMT("Page {} drawn rect is now {}", pn, page.total_draw_rect); + } + else + { + GL_INS_FMT("Page {} drawn rect is now {}", pn, rc); + page.total_draw_rect = rc; + page.draw_rects[0] = rc; + page.num_draw_rects = 1; + + // remove all sources, let them re-lookup if needed + InvalidatePageSources(pn, rc); + } + }); +} + +void GPUTextureCache::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool check_mask, + bool set_mask, const GSVector4i src_bounds, const GSVector4i dst_bounds) +{ + const bool convert_copies_to_writes = s_config.convert_copies_to_writes; + + // first dump out any overlapping writes with the old data + if (convert_copies_to_writes) + { + LoopRectPages(dst_bounds, [&dst_bounds](u32 pn) { + PageEntry& page = s_pages[pn]; + for (TListNode* n = page.writes.head; n; n = n->next) + { + VRAMWrite* it = n->ref; + if (it->active_rect.rintersects(dst_bounds)) + { + SyncVRAMWritePaletteRecords(it); + DumpTexturesFromVRAMWrite(it); + } + } + }); + } + + // copy and invalidate + GPU_SW_Rasterizer::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, check_mask, set_mask); + AddWrittenRectangle(dst_bounds, convert_copies_to_writes); +} + +void GPUTextureCache::WriteVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask, + const GSVector4i bounds) +{ + GPU_SW_Rasterizer::WriteVRAM(x, y, width, height, data, set_mask, check_mask); + + if (!s_track_vram_writes) + return; + + if (s_last_vram_write && TryMergeVRAMWrite(s_last_vram_write, 
bounds)) + return; + + VRAMWrite* it = new VRAMWrite(); + it->active_rect = bounds; + it->write_rect = bounds; + it->hash = HashRect(bounds); + it->num_page_refs = 0; + LoopRectPages(bounds, [it](u32 pn) { + DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, it, &it->page_refs[it->num_page_refs++]); + return true; + }); + + DEV_LOG("New VRAM write {:016X} at {} touching {} pages", it->hash, bounds, it->num_page_refs); + s_last_vram_write = it; +} + +void GPUTextureCache::AddWrittenRectangle(const GSVector4i rect, bool update_vram_writes) +{ + LoopRectPages(rect, [&rect, &update_vram_writes](u32 pn) { + PageEntry& page = s_pages[pn]; + InvalidatePageSources(pn, rect); + + if (page.num_draw_rects > 0) + { + const u32 prev_draw_rects = page.num_draw_rects; + for (u32 i = 0; i < page.num_draw_rects;) + { + const GSVector4i page_draw_rect = page.draw_rects[i]; + if (!page_draw_rect.rintersects(rect)) + { + i++; + continue; + } + + GL_INS_FMT("Clearing page {} draw rect {} due to write", pn, page_draw_rect); + page.num_draw_rects--; + if (page.num_draw_rects > 0) + { + // reorder it + const u32 remaining_rects = page.num_draw_rects - i; + if (remaining_rects > 0) + std::memmove(&page.draw_rects[i], &page.draw_rects[i + 1], sizeof(GSVector4i) * remaining_rects); + } + } + + if (page.num_draw_rects != prev_draw_rects) + { + if (page.num_draw_rects == 0) + { + page.total_draw_rect = INVALID_RECT; + GL_INS_FMT("Page {} no longer has any draw rects", pn); + } + else + { + GSVector4i new_total_draw_rect = page.draw_rects[0]; + for (u32 i = 1; i < page.num_draw_rects; i++) + new_total_draw_rect = new_total_draw_rect.runion(page.draw_rects[i]); + page.total_draw_rect = new_total_draw_rect; + GL_INS_FMT("Page {} total draw rect is now {}", pn, new_total_draw_rect); + } + } + } + + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + + const GSVector4i intersection = it->active_rect.rintersect(rect); + if 
(!intersection.rempty()) + { + if (update_vram_writes && it->active_rect.rcontains(rect)) + { + const HashType new_hash = HashRect(it->write_rect); + DEV_LOG("New VRAM write hash {:016X} => {:016X}", it->hash, new_hash); + it->hash = new_hash; + } + else if (it->num_splits < s_config.max_vram_write_splits && !it->active_rect.eq(intersection)) + { + SplitVRAMWrite(it, intersection); + } + else + { + RemoveVRAMWrite(it); + } + } + } + }); +} + +[[maybe_unused]] ALWAYS_INLINE static TinyString SourceKeyToString(const GPUTextureCache::SourceKey& key) +{ + static constexpr const std::array texture_modes = { + {"Palette4Bit", "Palette8Bit", "Direct16Bit", "Reserved_Direct16Bit"}}; + + TinyString ret; + if (key.mode < GPUTextureMode::Direct16Bit) + { + ret.format("{} Page[{}] CLUT@[{},{}]", texture_modes[static_cast(key.mode)], key.page, key.palette.GetXBase(), + key.palette.GetYBase()); + } + else + { + ret.format("{} Page[{}]", texture_modes[static_cast(key.mode)], key.page); + } + return ret; +} + +[[maybe_unused]] ALWAYS_INLINE static TinyString SourceToString(const GPUTextureCache::Source* src) +{ + return SourceKeyToString(src->key); +} + +ALWAYS_INLINE_RELEASE static const u16* VRAMPagePointer(u32 pn) +{ + const u32 start_y = VRAMPageStartY(pn); + const u32 start_x = VRAMPageStartX(pn); + return &g_vram[start_y * VRAM_WIDTH + start_x]; +} + +ALWAYS_INLINE_RELEASE static const u16* VRAMPalettePointer(GPUTexturePaletteReg palette) +{ + return &g_vram[VRAM_WIDTH * palette.GetYBase() + palette.GetXBase()]; +} + +// TODO: Vectorize these with gather. 
+void GPUTextureCache::DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, + u32 dest_stride) +{ + if ((width % 4u) == 0) + { + const u32 vram_width = width / 4; + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < vram_width; x++) + { + const u32 pp = *(page_ptr++); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[(pp >> 4) & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[(pp >> 8) & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp >> 12]); + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } + else + { + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + u32 offs = 0; + u16 texel = 0; + for (u32 x = 0; x < width; x++) + { + if (offs == 0) + texel = *(page_ptr++); + + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[texel & 0x0F]); + texel >>= 4; + + offs = (offs + 1) % 4; + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } +} +void GPUTextureCache::DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, + u32 dest_stride) +{ + if ((width % 2u) == 0) + { + const u32 vram_width = width / 2; + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < vram_width; x++) + { + const u32 pp = *(page_ptr++); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp & 0xFF]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp >> 8]); + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } + else + { + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + u32 offs = 0; + u16 texel = 0; + for (u32 x = 0; x < width; x++) + { + if (offs == 0) + texel = *(page_ptr++); + + *(dest_ptr++) = 
VRAMRGBA5551ToRGBA8888(palette[texel & 0xFF]); + texel >>= 8; + + offs ^= 1; + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } +} + +void GPUTextureCache::DecodeTexture16(const u16* page, u32 width, u32 height, u32* dest, u32 dest_stride) +{ + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < width; x++) + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(*(page_ptr++)); + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } +} + +void GPUTextureCache::DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u32* dest, + u32 dest_stride, u32 width, u32 height) +{ + switch (mode) + { + case GPUTextureMode::Palette4Bit: + DecodeTexture4(page_ptr, palette, width, height, dest, dest_stride); + break; + case GPUTextureMode::Palette8Bit: + DecodeTexture8(page_ptr, palette, width, height, dest, dest_stride); + break; + case GPUTextureMode::Direct16Bit: + case GPUTextureMode::Reserved_Direct16Bit: + DecodeTexture16(page_ptr, width, height, dest, dest_stride); + break; + + DefaultCaseIsUnreachable() + } +} + +void GPUTextureCache::DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture) +{ + alignas(16) static u32 s_temp_buffer[TEXTURE_PAGE_WIDTH * TEXTURE_PAGE_HEIGHT]; + + u32* tex_map; + u32 tex_stride; + const bool mapped = + texture->Map(reinterpret_cast(&tex_map), &tex_stride, 0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); + if (!mapped) + { + tex_map = s_temp_buffer; + tex_stride = sizeof(u32) * TEXTURE_PAGE_WIDTH; + } + + const u16* page_ptr = VRAMPagePointer(page); + const u16* palette_ptr = TextureModeHasPalette(mode) ? 
VRAMPalettePointer(palette) : nullptr; + DecodeTexture(mode, page_ptr, palette_ptr, tex_map, tex_stride, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); + + if (mapped) + texture->Unmap(); + else + texture->Update(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, tex_map, tex_stride); +} + +const GPUTextureCache::Source* GPUTextureCache::LookupSource(SourceKey key, const GSVector4i rect, + PaletteRecordFlags flags) +{ + GL_SCOPE_FMT("TC: Lookup source {}", SourceKeyToString(key)); + + TList& list = s_pages[key.page].sources; + for (TListNode* n = list.head; n; n = n->next) + { + if (n->ref->key == key) + { + GL_INS("TC: Source hit"); + ListMoveToFront(&list, n); + return ReturnSource(n->ref, rect, flags); + } + } + + return ReturnSource(CreateSource(key), rect, flags); +} + +const GPUTextureCache::Source* GPUTextureCache::ReturnSource(Source* source, const GSVector4i uv_rect, + PaletteRecordFlags flags) +{ +#ifdef _DEBUG + // GL_INS_FMT("Tex hash: {:016X}", source->texture_hash); + // GL_INS_FMT("Palette hash: {:016X}", source->palette_hash); + if (!uv_rect.eq(INVALID_RECT)) + { + LoopXWrappedPages(source->key.page, TexturePageCountForMode(source->key.mode), [&uv_rect](u32 pn) { + const PageEntry& pe = s_pages[pn]; + ListIterate(pe.writes, [&uv_rect](const VRAMWrite* vrw) { + if (const GSVector4i intersection = uv_rect.rintersect(vrw->write_rect); !intersection.rempty()) + GL_INS_FMT("TC: VRAM write was {:016X} ({})", vrw->hash, intersection); + }); + }); + if (TextureModeHasPalette(source->key.mode)) + GL_INS_FMT("TC: Palette was {:016X}", source->palette_hash); + } +#endif + + DebugAssert(source->from_hash_cache); + source->from_hash_cache->last_used_frame = System::GetFrameNumber(); + + // TODO: Cache var. 
+ if (g_settings.texture_replacements.dump_textures) + { + source->active_uv_rect = source->active_uv_rect.runion(uv_rect); + source->palette_record_flags |= flags; + } + + return source; +} + +bool GPUTextureCache::IsPageDrawn(u32 page_index) +{ + return (s_pages[page_index].num_draw_rects > 0); +} + +bool GPUTextureCache::IsPageDrawn(u32 page_index, const GSVector4i rect) +{ + const PageEntry& page = s_pages[page_index]; + if (page.num_draw_rects == 0 || !page.total_draw_rect.rintersects(rect)) + return false; + + // if there's only a single draw rect, it'll match the total + if (page.num_draw_rects == 1) + return true; + + for (u32 i = 0; i < page.num_draw_rects; i++) + { + if (page.draw_rects[i].rintersects(rect)) + return true; + } + + return false; +} + +bool GPUTextureCache::IsRectDrawn(const GSVector4i rect) +{ + // TODO: This is potentially hot, so replace it with an explicit loop over the pages instead. + return !LoopRectPagesWithEarlyExit(rect, [&rect](u32 pn) { return !IsPageDrawn(pn, rect); }); +} + +bool GPUTextureCache::AreSourcePagesDrawn(SourceKey key, const GSVector4i rect) +{ + // NOTE: This doesn't handle VRAM wrapping. But neither does the caller. YOLO? +#ifdef _DEBUG + { + for (u32 offset = 0; offset < TexturePageCountForMode(key.mode); offset++) + { + const u32 wrapped_page = ((key.page + offset) & VRAM_PAGE_X_MASK) + (key.page & VRAM_PAGE_Y_MASK); + if (IsPageDrawn(wrapped_page, rect)) + { + GL_INS_FMT("UV rect {} intersects page [{}] dirty rect {}, disabling TC", rect, wrapped_page, + s_pages[wrapped_page].total_draw_rect); + } + } + } +#endif + + switch (key.mode) + { + case GPUTextureMode::Palette4Bit: + { + return IsPageDrawn(key.page, rect); + } + + case GPUTextureMode::Palette8Bit: + { + // 2 P4 pages per P8 page. 
+ const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK); + return (IsPageDrawn(key.page, rect) || IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, rect)); + } + + case GPUTextureMode::Direct16Bit: + case GPUTextureMode::Reserved_Direct16Bit: + { + // 4 P4 pages per C16 page. + const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK); + return (IsPageDrawn(key.page, rect) || IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, rect) || + IsPageDrawn(((key.page + 2) & VRAM_PAGE_X_MASK) + yoffs, rect) || + IsPageDrawn(((key.page + 3) & VRAM_PAGE_X_MASK) + yoffs, rect)); + } + + DefaultCaseIsUnreachable() + } +} + +void GPUTextureCache::Invalidate() +{ + for (u32 i = 0; i < NUM_VRAM_PAGES; i++) + { + InvalidatePageSources(i); + + PageEntry& page = s_pages[i]; + page.num_draw_rects = 0; + page.total_draw_rect = GSVector4i::zero(); + std::memset(page.draw_rects.data(), 0, sizeof(page.draw_rects)); + + while (page.writes.tail) + RemoveVRAMWrite(page.writes.tail->ref); + } + + // should all be null +#ifdef _DEBUG + for (u32 i = 0; i < NUM_VRAM_PAGES; i++) + DebugAssert(!s_pages[i].sources.head && !s_pages[i].sources.tail); + DebugAssert(!s_last_vram_write); +#endif + + ClearHashCache(); +} + +void GPUTextureCache::InvalidatePageSources(u32 pn) +{ + DebugAssert(pn < NUM_VRAM_PAGES); + + TList& ps = s_pages[pn].sources; + if (ps.head) + GL_INS_FMT("Invalidate page {} sources", pn); + + for (TListNode* n = ps.head; n;) + { + Source* src = n->ref; + n = n->next; + + DestroySource(src); + } + + DebugAssert(!ps.head && !ps.tail); +} + +void GPUTextureCache::InvalidatePageSources(u32 pn, const GSVector4i rc) +{ + DebugAssert(pn < NUM_VRAM_PAGES); + + TList& ps = s_pages[pn].sources; + for (TListNode* n = ps.head; n;) + { + Source* src = n->ref; + n = n->next; + + // TODO: Make faster? 
+ if (!src->texture_rect.rintersects(rc) && + (src->key.mode == GPUTextureMode::Direct16Bit || !src->palette_rect.rintersects(rc))) + { + continue; + } + + GL_INS_FMT("Invalidate source {} in page {} due to overlapping with {}", SourceToString(src), pn, rc); + DestroySource(src); + } +} + +void GPUTextureCache::DestroySource(Source* src) +{ + GL_INS_FMT("Invalidate source {}", SourceToString(src)); + + if (g_settings.texture_replacements.dump_textures && !src->active_uv_rect.eq(INVALID_RECT)) + { + if (!s_config.dump_texture_pages) + { + // Find VRAM writes that overlap with this source + LoopRectPages(src->active_uv_rect, [src](const u32 pn) { + PageEntry& pg = s_pages[pn]; + ListIterate(pg.writes, [src](VRAMWrite* vw) { + UpdateVRAMWriteSources(vw, src->key, src->active_uv_rect, src->palette_record_flags); + }); + return true; + }); + } + else + { + DumpTextureFromPage(src); + } + } + + for (u32 i = 0; i < src->num_page_refs; i++) + ListUnlink(src->page_refs[i]); + + DebugAssert(src->from_hash_cache && src->from_hash_cache->ref_count > 0); + ListUnlink(src->hash_cache_ref); + src->from_hash_cache->ref_count--; + delete src; +} + +GPUTextureCache::Source* GPUTextureCache::CreateSource(SourceKey key) +{ + GL_INS_FMT("TC: Create source {}", SourceKeyToString(key)); + + const HashType tex_hash = HashPage(key.page, key.mode); + const HashType pal_hash = (key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(key.palette, key.mode) : 0; + HashCacheEntry* hcentry = LookupHashCache(key, tex_hash, pal_hash); + if (!hcentry) + { + GL_INS("TC: Hash cache lookup fail?!"); + return nullptr; + } + + hcentry->ref_count++; + + Source* src = new Source(); + src->key = key; + src->num_page_refs = 0; + src->texture = hcentry->texture.get(); + src->from_hash_cache = hcentry; + ListAppend(&hcentry->sources, src, &src->hash_cache_ref); + src->texture_hash = tex_hash; + src->palette_hash = pal_hash; + + // Textures at front, CLUTs at back. 
+ std::array page_refns; + const auto add_page_ref = [src, &page_refns](u32 pn) { + // Don't double up references + for (u32 i = 0; i < src->num_page_refs; i++) + { + if (page_refns[i] == pn) + return; + } + + const u32 ri = src->num_page_refs++; + page_refns[ri] = pn; + + ListPrepend(&s_pages[pn].sources, src, &src->page_refs[ri]); + }; + const auto add_page_ref_back = [src, &page_refns](u32 pn) { + // Don't double up references + for (u32 i = 0; i < src->num_page_refs; i++) + { + if (page_refns[i] == pn) + return; + } + + const u32 ri = src->num_page_refs++; + page_refns[ri] = pn; + + ListAppend(&s_pages[pn].sources, src, &src->page_refs[ri]); + }; + + src->texture_rect = GetTextureRect(key.page, key.mode); + src->active_uv_rect = INVALID_RECT; + LoopXWrappedPages(key.page, TexturePageCountForMode(key.mode), add_page_ref); + + if (key.mode < GPUTextureMode::Direct16Bit) + { + src->palette_rect = GetPaletteRect(key.palette, key.mode, true); + LoopXWrappedPages(PalettePageNumber(key.palette), PalettePageCountForMode(key.mode), add_page_ref_back); + } + + GL_INS_FMT("Appended new source {} to {} pages", SourceToString(src), src->num_page_refs); + return src; +} + +void GPUTextureCache::UpdateVRAMTrackingState() +{ + s_track_vram_writes = ShouldTrackVRAMWrites(); +} + +// Scans the VRAM halfwords covered by rect and returns the (lowest, highest) palette index found in them. +// 4-bit mode packs four indices per halfword, 8-bit mode packs two. The result is clamped so that +// x_base + index never runs past the right edge of VRAM. +std::pair GPUTextureCache::ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode, + GPUTexturePaletteReg palette) +{ + DebugAssert(TextureModeHasPalette(mode)); + u32 pal_min = GetPaletteWidth(mode) - 1; + u32 pal_max = 0; + + const u32 rect_width = rect.width(); + const u32 rect_height = rect.height(); + + if (mode == GPUTextureMode::Palette4Bit) + { + // Advance one VRAM row per iteration (same pattern as the 8-bit path). Recomputing the pointer from rect.y + // inside the loop would scan the first row rect_height times and never look at the others. + const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x]; + for (u32 y = 0; y < rect_height; y++) + { + const u16* ptr = row_ptr; + row_ptr += VRAM_WIDTH; + + for (u32 x = 0; x < rect_width; x++) + { + const u16 val = *(ptr++); + const u32 p0 = val & 0xf; + const u32 p1 = (val >> 4) & 0xf; + const u32 p2 = (val >> 8) & 0xf; + const u32 p3 = (val >> 12) & 0xf; + pal_min = std::min(pal_min, 
std::min(p0, std::min(p1, std::min(p2, p3)))); + pal_max = std::max(pal_max, std::max(p0, std::max(p1, std::max(p2, p3)))); + } + } + } + else // if (mode == GPUTextureMode::Palette8Bit) + { + const u32 aligned_width = Common::AlignDownPow2(rect_width, 8); + const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x]; + for (u32 y = 0; y < rect_height; y++) + { + const u16* ptr = reinterpret_cast(row_ptr); + row_ptr += VRAM_WIDTH; + + if (aligned_width > 0) [[likely]] + { + GSVector4i min = GSVector4i::load(ptr); + GSVector4i max = min; + ptr += 8; + + for (u32 x = 8; x < aligned_width; x += 8) + { + const GSVector4i v = GSVector4i::load(ptr); + ptr += 8; + + min = min.min_u8(v); + max = max.max_u8(v); + } + + pal_min = std::min(pal_min, min.minv_u8()); + pal_max = std::max(pal_max, max.maxv_u8()); + } + + for (u32 x = aligned_width; x < rect_width; x++) + { + const u16 val = *(ptr++); + const u32 p0 = (val & 0xFF); + const u32 p1 = (val >> 8); + pal_min = std::min(pal_min, std::min(p0, p1)); + pal_max = std::max(pal_max, std::max(p0, p1)); + } + } + } + + // Clamp to VRAM bounds. + const u32 x_base = palette.GetXBase(); + if ((x_base + pal_max) >= VRAM_WIDTH) [[unlikely]] + { + WARNING_LOG("Texture with CLUT at {},{} is outside of VRAM bounds, clamping.", x_base, palette.GetYBase()); + pal_min = std::min(pal_min, VRAM_WIDTH - x_base - 1); + pal_max = std::min(pal_max, VRAM_WIDTH - x_base - 1); + } + + return std::make_pair(pal_min, pal_max); +} + +void GPUTextureCache::SyncVRAMWritePaletteRecords(VRAMWrite* entry) +{ + // Have to go through any sources that intersect this write, because they may not have been invalidated yet, in which + // case the active rect also will not have been updated. 
+ if (IsDumpingVRAMWriteTextures()) + { + LoopRectPages(entry->active_rect, [entry](const u32 pn) { + const PageEntry& page = s_pages[pn]; + ListIterate(page.sources, [entry](const Source* src) { + if (!src->active_uv_rect.eq(INVALID_RECT)) + UpdateVRAMWriteSources(entry, src->key, src->active_uv_rect, src->palette_record_flags); + }); + + return true; + }); + } +} + +void GPUTextureCache::UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect, + PaletteRecordFlags flags) +{ + // convert to VRAM write space + const GSVector4i write_intersection = entry->active_rect.rintersect(global_uv_rect); + if (write_intersection.rempty()) + return; + + // Add to the palette tracking list + auto iter = std::find_if(entry->palette_records.begin(), entry->palette_records.end(), + [&source_key](const auto& it) { return (it.key == source_key); }); + if (iter != entry->palette_records.end()) + { + iter->rect = iter->rect.runion(write_intersection); + iter->flags |= flags; + } + else + { + InitializeVRAMWritePaletteRecord(&entry->palette_records.emplace_back(), source_key, write_intersection, flags); + } +} + +void GPUTextureCache::SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect) +{ + SyncVRAMWritePaletteRecords(entry); + + const s32 to_left = (written_rect.left - entry->active_rect.left); + const s32 to_right = (entry->active_rect.right - written_rect.right); + const s32 to_top = (written_rect.top - entry->active_rect.top); + const s32 to_bottom = (entry->active_rect.bottom - written_rect.bottom); + DebugAssert(to_left > 0 || to_right > 0 || to_top > 0 || to_bottom > 0); + + entry->num_splits++; + + GSVector4i rects[4]; + + // TODO: more efficient vector swizzle + if (std::max(to_top, to_bottom) > std::max(to_left, to_right)) + { + // split top/bottom, then left/right + rects[0] = GSVector4i(entry->active_rect.left, entry->active_rect.top, entry->active_rect.right, written_rect.top); + rects[1] = + 
GSVector4i(entry->active_rect.left, written_rect.bottom, entry->active_rect.right, entry->active_rect.bottom); + rects[2] = GSVector4i(entry->active_rect.left, entry->active_rect.top + to_top, entry->active_rect.left + to_left, + entry->active_rect.bottom - to_bottom); + rects[3] = GSVector4i(entry->active_rect.right - to_right, entry->active_rect.top + to_top, + entry->active_rect.right, entry->active_rect.bottom - to_bottom); + } + else + { + // split left/right, then top/bottom + // rects[0]/rects[1] are the full-height strips to the left and right of the written area. + // rects[2]/rects[3] are the pieces directly above and below it, spanning only the written columns + // (active.left + to_left .. active.right - to_right), mirroring the top/bottom branch above. + rects[0] = + GSVector4i(entry->active_rect.left, entry->active_rect.top, written_rect.left, entry->active_rect.bottom); + rects[1] = + GSVector4i(written_rect.right, entry->active_rect.top, entry->active_rect.right, entry->active_rect.bottom); + rects[2] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.top, + entry->active_rect.right - to_right, entry->active_rect.top + to_top); + rects[3] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.bottom - to_bottom, + entry->active_rect.right - to_right, entry->active_rect.bottom); + } + + // Empty pieces (nothing left on that side) are skipped; every other piece becomes its own VRAMWrite that + // keeps the original write_rect/hash but only covers the remaining active area. + for (size_t i = 0; i < std::size(rects); i++) + { + const GSVector4i splitr = rects[i]; + if (splitr.rempty()) + continue; + + VRAMWrite* it = new VRAMWrite(); + it->write_rect = entry->write_rect; + it->active_rect = splitr; + it->hash = entry->hash; + it->num_splits = entry->num_splits; + it->num_page_refs = 0; + + // TODO: We probably want to share this... 
+ it->palette_records.reserve(entry->palette_records.size()); + // Copy from the write being split; the new write's own list is still empty at this point. + for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) + { + if (prec.rect.rintersects(splitr)) + it->palette_records.push_back(prec); + } + + LoopRectPages(splitr, [it](u32 pn) { + DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, it, &it->page_refs[it->num_page_refs++]); + return true; + }); + + DEV_LOG("Split VRAM write {:016X} at {} in direction {} => {}", it->hash, entry->active_rect, i, splitr); + } + + for (u32 i = 0; i < entry->num_page_refs; i++) + ListUnlink(entry->page_refs[i]); + + // Same bookkeeping as RemoveVRAMWrite(): never leave s_last_vram_write pointing at a freed write. + s_last_vram_write = (s_last_vram_write == entry) ? nullptr : s_last_vram_write; + delete entry; +} + +bool GPUTextureCache::TryMergeVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect) +{ + // It shouldn't have been split. Don't want to update after it has been. + if (s_last_vram_write->num_splits != 0) + return false; + + // Check coalesce bounds/config. + const u32 coalesce_width = s_config.max_vram_write_coalesce_width; + const u32 coalesce_height = s_config.max_vram_write_coalesce_height; + const bool merge_vertical = (static_cast(written_rect.height()) <= coalesce_height && + s_last_vram_write->write_rect.left == written_rect.left && + s_last_vram_write->write_rect.right == written_rect.right && + s_last_vram_write->write_rect.bottom == written_rect.top); + const bool merge_horizontal = (static_cast(written_rect.width()) <= coalesce_width && + s_last_vram_write->write_rect.top == written_rect.top && + s_last_vram_write->write_rect.bottom == written_rect.bottom && + s_last_vram_write->write_rect.right == written_rect.left); + if (!merge_vertical && !merge_horizontal) + return false; + + // Double-check that nothing has used this write as a source yet (i.e. drawn). + // Don't want to merge textures that are already completely uploaded... 
+ // The inner callback returns true to keep iterating. A source is harmless if it has never been drawn with + // (active_uv_rect is still INVALID_RECT) or if its drawn area does not touch this write; the first source + // that is drawn and overlapping stops the iteration and vetoes the merge. + if (!LoopRectPagesWithEarlyExit(entry->active_rect, [entry](const u32 pn) { + return ListIterateWithEarlyExit(s_pages[pn].sources, [entry](const Source* src) { + return (src->active_uv_rect.eq(INVALID_RECT) || !src->active_uv_rect.rintersects(entry->active_rect)); + }); + })) + { + return false; + } + + // Remove from old pages, we'll re-add it. + for (u32 i = 0; i < entry->num_page_refs; i++) + ListUnlink(entry->page_refs[i]); + entry->num_page_refs = 0; + + // Expand the write. + const GSVector4i new_rect = entry->write_rect.runion(written_rect); + DEV_LOG("Expanding VRAM write {:016X} from {} to {}", entry->hash, entry->write_rect, new_rect); + entry->active_rect = new_rect; + entry->write_rect = new_rect; + entry->hash = HashRect(new_rect); + + // Re-add to pages. + LoopRectPages(new_rect, [entry](u32 pn) { + DebugAssert(entry->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, entry, &entry->page_refs[entry->num_page_refs++]); + return true; + }); + + return true; +} + +void GPUTextureCache::RemoveVRAMWrite(VRAMWrite* entry) +{ + DEV_LOG("Remove VRAM write {:016X} at {}", entry->hash, entry->write_rect); + + SyncVRAMWritePaletteRecords(entry); + + if (entry->num_splits > 0 && !entry->palette_records.empty()) + { + // Combine palette records with another write. 
+ VRAMWrite* other_write = nullptr; + LoopRectPagesWithEarlyExit(entry->write_rect, [&entry, &other_write](u32 pn) { + PageEntry& pg = s_pages[pn]; + ListIterateWithEarlyExit(pg.writes, [&entry, &other_write](VRAMWrite* cur) { + // Skip the write being removed: it is still linked into the page lists here and trivially matches its + // own hash, which would make it "merge" into itself and then lose its records below. + if (cur == entry || cur->hash != entry->hash) + return true; + + other_write = cur; + return false; + }); + return (other_write == nullptr); + }); + if (other_write) + { + for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) + { + const auto iter = std::find_if(other_write->palette_records.begin(), other_write->palette_records.end(), + [&prec](const VRAMWrite::PaletteRecord& it) { return it.key == prec.key; }); + if (iter != other_write->palette_records.end()) + { + // Same merge rule as UpdateVRAMWriteSources(): grow the rect and accumulate the flags. + iter->rect = iter->rect.runion(prec.rect); + iter->flags |= prec.flags; + } + else + { + other_write->palette_records.push_back(prec); + } + } + + // No dumping from here! + entry->palette_records.clear(); + } + } + + for (u32 i = 0; i < entry->num_page_refs; i++) + ListUnlink(entry->page_refs[i]); + + DumpTexturesFromVRAMWrite(entry); + + s_last_vram_write = (s_last_vram_write == entry) ? nullptr : s_last_vram_write; + delete entry; +} + +void GPUTextureCache::DumpTexturesFromVRAMWrite(VRAMWrite* entry) +{ + if (g_settings.texture_replacements.dump_textures && !s_config.dump_texture_pages) + { + for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) + { + if (prec.key.mode == GPUTextureMode::Direct16Bit && !s_config.dump_c16_textures) + continue; + + HashType pal_hash = + (prec.key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(prec.key.palette, prec.key.mode) : 0; + + // If it's 8-bit, try reducing the range of the palette. + u32 pal_min = 0, pal_max = prec.key.HasPalette() ? 
(GetPaletteWidth(prec.key.mode) - 1) : 0; + if (prec.key.HasPalette() && s_config.reduce_palette_range) + { + std::tie(pal_min, pal_max) = ReducePaletteBounds(prec.rect, prec.key.mode, prec.key.palette); + pal_hash = HashPartialPalette(prec.palette, pal_min, pal_max); + } + + const u32 offset_x = ApplyTextureModeShift(prec.key.mode, prec.rect.left - entry->write_rect.left); + const u32 offset_y = prec.rect.top - entry->write_rect.top; + + DumpTexture(TextureReplacementType::TextureFromVRAMWrite, offset_x, offset_y, entry->write_rect.width(), + entry->write_rect.height(), prec.key.mode, entry->hash, pal_hash, pal_min, pal_max, prec.palette, + prec.rect, prec.flags); + } + } +} + +void GPUTextureCache::DumpTextureFromPage(const Source* src) +{ + // C16 filter + if (!s_config.dump_c16_textures && src->key.mode >= GPUTextureMode::Direct16Bit) + return; + + const bool dump_full_page = s_config.dump_full_texture_pages; + + // Dump active area from page + HashType pal_hash = src->palette_hash; + const u16* pal_ptr = src->key.HasPalette() ? VRAMPalettePointer(src->key.palette) : nullptr; + + // We don't want to dump the wraparound + const GSVector4i unwrapped_texture_rect = + (TexturePageIsWrapping(src->key.mode, src->key.page) ? + GSVector4i(VRAMPageStartX(src->key.page), src->texture_rect.y, VRAM_WIDTH, src->texture_rect.w) : + src->texture_rect); + const GSVector4i dump_rect = + dump_full_page ? unwrapped_texture_rect : src->active_uv_rect.rintersect(unwrapped_texture_rect); + if (dump_rect.rempty()) + return; + + // Need to hash only the active area. + const HashType tex_hash = HashRect(dump_rect); + + // Source rect needs the offset, but we still only want to hash the active area when replacing + const GSVector4i dump_offset_in_page = dump_rect.sub32(unwrapped_texture_rect); + + // If it's 8-bit, try reducing the range of the palette. + u32 pal_min = 0, pal_max = src->key.HasPalette() ? 
(GetPaletteWidth(src->key.mode) - 1) : 0; + if (src->key.HasPalette() && s_config.reduce_palette_range) + { + std::tie(pal_min, pal_max) = ReducePaletteBounds(dump_rect, src->key.mode, src->key.palette); + pal_hash = HashPartialPalette(pal_ptr, pal_min, pal_max); + } + + DumpTexture(TextureReplacementType::TextureFromPage, ApplyTextureModeShift(src->key.mode, dump_offset_in_page.x), + dump_offset_in_page.y, unwrapped_texture_rect.width(), unwrapped_texture_rect.height(), src->key.mode, + tex_hash, pal_hash, pal_min, pal_max, pal_ptr, dump_rect, src->palette_record_flags); +} + +GPUTextureCache::HashType GPUTextureCache::HashPage(u8 page, GPUTextureMode mode) +{ + XXH3_state_t state; + XXH3_64bits_reset(&state); + + // Pages aren't contiguous in memory :( + const u16* page_ptr = VRAMPagePointer(page); + + switch (mode) + { + case GPUTextureMode::Palette4Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + case GPUTextureMode::Palette8Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 2 * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + case GPUTextureMode::Direct16Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 4 * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + DefaultCaseIsUnreachable() + } + + return XXH3_64bits_digest(&state); +} + +GPUTextureCache::HashType GPUTextureCache::HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode) +{ + const u32 x_base = palette.GetXBase(); + const u16* base = VRAMPalettePointer(palette); + + switch (mode) + { + case GPUTextureMode::Palette4Bit: + return XXH3_64bits(base, sizeof(u16) * 16); + + case GPUTextureMode::Palette8Bit: + { + // If the palette wraps around, chances are we aren't using those indices. + // Games that do this: Metal Gear Solid. 
+ if ((x_base + 256) > VRAM_WIDTH) [[unlikely]] + return XXH3_64bits(base, sizeof(u16) * (VRAM_WIDTH - x_base)); + else + return XXH3_64bits(base, sizeof(u16) * 256); + } + + DefaultCaseIsUnreachable() + } +} + +// Hashes palette entries [min, max] (inclusive) of the CLUT selected by the palette register. +GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode, + u32 min, u32 max) +{ + DebugAssert((palette.GetXBase() + max + 1) <= VRAM_WIDTH); + return HashPartialPalette(VRAMPalettePointer(palette), min, max); +} + +// Hashes entries [min, max] (inclusive) of the palette pointed to by `palette`, where `palette` addresses entry 0. +GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(const u16* palette, u32 min, u32 max) +{ + const u32 size = max - min + 1; + // Start at the first used entry; hashing from entry 0 would cover [0, max - min] instead of [min, max]. + return XXH3_64bits(palette + min, sizeof(u16) * size); +} + +// Hashes the VRAM contents inside rc, one row at a time (rows are VRAM_WIDTH halfwords apart). +GPUTextureCache::HashType GPUTextureCache::HashRect(const GSVector4i rc) +{ + XXH3_state_t state; + XXH3_64bits_reset(&state); + + const u32 width = rc.width(); + const u32 height = rc.height(); + const u16* ptr = &g_vram[rc.top * VRAM_WIDTH + rc.left]; + for (u32 y = 0; y < height; y++) + { + XXH3_64bits_update(&state, ptr, width * sizeof(u16)); + ptr += VRAM_WIDTH; + } + + return XXH3_64bits_digest(&state); +} + +void GPUTextureCache::InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key, + const GSVector4i rect, PaletteRecordFlags flags) +{ + record->rect = rect; + record->key = source_key; + record->flags = flags; + + switch (source_key.mode) + { + case GPUTextureMode::Palette4Bit: + { + // Always has 16 colours. + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 16 * sizeof(u16)); + record->palette_hash = XXH3_64bits(record->palette, 16 * sizeof(u16)); + } + break; + + case GPUTextureMode::Palette8Bit: + { + // Might have less if we're extending over the edge. Clamp it. 
+ const u32 pal_width = std::min(256, VRAM_WIDTH - source_key.palette.GetXBase()); + if (pal_width != 256) + { + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), pal_width * sizeof(u16)); + std::memset(&record->palette[pal_width], 0, sizeof(record->palette) - (pal_width * sizeof(u16))); + record->palette_hash = XXH3_64bits(record->palette, pal_width * sizeof(u16)); + } + else + { + // Whole thing, 2ez. + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 256 * sizeof(u16)); + record->palette_hash = XXH3_64bits(record->palette, 256 * sizeof(u16)); + } + } + break; + + case GPUTextureMode::Direct16Bit: + { + // No palette. + std::memset(record->palette, 0, sizeof(record->palette)); + record->palette_hash = 0; + } + break; + + DefaultCaseIsUnreachable() + } +} + +GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash) +{ + const HashCacheKey hkey = {tex_hash, pal_hash, static_cast(key.mode)}; + + const auto it = s_hash_cache.find(hkey); + if (it != s_hash_cache.end()) + { + GL_INS_FMT("TC: Hash cache hit {:X} {:X}", hkey.texture_hash, hkey.palette_hash); + return &it->second; + } + + GL_INS_FMT("TC: Hash cache miss {:X} {:X}", hkey.texture_hash, hkey.palette_hash); + + HashCacheEntry entry; + entry.ref_count = 0; + entry.last_used_frame = 0; + entry.sources = {}; + entry.texture = g_gpu_device->FetchTexture(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, 1, 1, 1, + GPUTexture::Type::Texture, GPUTexture::Format::RGBA8); + if (!entry.texture) + { + ERROR_LOG("Failed to create texture."); + return nullptr; + } + + DecodeTexture(key.page, key.palette, key.mode, entry.texture.get()); + + if (g_settings.texture_replacements.enable_texture_replacements) + ApplyTextureReplacements(key, tex_hash, pal_hash, &entry); + + s_hash_cache_memory_usage += entry.texture->GetVRAMUsage(); + + return &s_hash_cache.emplace(hkey, std::move(entry)).first->second; +} + +void 
GPUTextureCache::RemoveFromHashCache(HashCache::iterator it) +{ + ListIterate(it->second.sources, [](Source* source) { DestroySource(source); }); + + const size_t vram_usage = it->second.texture->GetVRAMUsage(); + DebugAssert(s_hash_cache_memory_usage >= vram_usage); + s_hash_cache_memory_usage -= vram_usage; + + g_gpu_device->RecycleTexture(std::move(it->second.texture)); + s_hash_cache.erase(it); +} + +void GPUTextureCache::ClearHashCache() +{ + while (!s_hash_cache.empty()) + RemoveFromHashCache(s_hash_cache.begin()); +} + +void GPUTextureCache::Compact() +{ + // Number of frames before unused hash cache entries are evicted. + static constexpr u32 MAX_HASH_CACHE_AGE = 600; + + // Maximum number of textures which are permitted in the hash cache at the end of the frame. + static constexpr u32 MAX_HASH_CACHE_SIZE = 500; + + bool might_need_cache_purge = + (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage); + if (might_need_cache_purge) + s_hash_cache_purge_list.clear(); + + const u32 frame_number = System::GetFrameNumber(); + const u32 min_frame_number = ((frame_number > MAX_HASH_CACHE_AGE) ? (frame_number - MAX_HASH_CACHE_AGE) : 0); + + for (auto it = s_hash_cache.begin(); it != s_hash_cache.end();) + { + HashCacheEntry& e = it->second; + if (e.ref_count == 0 && e.last_used_frame < min_frame_number) + { + RemoveFromHashCache(it++); + continue; + } + + // We might free up enough just with "normal" removals above. + if (might_need_cache_purge) + { + might_need_cache_purge = + (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage); + if (might_need_cache_purge) + s_hash_cache_purge_list.emplace_back(it, static_cast(e.last_used_frame)); + } + + ++it; + } + + // Pushing to a list, sorting, and removing ends up faster than re-iterating the map. 
+ if (might_need_cache_purge) + { + std::sort(s_hash_cache_purge_list.begin(), s_hash_cache_purge_list.end(), + [](const auto& lhs, const auto& rhs) { return lhs.second < rhs.second; }); + + size_t purge_index = 0; + while (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage) + { + if (purge_index == s_hash_cache_purge_list.size()) + { + WARNING_LOG("Cannot find hash cache entries to purge, current hash cache size is {} MB in {} textures.", + static_cast(s_hash_cache_memory_usage) / 1048576.0, s_hash_cache.size()); + break; + } + + RemoveFromHashCache(s_hash_cache_purge_list[purge_index++].first); + } + } +} + +size_t GPUTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& k) const +{ + std::size_t h = 0; + hash_combine(h, k.texture_hash, k.palette_hash, k.mode); + return h; +} + +TinyString GPUTextureCache::VRAMReplacementName::ToString() const +{ + return TinyString::from_format("{:08X}{:08X}", high, low); +} + +bool GPUTextureCache::VRAMReplacementName::Parse(const std::string_view file_title) +{ + if (file_title.length() != 43) + return false; + + const std::optional high_value = StringUtil::FromChars(file_title.substr(11, 16), 16); + const std::optional low_value = StringUtil::FromChars(file_title.substr(11 + 16), 16); + if (!high_value.has_value() || !low_value.has_value()) + return false; + + low = low_value.value(); + high = high_value.value(); + return true; +} + +size_t GPUTextureCache::VRAMReplacementNameHash::operator()(const VRAMReplacementName& name) const +{ + size_t seed = std::hash{}(name.low); + hash_combine(seed, name.high); + return seed; +} + +static constexpr const char* s_texture_replacement_mode_names[] = {"P4", "P8", "C16", "C16", + "STP4", "STP8", "STC16", "STC16"}; + +TinyString GPUTextureCache::TextureReplacementName::ToString() const +{ + const char* type_str = (type == TextureReplacementType::TextureFromVRAMWrite) ? 
"texupload" : "texpage"; + const char* mode_str = s_texture_replacement_mode_names[texture_mode]; + if (GetTextureMode() < GPUTextureMode::Direct16Bit) + { + return TinyString::from_format("{}-{}-{:016X}-{:016X}-{}x{}-{}-{}-{}x{}-P{}-{}", type_str, mode_str, src_hash, + pal_hash, src_width, src_height, offset_x, offset_y, width, height, pal_min, + pal_max); + } + else + { + return TinyString::from_format("{}-{}-{:016X}-{}x{}-{}-{}-{}x{}", type_str, mode_str, src_hash, src_width, + src_height, offset_x, offset_y, width, height); + } +} + +bool GPUTextureCache::TextureReplacementName::Parse(const std::string_view file_title) +{ + // TODO: Swap to https://github.com/eliaskosunen/scnlib + + std::string_view::size_type start_pos = 0; + std::string_view::size_type end_pos = file_title.find("-", start_pos); + if (end_pos == std::string_view::npos) + return false; + + // type + std::string_view token = file_title.substr(start_pos, end_pos); + if (token == "texupload") + type = TextureReplacementType::TextureFromVRAMWrite; + else if (token == "texpage") + type = TextureReplacementType::TextureFromPage; + else + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // mode + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional mode_opt; + for (size_t i = 0; i < std::size(s_texture_replacement_mode_names); i++) + { + if (token == s_texture_replacement_mode_names[i]) + { + mode_opt = static_cast(i); + break; + } + } + if (!mode_opt.has_value()) + return false; + texture_mode = mode_opt.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_hash + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val64; + if (token.size() != 16 || !(val64 = StringUtil::FromChars(token, 16)).has_value()) + return false; + src_hash = val64.value(); + 
start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + if (GetTextureMode() < GPUTextureMode::Direct16Bit) + { + // pal_hash + token = file_title.substr(start_pos, end_pos - start_pos); + if (token.size() != 16 || !(val64 = StringUtil::FromChars(token, 16)).has_value()) + return false; + pal_hash = val64.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_width + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val16; + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_width = val16.value(); + if (src_width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_height = val16.value(); + if (src_height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_x + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_x = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_y + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_y = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // width + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = 
StringUtil::FromChars(token)).has_value()) + return false; + width = val16.value(); + if (width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + height = val16.value(); + if (height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos || file_title[start_pos] != 'P') + return false; + + // pal_min + token = file_title.substr(start_pos + 1, end_pos - start_pos - 1); + std::optional val8; + if (!(val8 = StringUtil::FromChars(token)).has_value()) + return false; + pal_min = val8.value(); + start_pos = end_pos + 1; + + // pal_max + token = file_title.substr(start_pos); + if (!(val8 = StringUtil::FromChars(token)).has_value()) + return false; + pal_max = val8.value(); + if (pal_min > pal_max) + return false; + } + else + { + // No palette hash in this layout, so the next field is "{src_width}x{src_height}". end_pos was computed + // as the following '-', i.e. past src_height; re-anchor it on the 'x' so the fields below line up with + // what ToString() writes. + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_width + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val16; + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_width = val16.value(); + if (src_width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_height = val16.value(); + if (src_height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_x + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_x = val16.value(); + start_pos = 
end_pos + 1;
+    end_pos = file_title.find("-", start_pos + 1);
+    if (end_pos == std::string_view::npos)
+      return false;
+
+    // offset_y
+    token = file_title.substr(start_pos, end_pos - start_pos);
+    if (!(val16 = StringUtil::FromChars(token)).has_value())
+      return false;
+    offset_y = val16.value();
+    start_pos = end_pos + 1;
+    end_pos = file_title.find("x", start_pos + 1);
+    if (end_pos == std::string_view::npos)
+      return false;
+
+    // width
+    token = file_title.substr(start_pos, end_pos - start_pos);
+    if (!(val16 = StringUtil::FromChars(token)).has_value())
+      return false;
+    width = val16.value();
+    if (width == 0)
+      return false;
+    start_pos = end_pos + 1;
+
+    // height
+    token = file_title.substr(start_pos);
+    if (!(val16 = StringUtil::FromChars(token)).has_value())
+      return false;
+    height = val16.value();
+    if (height == 0)
+      return false;
+  }
+
+  return true;
+}
+
+// Builds the key used to look this name up in the replacement maps: the source hash paired with the
+// texture mode (with the semitransparency flag stripped, see GetTextureMode()).
+GPUTextureCache::TextureReplacementIndex GPUTextureCache::TextureReplacementName::GetIndex() const
+{
+  return {src_hash, GetTextureMode()};
+}
+
+// Returns the texture mode held in the low two bits of texture_mode.
+GPUTextureMode GPUTextureCache::TextureReplacementName::GetTextureMode() const
+{
+  return static_cast(texture_mode & 3u);
+}
+
+// True when texture_mode carries the semitransparent flag (the value 4 that DumpTexture() ORs in).
+bool GPUTextureCache::TextureReplacementName::IsSemitransparent() const
+{
+  return (texture_mode >= 4);
+}
+
+// Hash functor for TextureReplacementIndex: mixes the source hash with the texture mode.
+size_t GPUTextureCache::TextureReplacementIndexHash::operator()(const TextureReplacementIndex& name) const
+{
+  // TODO: This sucks ass, do better.
+  size_t seed = std::hash{}(name.src_hash);
+  hash_combine(seed, static_cast(name.mode));
+  return seed;
+}
+
+// Hash functor for DumpedTextureKey. Only the hashes, size and texture mode are mixed in; keys that differ
+// solely in the fields left out here land in the same bucket.
+size_t GPUTextureCache::DumpedTextureKeyHash::operator()(const DumpedTextureKey& k) const
+{
+  // TODO: This is slow
+  std::size_t hash = 0;
+  hash_combine(hash, k.tex_hash, k.pal_hash, k.width, k.height, k.texture_mode);
+  return hash;
+}
+
+// Switches the active game. Does nothing when the ID is unchanged; otherwise stores it and rebuilds the
+// replacement lists for the new game.
+void GPUTextureCache::SetGameID(std::string game_id)
+{
+  if (s_game_id == game_id)
+    return;
+
+  // game_id is a by-value sink parameter, so take over its buffer instead of copying the string again.
+  s_game_id = std::move(game_id);
+  ReloadTextureReplacements();
+}
+
+// Looks up a replacement image for a VRAM write by hashing its pixel data.
+// Returns nullptr when no replacement is registered for the hash, or when the image fails to load.
+const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height,
+                                                                                    const void* pixels)
+{
+  const VRAMReplacementName hash = GetVRAMWriteHash(width, height, pixels);
+
+  const auto it = s_vram_replacements.find(hash);
+  if (it == s_vram_replacements.end())
+    return nullptr;
+
+  return GetTextureReplacementImage(it->second);
+}
+
+// VRAM writes are dumped only when dumping is enabled and the write meets both configured size thresholds.
+bool GPUTextureCache::ShouldDumpVRAMWrite(u32 width, u32 height)
+{
+  return (g_settings.texture_replacements.dump_vram_writes && width >= s_config.vram_write_dump_width_threshold &&
+          height >= s_config.vram_write_dump_height_threshold);
+}
+
+// Converts a VRAM write to RGBA8 and saves it to the dump directory, at most once per content hash.
+void GPUTextureCache::DumpVRAMWrite(u32 width, u32 height, const void* pixels)
+{
+  const VRAMReplacementName name = GetVRAMWriteHash(width, height, pixels);
+  if (s_dumped_vram_writes.find(name) != s_dumped_vram_writes.end())
+    return;
+
+  s_dumped_vram_writes.insert(name);
+
+  const std::string filename = GetVRAMWriteDumpFilename(name);
+  if (filename.empty() || FileSystem::FileExists(filename.c_str()))
+    return;
+
+  RGBA8Image image;
+  image.SetSize(width, height);
+
+  const u16* src_pixels = reinterpret_cast(pixels);
+
+  for (u32 y = 0; y < height; y++)
+  {
+    for (u32 x = 0; x < width; x++)
+    {
+      image.SetPixel(x, y, VRAMRGBA5551ToRGBA8888(*src_pixels));
+      src_pixels++;
+    }
+  }
+
+  if (s_config.dump_vram_write_force_alpha_channel)
+  {
+    for (u32 y = 0; y < height; y++)
+    {
+      for (u32 x = 0; x < width; x++)
+        image.SetPixel(x, y, image.GetPixel(x, y) |
0xFF000000u);
+    }
+  }
+
+  INFO_LOG("Dumping {}x{} VRAM write to '{}'", width, height, Path::GetFileName(filename));
+  if (!image.SaveToFile(filename.c_str())) [[unlikely]]
+    ERROR_LOG("Failed to dump {}x{} VRAM write to '{}'", width, height, filename);
+}
+
+// Decodes the VRAM region `rect` with the given mode/palette and writes it as a PNG into the dump directory.
+// Nothing is written when the texture is below the configured size thresholds, was already dumped this
+// session, already exists on disk, or (unless dump_replaced_textures is set) already has a replacement
+// registered with the same source and palette hash.
+void GPUTextureCache::DumpTexture(TextureReplacementType type, u32 offset_x, u32 offset_y, u32 src_width,
+                                  u32 src_height, GPUTextureMode mode, HashType src_hash, HashType pal_hash,
+                                  u32 pal_min, u32 pal_max, const u16* palette_data, const GSVector4i rect,
+                                  PaletteRecordFlags flags)
+{
+  const u32 width = ApplyTextureModeShift(mode, rect.width());
+  const u32 height = rect.height();
+
+  if (width < s_config.texture_dump_width_threshold || height < s_config.texture_dump_height_threshold)
+    return;
+
+  // Forcing the alpha channel overrides semitransparency, so the flag is only recorded when it is not forced.
+  const bool semitransparent = ((flags & PaletteRecordFlags::HasSemiTransparentDraws) != PaletteRecordFlags::None &&
+                                !s_config.dump_texture_force_alpha_channel);
+  const u8 dumped_texture_mode = static_cast(mode) | (semitransparent ? 4 : 0);
+
+  const DumpedTextureKey key = {src_hash,
+                                pal_hash,
+                                Truncate16(offset_x),
+                                Truncate16(offset_y),
+                                Truncate16(width),
+                                Truncate16(height),
+                                type,
+                                dumped_texture_mode,
+                                {}};
+  if (s_dumped_textures.find(key) != s_dumped_textures.end())
+    return;
+
+  if (!EnsureGameDirectoryExists())
+    return;
+
+  const std::string dump_directory = GetTextureDumpDirectory();
+  if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false))
+    return;
+
+  // Recorded before the remaining checks, so a texture skipped below is not re-examined on later calls.
+  s_dumped_textures.insert(key);
+
+  const TextureReplacementName name = {
+    .src_hash = src_hash,
+    .pal_hash = pal_hash,
+    .src_width = Truncate16(src_width),
+    .src_height = Truncate16(src_height),
+    .type = type,
+    .texture_mode = dumped_texture_mode,
+    .offset_x = Truncate16(offset_x),
+    .offset_y = Truncate16(offset_y),
+    .width = Truncate16(width),
+    .height = Truncate16(height),
+    .pal_min = Truncate8(pal_min),
+    .pal_max = Truncate8(pal_max),
+  };
+
+  // skip if dumped already
+  if (!g_settings.texture_replacements.dump_replaced_textures)
+  {
+    const TextureReplacementMap& map = (type == TextureReplacementType::TextureFromPage) ?
+                                         s_texture_page_texture_replacements :
+                                         s_vram_write_texture_replacements;
+    const auto& [begin, end] = map.equal_range(name.GetIndex());
+    for (auto it = begin; it != end; ++it)
+    {
+      // only match on the hash, not the sizes, we could be trying to dump a smaller texture
+      if (it->second.first.pal_hash == name.pal_hash)
+      {
+        DEV_LOG("Not dumping currently-replaced VRAM write {:016X} [{}x{}] at {}", src_hash, width, height, rect);
+        return;
+      }
+    }
+  }
+
+  SmallString filename = name.ToString();
+  filename.append(".png");
+
+  const std::string path = Path::Combine(dump_directory, filename);
+  if (FileSystem::FileExists(path.c_str()))
+    return;
+
+  DEV_LOG("Dumping VRAM write {:016X} [{}x{}] at {}", src_hash, width, height, rect);
+
+  RGBA8Image image(width, height);
+  GPUTextureCache::DecodeTexture(mode, &g_vram[rect.top * VRAM_WIDTH + rect.left], palette_data, image.GetPixels(),
+                                 image.GetPitch(), width, height);
+
+  u32* image_pixels = image.GetPixels();
+  const u32* image_pixels_end = image.GetPixels() + (width * height);
+  if (s_config.dump_texture_force_alpha_channel)
+  {
+    for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
+      *pixel |= 0xFF000000u;
+  }
+  else
+  {
+    if (semitransparent)
+    {
+      // Alpha channel should be inverted, because 0 means opaque, 1 is semitransparent.
+      // Pixel value of 0000 is still completely transparent.
+      // The colour mask covers the low 24 bits only, so no bits of the decoded alpha leak into the new alpha.
+      for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
+      {
+        const u32 val = *pixel;
+        *pixel = (val == 0u) ? 0u : ((val & 0x00FFFFFFu) | ((val & 0x80000000u) ? 0x80000000u : 0xFF000000u));
+      }
+    }
+    else
+    {
+      // Only cut out 0000 pixels.
+      for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++)
+      {
+        const u32 val = *pixel;
+        *pixel = (val == 0u) ? 0u : (val | 0xFF000000u);
+      }
+    }
+  }
+
+  if (!image.SaveToFile(path.c_str()))
+    ERROR_LOG("Failed to write texture dump to {}.", Path::GetFileName(path));
+}
+
+// Checks whether the palette currently in use matches the palette a replacement was dumped with.
+// Non-paletted modes always match. A replacement covering the full palette range is compared against the
+// precomputed full-palette hash; a reduced range is re-hashed over [pal_min, pal_max].
+bool GPUTextureCache::IsMatchingReplacementPalette(HashType full_palette_hash, GPUTextureMode mode,
+                                                   GPUTexturePaletteReg palette, const TextureReplacementName& name)
+{
+  if (!TextureModeHasPalette(mode))
+    return true;
+
+  const u32 full_pal_max = GetPaletteWidth(mode) - 1;
+  if (name.pal_min == 0 && name.pal_max == full_pal_max)
+    return (name.pal_hash == full_palette_hash);
+
+  // If the range goes off the edge of VRAM, it's not a match.
+  if ((palette.GetXBase() + name.pal_max) >= VRAM_WIDTH)
+    return false;
+
+  // This needs to re-hash every lookup, which is a bit of a bummer.
+  // But at least there's the hash cache, so it shouldn't be too painful...
+  const HashType partial_hash = GPUTextureCache::HashPartialPalette(palette, mode, name.pal_min, name.pal_max);
+  return (partial_hash == name.pal_hash);
+}
+
+// True when at least one VRAM-write ("texupload-") texture replacement is registered.
+bool GPUTextureCache::HasVRAMWriteTextureReplacements()
+{
+  return !s_vram_write_texture_replacements.empty();
+}
+
+// Appends every replacement registered for the given VRAM write hash and mode whose palette matches and
+// whose destination rectangle, translated into page space by offset_to_page, lies inside the page.
+void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vector& replacements,
+                                                      HashType vram_write_hash, HashType palette_hash,
+                                                      GPUTextureMode mode, GPUTexturePaletteReg palette,
+                                                      const GSVector2i& offset_to_page)
+{
+  const TextureReplacementIndex index = {vram_write_hash, mode};
+  const auto& [begin, end] = s_vram_write_texture_replacements.equal_range(index);
+  if (begin == end)
+    return;
+
+  const GSVector4i offset_to_page_v = GSVector4i(offset_to_page).xyxy();
+
+  for (auto it = begin; it != end; ++it)
+  {
+    if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first))
+      continue;
+
+    const TextureReplacementImage* image = GetTextureReplacementImage(it->second.second);
+    if (!image)
+      continue;
+
+    const TextureReplacementName& name = it->second.first;
+    const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) /
GSVector2(name.GetSizeVec());
+    const GSVector4i rect_in_write_space = name.GetDestRect();
+    const GSVector4i rect_in_page_space = rect_in_write_space.sub32(offset_to_page_v);
+
+    // zw <= 0 or xy >= TEXTURE_PAGE_SIZE; the min()/max() lanes are don't-cares
+    // (assuming le32/ge32 are per-lane compares -- TODO confirm).
+    if (!(rect_in_page_space.le32(
+            GSVector4i::cxpr(std::numeric_limits::min(), std::numeric_limits::min(), 0, 0)) |
+          rect_in_page_space.ge32(GSVector4i::cxpr(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT,
+                                                   std::numeric_limits::max(), std::numeric_limits::max())))
+           .allfalse())
+    {
+      // Rect is out of bounds.
+      continue;
+    }
+
+    // TODO: This fails in Wild Arms 2, writes that are wider than a page.
+    DebugAssert(rect_in_page_space.width() == name.width && rect_in_page_space.height() == name.height);
+    DebugAssert(rect_in_page_space.width() <= static_cast(TEXTURE_PAGE_WIDTH));
+    DebugAssert(rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT));
+
+    replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y,
+                                                      name.IsSemitransparent()});
+  }
+}
+
+// True when at least one page ("texpage-") texture replacement is registered.
+bool GPUTextureCache::HasTexturePageTextureReplacements()
+{
+  return !s_texture_page_texture_replacements.empty();
+}
+
+// Appends every page-texture replacement that applies to the page starting at start_page_number.
+// Full-page replacements are matched against page_hash; partial ones re-hash their own sub-rectangle of VRAM.
+void GPUTextureCache::GetTexturePageTextureReplacements(std::vector& replacements,
+                                                        u32 start_page_number, HashType page_hash,
+                                                        HashType palette_hash, GPUTextureMode mode,
+                                                        GPUTexturePaletteReg palette)
+{
+  // This is truly awful. Because we can dump a sub-page worth of texture, we need to examine the entire replacement
+  // list, because any of them could match up...
+
+  const u8 shift = GetTextureModeShift(mode);
+  const GSVector4i page_start_in_vram =
+    GSVector4i(GSVector2i(VRAMPageStartX(start_page_number), VRAMPageStartY(start_page_number))).xyxy();
+
+  for (TextureReplacementMap::const_iterator it = s_texture_page_texture_replacements.begin();
+       it != s_texture_page_texture_replacements.end(); ++it)
+  {
+    if (it->first.mode != mode)
+      continue;
+
+    // Early-out if the palette mismatches, at least that'll save some cycles...
+    if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first))
+      continue;
+
+    const TextureReplacementName& name = it->second.first;
+    GSVector4i rect_in_page_space;
+    if (name.width == TEXTURE_PAGE_WIDTH && name.height == TEXTURE_PAGE_HEIGHT)
+    {
+      // This replacement is an entire page, so we can simply check the already-computed page hash.
+      DebugAssert(name.offset_x == 0 && name.offset_y == 0);
+      if (it->first.src_hash != page_hash)
+        continue;
+
+      rect_in_page_space = GSVector4i::cxpr(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
+    }
+    else
+    {
+      // Replacement covers only part of a page, so the precomputed page hash cannot be used.
+      // Re-hash the replacement's own sub-rectangle of VRAM instead.
+      rect_in_page_space = name.GetDestRect();
+      const GSVector4i hash_rect =
+        rect_in_page_space.blend32<0x5>(rect_in_page_space.srl32(shift)).add32(page_start_in_vram);
+      const GPUTextureCache::HashType hash = GPUTextureCache::HashRect(hash_rect);
+      if (it->first.src_hash != hash)
+        continue;
+    }
+
+    const TextureReplacementImage* image = GetTextureReplacementImage(it->second.second);
+    if (!image)
+      continue;
+
+    const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec());
+    replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y,
+                                                      name.IsSemitransparent()});
+  }
+}
+
+// Classifies a replacement by the prefix of its file title ("vram-write-", "texupload-" or "texpage-").
+// Returns std::nullopt for any other prefix.
+std::optional
+GPUTextureCache::GetTextureReplacementTypeFromFileTitle(const std::string_view path)
+{
+  if (path.starts_with("vram-write-"))
+    return TextureReplacementType::VRAMReplacement;
+
+  if (path.starts_with("texupload-"))
+    return TextureReplacementType::TextureFromVRAMWrite;
+
+  if (path.starts_with("texpage-"))
+    return TextureReplacementType::TextureFromPage;
+
+  return std::nullopt;
+}
+
+// True when the path's extension is png, jpg or webp, compared with StringUtil::EqualNoCase.
+bool GPUTextureCache::HasValidReplacementExtension(const std::string_view path)
+{
+  const std::string_view extension = Path::GetExtension(path);
+  for (const char* test_extension : {"png", "jpg", "webp"})
+  {
+    if (StringUtil::EqualNoCase(extension, test_extension))
+      return true;
+  }
+
+  return false;
+}
+
+// Scans the game's replacement directory (recursively) and registers every file whose title parses as a
+// replacement name. Each flag enables registration of the corresponding replacement category.
+void GPUTextureCache::FindTextureReplacements(bool load_vram_write_replacements, bool load_texture_replacements)
+{
+  FileSystem::FindResultsArray files;
+  FileSystem::FindFiles(GetTextureReplacementDirectory().c_str(), "*",
+                        FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE, &files);
+
+  for (FILESYSTEM_FIND_DATA& fd : files)
+  {
+    if ((fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) || !HasValidReplacementExtension(fd.FileName))
+      continue;
+
+    const std::string_view file_title = Path::GetFileTitle(fd.FileName);
+    const std::optional type =
GetTextureReplacementTypeFromFileTitle(file_title);
+    if (!type.has_value())
+      continue;
+
+    switch (type.value())
+    {
+      case TextureReplacementType::VRAMReplacement:
+      {
+        VRAMReplacementName name;
+        if (!load_vram_write_replacements || !name.Parse(file_title))
+          continue;
+
+        if (const auto it = s_vram_replacements.find(name); it != s_vram_replacements.end())
+        {
+          WARNING_LOG("Duplicate VRAM replacement: '{}' and '{}'", Path::GetFileName(it->second),
+                      Path::GetFileName(fd.FileName));
+          continue;
+        }
+
+        // file_title views into fd.FileName, so the path is only moved out once parsing is finished.
+        s_vram_replacements.emplace(name, std::move(fd.FileName));
+      }
+      break;
+
+      case TextureReplacementType::TextureFromVRAMWrite:
+      case TextureReplacementType::TextureFromPage:
+      {
+        TextureReplacementName name;
+        if (!load_texture_replacements || !name.Parse(file_title))
+          continue;
+
+        DebugAssert(name.type == type.value());
+
+        const TextureReplacementIndex index = name.GetIndex();
+        TextureReplacementMap& dest_map = (type.value() == TextureReplacementType::TextureFromVRAMWrite) ?
+                                            s_vram_write_texture_replacements :
+                                            s_texture_page_texture_replacements;
+
+        // Multiple replacements in the same write are fine. But they should have different rects.
+        const auto range = dest_map.equal_range(index);
+        bool duplicate = false;
+        for (auto it = range.first; it != range.second; ++it)
+        {
+          if (it->second.first == name) [[unlikely]]
+          {
+            WARNING_LOG("Duplicate texture replacement: '{}' and '{}'", Path::GetFileName(it->second.second),
+                        Path::GetFileName(fd.FileName));
+            duplicate = true;
+          }
+        }
+        if (duplicate) [[unlikely]]
+          continue;
+
+        dest_map.emplace(index, std::make_pair(name, std::move(fd.FileName)));
+      }
+      break;
+
+        DefaultCaseIsUnreachable()
+    }
+  }
+
+  if (g_settings.texture_replacements.enable_texture_replacements)
+  {
+    INFO_LOG("Found {} replacement upload textures for '{}'", s_vram_write_texture_replacements.size(), s_game_id);
+    INFO_LOG("Found {} replacement page textures for '{}'", s_texture_page_texture_replacements.size(), s_game_id);
+  }
+
+  if (g_settings.texture_replacements.enable_vram_write_replacements)
+    INFO_LOG("Found {} replacement VRAM for '{}'", s_vram_replacements.size(), s_game_id);
+}
+
+// Registers the aliases listed under `root`. Each child maps a replacement name (the key, in the same
+// format as a replacement file title) to an existing file (the value, relative to the replacement
+// directory). Aliases whose file is missing, whose name does not parse, or that duplicate an already
+// registered replacement are skipped with a log message.
+void GPUTextureCache::LoadTextureReplacementAliases(const ryml::ConstNodeRef& root,
+                                                    bool load_vram_write_replacement_aliases,
+                                                    bool load_texture_replacement_aliases)
+{
+  const std::string source_dir = GetTextureReplacementDirectory();
+
+  for (const ryml::ConstNodeRef& current : root.cchildren())
+  {
+    const std::string_view key = to_stringview(current.key());
+    const std::optional type = GetTextureReplacementTypeFromFileTitle(key);
+    if (!type.has_value())
+      continue;
+
+    const std::string_view replacement_filename = to_stringview(current.val());
+    std::string replacement_path = Path::Combine(source_dir, replacement_filename);
+    if (!FileSystem::FileExists(replacement_path.c_str()))
+    {
+      // Argument order follows the message: the missing file first, then the alias that refers to it.
+      ERROR_LOG("File '{}' for alias '{}' does not exist.", replacement_filename, key);
+      continue;
+    }
+
+    switch (type.value())
+    {
+      case TextureReplacementType::VRAMReplacement:
+      {
+        VRAMReplacementName name;
+        if (!load_vram_write_replacement_aliases || !name.Parse(key))
+          continue;
+
+        if (const auto it = s_vram_replacements.find(name); it != s_vram_replacements.end())
+        {
+          WARNING_LOG("Duplicate VRAM replacement alias: '{}' and '{}'", Path::GetFileName(it->second),
+                      replacement_filename);
+          continue;
+        }
+
+        s_vram_replacements.emplace(name, std::move(replacement_path));
+      }
+      break;
+
+      case TextureReplacementType::TextureFromVRAMWrite:
+      case TextureReplacementType::TextureFromPage:
+      {
+        TextureReplacementName name;
+        if (!load_texture_replacement_aliases || !name.Parse(key))
+          continue;
+
+        DebugAssert(name.type == type.value());
+
+        const TextureReplacementIndex index = name.GetIndex();
+        TextureReplacementMap& dest_map = (type.value() == TextureReplacementType::TextureFromVRAMWrite) ?
+                                            s_vram_write_texture_replacements :
+                                            s_texture_page_texture_replacements;
+
+        // Multiple replacements in the same write are fine. But they should have different rects.
+        const auto range = dest_map.equal_range(index);
+        bool duplicate = false;
+        for (auto it = range.first; it != range.second; ++it)
+        {
+          if (it->second.first == name) [[unlikely]]
+          {
+            WARNING_LOG("Duplicate texture replacement alias: '{}' and '{}'", Path::GetFileName(it->second.second),
+                        replacement_filename);
+            duplicate = true;
+          }
+        }
+        if (duplicate) [[unlikely]]
+          continue;
+
+        dest_map.emplace(index, std::make_pair(name, std::move(replacement_path)));
+      }
+      break;
+
+        DefaultCaseIsUnreachable()
+    }
+  }
+
+  if (g_settings.texture_replacements.enable_texture_replacements)
+  {
+    INFO_LOG("Found {} replacement upload textures after applying aliases for '{}'",
+             s_vram_write_texture_replacements.size(), s_game_id);
+    INFO_LOG("Found {} replacement page textures after applying aliases for '{}'",
+             s_texture_page_texture_replacements.size(), s_game_id);
+  }
+
+  if (g_settings.texture_replacements.enable_vram_write_replacements)
+    INFO_LOG("Found {} replacement VRAM after applying aliases for '{}'", s_vram_replacements.size(), s_game_id);
+}
+
+const GPUTextureCache::TextureReplacementImage*
GPUTextureCache::GetTextureReplacementImage(const std::string& filename)
+{
+  // Serve from the image cache when possible; otherwise load the file and cache it under its filename.
+  // Returns nullptr when the file cannot be loaded (nothing is cached in that case).
+  auto it = s_replacement_image_cache.find(filename);
+  if (it != s_replacement_image_cache.end())
+    return &it->second;
+
+  RGBA8Image image;
+  if (!image.LoadFromFile(filename.c_str()))
+  {
+    ERROR_LOG("Failed to load '{}'", Path::GetFileName(filename));
+    return nullptr;
+  }
+
+  INFO_LOG("Loaded '{}': {}x{}", Path::GetFileName(filename), image.GetWidth(), image.GetHeight());
+  it = s_replacement_image_cache.emplace(filename, std::move(image)).first;
+  return &it->second;
+}
+
+// Loads every registered replacement image into the image cache up front, refreshing the loading screen
+// at most once per UPDATE_INTERVAL seconds.
+void GPUTextureCache::PreloadReplacementTextures()
+{
+  static constexpr float UPDATE_INTERVAL = 1.0f;
+
+  Common::Timer last_update_time;
+  u32 num_textures_loaded = 0;
+  const size_t total_textures =
+    s_vram_replacements.size() + s_vram_write_texture_replacements.size() + s_texture_page_texture_replacements.size();
+
+#define UPDATE_PROGRESS() \
+  if (last_update_time.GetTimeSeconds() >= UPDATE_INTERVAL) \
+  { \
+    Host::DisplayLoadingScreen("Preloading replacement textures...", 0, static_cast(total_textures), \
+                               static_cast(num_textures_loaded)); \
+    last_update_time.Reset(); \
+  }
+
+  for (const auto& it : s_vram_replacements)
+  {
+    UPDATE_PROGRESS();
+    GetTextureReplacementImage(it.second);
+    num_textures_loaded++;
+  }
+
+#define PROCESS_MAP(map) \
+  for (const auto& it : map) \
+  { \
+    UPDATE_PROGRESS(); \
+    GetTextureReplacementImage(it.second.second); \
+    num_textures_loaded++; \
+  }
+
+  PROCESS_MAP(s_vram_write_texture_replacements);
+  PROCESS_MAP(s_texture_page_texture_replacements);
+#undef PROCESS_MAP
+#undef UPDATE_PROGRESS
+}
+
+// Makes sure the per-game texture directory exists. When it has to be created, a configuration template
+// and the "dumps" and "replacements" subdirectories are created too. Returns false when no game ID is set
+// or any creation step fails. An already-existing game directory returns true immediately, without
+// checking its contents.
+bool GPUTextureCache::EnsureGameDirectoryExists()
+{
+  if (s_game_id.empty())
+    return false;
+
+  const std::string game_directory = Path::Combine(EmuFolders::Textures, s_game_id);
+  if (FileSystem::DirectoryExists(game_directory.c_str()))
+    return true;
+
+  Error error;
+  if (!FileSystem::CreateDirectory(game_directory.c_str(), false, &error))
+  {
+    ERROR_LOG("Failed to create game directory: {}", error.GetDescription());
+    return false;
+  }
+
+  // Write a template configuration from default-constructed settings, unless one already exists.
+  if (const std::string config_path = Path::Combine(game_directory, LOCAL_CONFIG_FILENAME);
+      !FileSystem::FileExists(config_path.c_str()) &&
+      !FileSystem::WriteStringToFile(config_path.c_str(),
+                                     Settings::TextureReplacementSettings().config.ExportToYAML(true), &error))
+  {
+    ERROR_LOG("Failed to write configuration template: {}", error.GetDescription());
+    return false;
+  }
+
+  if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "dumps").c_str(), false, &error))
+  {
+    ERROR_LOG("Failed to create dumps directory: {}", error.GetDescription());
+    return false;
+  }
+
+  if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "replacements").c_str(), false, &error))
+  {
+    ERROR_LOG("Failed to create replacements directory: {}", error.GetDescription());
+    return false;
+  }
+
+  return true;
+}
+
+// Path of the "replacements" subdirectory for the current game ID, under the textures folder.
+std::string GPUTextureCache::GetTextureReplacementDirectory()
+{
+  return Path::Combine(EmuFolders::Textures,
+                       SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements", s_game_id));
+}
+
+// Path of the "dumps" subdirectory for the current game ID, under the textures folder.
+std::string GPUTextureCache::GetTextureDumpDirectory()
+{
+  return Path::Combine(EmuFolders::Textures, SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "dumps", s_game_id));
+}
+
+// Names a VRAM write by the 128-bit XXH3 hash of its pixel data (width * height 16-bit pixels).
+GPUTextureCache::VRAMReplacementName GPUTextureCache::GetVRAMWriteHash(u32 width, u32 height, const void* pixels)
+{
+  const XXH128_hash_t hash = XXH3_128bits(pixels, width * height * sizeof(u16));
+  return {hash.low64, hash.high64};
+}
+
+// Returns the full path a VRAM write dump should be saved to, or an empty string when the game or dump
+// directory cannot be ensured.
+std::string GPUTextureCache::GetVRAMWriteDumpFilename(const VRAMReplacementName& name)
+{
+  std::string ret;
+  if (!EnsureGameDirectoryExists())
+    return ret;
+
+  const std::string dump_directory = GetTextureDumpDirectory();
+  if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false))
+    return ret;
+
+  return Path::Combine(dump_directory, SmallString::from_format("vram-write-{}.png", name.ToString()));
+}
+
+bool GPUTextureCache::LoadLocalConfiguration(bool
load_vram_write_replacement_aliases,
+                                             bool load_texture_replacement_aliases)
+{
+  // Rebuilds s_config from the global settings, then overlays any per-game configuration file.
+  // Every return path reports whether the effective configuration differs from the previous one.
+  const Settings::TextureReplacementSettings::Configuration old_config = s_config;
+
+  // load settings from ini
+  s_config = g_settings.texture_replacements.config;
+
+  if (s_game_id.empty())
+    return (s_config != old_config);
+
+  // NOTE: despite the variable name, the per-game file is parsed as YAML by ryml below.
+  const std::optional ini_data = FileSystem::ReadFileToString(
+    Path::Combine(EmuFolders::Textures,
+                  SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "{}", s_game_id, LOCAL_CONFIG_FILENAME))
+      .c_str());
+  if (!ini_data.has_value() || ini_data->empty())
+    return (s_config != old_config);
+
+  const ryml::Tree tree = ryml::parse_in_arena(LOCAL_CONFIG_FILENAME, to_csubstr(ini_data.value()));
+  const ryml::ConstNodeRef root = tree.rootref();
+
+  // This is false if all we have are comments
+  if (!root.is_map())
+    return (s_config != old_config);
+
+  // Each option falls back to the value already in s_config when its key is absent (value_or).
+  s_config.dump_texture_pages =
+    GetOptionalTFromObject(root, "DumpTexturePages").value_or(static_cast(s_config.dump_texture_pages));
+  s_config.dump_full_texture_pages = GetOptionalTFromObject(root, "DumpFullTexturePages")
+                                       .value_or(static_cast(s_config.dump_full_texture_pages));
+  s_config.dump_texture_force_alpha_channel = GetOptionalTFromObject(root, "DumpTextureForceAlphaChannel")
+                                                .value_or(static_cast(s_config.dump_texture_force_alpha_channel));
+  s_config.dump_vram_write_force_alpha_channel =
+    GetOptionalTFromObject(root, "DumpVRAMWriteForceAlphaChannel")
+      .value_or(static_cast(s_config.dump_vram_write_force_alpha_channel));
+  s_config.dump_c16_textures =
+    GetOptionalTFromObject(root, "DumpC16Textures").value_or(static_cast(s_config.dump_c16_textures));
+  s_config.reduce_palette_range =
+    GetOptionalTFromObject(root, "ReducePaletteRange").value_or(static_cast(s_config.reduce_palette_range));
+  s_config.convert_copies_to_writes = GetOptionalTFromObject(root, "ConvertCopiesToWrites")
+                                        .value_or(static_cast(s_config.convert_copies_to_writes));
+  s_config.replacement_scale_linear_filter = GetOptionalTFromObject(root, "ReplacementScaleLinearFilter")
+                                               .value_or(static_cast(s_config.replacement_scale_linear_filter));
+  s_config.max_vram_write_splits =
+    GetOptionalTFromObject(root, "MaxVRAMWriteSplits").value_or(s_config.max_vram_write_splits);
+  s_config.max_vram_write_coalesce_width =
+    GetOptionalTFromObject(root, "MaxVRAMWriteCoalesceWidth").value_or(s_config.max_vram_write_coalesce_width);
+  s_config.max_vram_write_coalesce_height =
+    GetOptionalTFromObject(root, "MaxVRAMWriteCoalesceHeight").value_or(s_config.max_vram_write_coalesce_height);
+  s_config.texture_dump_width_threshold =
+    GetOptionalTFromObject(root, "DumpTextureWidthThreshold").value_or(s_config.texture_dump_width_threshold);
+  s_config.texture_dump_height_threshold =
+    GetOptionalTFromObject(root, "DumpTextureHeightThreshold").value_or(s_config.texture_dump_height_threshold);
+  s_config.vram_write_dump_width_threshold =
+    GetOptionalTFromObject(root, "DumpVRAMWriteWidthThreshold").value_or(s_config.vram_write_dump_width_threshold);
+  s_config.vram_write_dump_height_threshold = GetOptionalTFromObject(root, "DumpVRAMWriteHeightThreshold")
+                                                .value_or(s_config.vram_write_dump_height_threshold);
+
+  // Aliases are only read when at least one replacement category is being loaded.
+  if (load_vram_write_replacement_aliases || load_texture_replacement_aliases)
+  {
+    const ryml::ConstNodeRef aliases = root.find_child("Aliases");
+    if (aliases.valid() && aliases.has_children())
+      LoadTextureReplacementAliases(aliases, load_vram_write_replacement_aliases, load_texture_replacement_aliases);
+  }
+
+  // Any change?
+  return (s_config != old_config);
+}
+
+// Rebuilds all replacement lists for the current game: clears the maps, rescans the replacement directory
+// when a category is enabled, reloads the per-game configuration (which may register aliases), optionally
+// preloads the images, and finally drops cached images that are no longer referenced.
+void GPUTextureCache::ReloadTextureReplacements()
+{
+  s_vram_replacements.clear();
+  s_vram_write_texture_replacements.clear();
+  s_texture_page_texture_replacements.clear();
+
+  const bool load_vram_write_replacements = (g_settings.texture_replacements.enable_vram_write_replacements);
+  const bool load_texture_replacements =
+    (g_settings.gpu_texture_cache && g_settings.texture_replacements.enable_texture_replacements);
+  if (load_vram_write_replacements || load_texture_replacements)
+    FindTextureReplacements(load_vram_write_replacements, load_texture_replacements);
+
+  LoadLocalConfiguration(load_vram_write_replacements, load_texture_replacements);
+
+  if (g_settings.texture_replacements.preload_textures)
+    PreloadReplacementTextures();
+
+  PurgeUnreferencedTexturesFromCache();
+
+  DebugAssert(g_gpu);
+  GPUTextureCache::UpdateVRAMTrackingState();
+}
+
+// Rebuilds the image cache so it only holds images whose filename is still referenced by one of the
+// replacement maps. Referenced images are moved across from the old cache, everything else is released.
+void GPUTextureCache::PurgeUnreferencedTexturesFromCache()
+{
+  TextureCache old_map = std::move(s_replacement_image_cache);
+  s_replacement_image_cache = {};
+
+  for (const auto& it : s_vram_replacements)
+  {
+    const auto it2 = old_map.find(it.second);
+    if (it2 != old_map.end())
+    {
+      s_replacement_image_cache[it.second] = std::move(it2->second);
+      old_map.erase(it2);
+    }
+  }
+
+  // Iterate over pointers: a braced list of the maps themselves would copy both maps into the list.
+  for (const TextureReplacementMap* map : {&s_vram_write_texture_replacements, &s_texture_page_texture_replacements})
+  {
+    for (const auto& it : *map)
+    {
+      const auto it2 = old_map.find(it.second.second);
+      if (it2 != old_map.end())
+      {
+        s_replacement_image_cache[it.second.second] = std::move(it2->second);
+        old_map.erase(it2);
+      }
+    }
+  }
+}
+
+// Composites every matching replacement image over the hash cache entry's texture and swaps the result
+// into the entry. The entry is left untouched when no replacement applies or a GPU texture cannot be created.
+void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash,
+                                               HashCacheEntry* entry)
+{
+  std::vector subimages;
+  if (HasTexturePageTextureReplacements())
+  {
+    GetTexturePageTextureReplacements(subimages, key.page, tex_hash, pal_hash, key.mode, key.palette);
+  }
+
+  if (HasVRAMWriteTextureReplacements())
+  {
+    const GSVector4i page_rect = VRAMPageRect(key.page);
+    LoopRectPages(page_rect, [&key, &pal_hash, &subimages, &page_rect](u32 pn) {
+      const PageEntry& page = s_pages[pn];
+      ListIterate(page.writes, [&key, &pal_hash, &subimages, &page_rect](const VRAMWrite* vrw) {
+        // TODO: Is this needed?
+        if (!vrw->write_rect.rintersects(page_rect))
+          return;
+
+        // Map VRAM write to the start of the page.
+        GSVector2i offset_to_page = page_rect.sub32(vrw->write_rect).xy();
+
+        // Need to apply the texture shift on the X dimension, not Y. No SLLV on SSE4.. :(
+        offset_to_page.x = ApplyTextureModeShift(key.mode, offset_to_page.x);
+
+        GetVRAMWriteTextureReplacements(subimages, vrw->hash, pal_hash, key.mode, key.palette, offset_to_page);
+      });
+    });
+  }
+
+  if (subimages.empty())
+    return;
+
+  // The output page is scaled by the largest scale factor among the sub-images, per axis.
+  float max_scale_x = subimages[0].scale_x, max_scale_y = subimages[0].scale_y;
+  for (size_t i = 0; i < subimages.size(); i++)
+  {
+    max_scale_x = std::max(max_scale_x, subimages[i].scale_x);
+    max_scale_y = std::max(max_scale_y, subimages[i].scale_y);
+  }
+
+  // Clamp to max texture size
+  const float max_possible_scale =
+    static_cast(g_gpu_device->GetMaxTextureSize()) / static_cast(TEXTURE_PAGE_WIDTH);
+  max_scale_x = std::min(max_scale_x, max_possible_scale);
+  max_scale_y = std::min(max_scale_y, max_possible_scale);
+
+  const GSVector4 max_scale_v = GSVector4(max_scale_x, max_scale_y).xyxy();
+  GPUSampler* filter =
+    s_config.replacement_scale_linear_filter ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler();
+
+  const u32 new_width = static_cast(std::ceil(static_cast(TEXTURE_PAGE_WIDTH) * max_scale_x));
+  const u32 new_height = static_cast(std::ceil(static_cast(TEXTURE_PAGE_HEIGHT) * max_scale_y));
+  if (!s_replacement_texture_render_target || s_replacement_texture_render_target->GetWidth() < new_width ||
+      s_replacement_texture_render_target->GetHeight() < new_height)
+  {
+    // NOTE: Not recycled, it's unlikely to be reused.
+    s_replacement_texture_render_target.reset();
+    if (!(s_replacement_texture_render_target = g_gpu_device->CreateTexture(
+            new_width, new_height, 1, 1, 1, GPUTexture::Type::RenderTarget, REPLACEMENT_TEXTURE_FORMAT)))
+    {
+      ERROR_LOG("Failed to create {}x{} render target.", new_width, new_height);
+      return;
+    }
+  }
+
+  // Grab the actual texture beforehand, in case we OOM.
+  std::unique_ptr replacement_tex =
+    g_gpu_device->FetchTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT);
+  if (!replacement_tex)
+  {
+    ERROR_LOG("Failed to create {}x{} texture.", new_width, new_height);
+    return;
+  }
+
+  // TODO: This is AWFUL. Need a better way.
+  // Linear filtering is also wrong, it should do hard edges for 0000 pixels.
+  // We could just copy this from the original image...
+  // First pass: draw the existing entry texture across the whole render target.
+  static constexpr const float u_src_rect[4] = {0.0f, 0.0f, 1.0f, 1.0f};
+  g_gpu_device->InvalidateRenderTarget(s_replacement_texture_render_target.get());
+  g_gpu_device->SetRenderTarget(s_replacement_texture_render_target.get());
+  g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height);
+  g_gpu_device->SetPipeline(s_replacement_init_pipeline.get());
+  g_gpu_device->PushUniformBuffer(u_src_rect, sizeof(u_src_rect));
+  g_gpu_device->SetTextureSampler(0, entry->texture.get(), filter);
+  g_gpu_device->Draw(3, 0);
+
+  // Second pass: draw each replacement over its scaled destination rectangle. A sub-image whose upload
+  // fails is skipped, leaving the original contents in that area.
+  for (const TextureReplacementSubImage& si : subimages)
+  {
+    const auto temp_texture = g_gpu_device->FetchAutoRecycleTexture(
+      si.image.GetWidth(), si.image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT,
+      si.image.GetPixels(), si.image.GetPitch());
+    if (!temp_texture)
+      continue;
+
+    const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v);
+    g_gpu_device->SetViewportAndScissor(dst_rect);
+    g_gpu_device->SetTextureSampler(0, temp_texture.get(), filter);
+    g_gpu_device->SetPipeline(si.invert_alpha ? s_replacement_semitransparent_draw_pipeline.get() :
+                                                s_replacement_draw_pipeline.get());
+    g_gpu_device->Draw(3, 0);
+  }
+
+  // Copy the composited result out of the (possibly larger) render target and swap it into the entry.
+  g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_replacement_texture_render_target.get(), 0, 0, 0,
+                                  0, new_width, new_height);
+  g_gpu_device->RecycleTexture(std::move(entry->texture));
+  entry->texture = std::move(replacement_tex);
+
+  g_gpu->RestoreDeviceContext();
+}
\ No newline at end of file
diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h
new file mode 100644
index 000000000..3bb1d8e3a
--- /dev/null
+++ b/src/core/gpu_hw_texture_cache.h
@@ -0,0 +1,147 @@
+// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin
+// SPDX-License-Identifier: CC-BY-NC-ND-4.0
+
+#pragma once
+
+#include "gpu_types.h"
+
+class GPUTexture;
+class RGBA8Image;
+class StateWrapper;
+
+struct Settings;
+
+//////////////////////////////////////////////////////////////////////////
+// Texture Cache
+//////////////////////////////////////////////////////////////////////////
+namespace GPUTextureCache {
+
+/// 4 pages in C16 mode, 2+4 pages in P8 mode, 1+1 pages in P4 mode.
+static constexpr u32 MAX_PAGE_REFS_PER_SOURCE = 6;
+
+static constexpr u32 MAX_PAGE_REFS_PER_WRITE = 32;
+
+enum class PaletteRecordFlags : u32
+{
+  None = 0,
+  HasSemiTransparentDraws = (1 << 0),
+};
+IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(PaletteRecordFlags);
+
+using HashType = u64;
+using TextureReplacementImage = RGBA8Image;
+
+struct Source;
+struct HashCacheEntry;
+
+template
+struct TList;
+template
+struct TListNode;
+
+template
+struct TList
+{
+  TListNode* head;
+  TListNode* tail;
+};
+
+template
+struct TListNode
+{
+  // why inside itself?
because we have 3 lists + T* ref; + TList* list; + TListNode* prev; + TListNode* next; +}; + +struct SourceKey +{ + u8 page; + GPUTextureMode mode; + GPUTexturePaletteReg palette; + + SourceKey() = default; + ALWAYS_INLINE constexpr SourceKey(u8 page_, GPUTexturePaletteReg palette_, GPUTextureMode mode_) + : page(page_), mode(mode_), palette(palette_) + { + } + ALWAYS_INLINE constexpr SourceKey(const SourceKey& k) : page(k.page), mode(k.mode), palette(k.palette) {} + + ALWAYS_INLINE bool HasPalette() const { return (mode < GPUTextureMode::Direct16Bit); } + + ALWAYS_INLINE SourceKey& operator=(const SourceKey& k) + { + page = k.page; + mode = k.mode; + palette.bits = k.palette.bits; + return *this; + } + + ALWAYS_INLINE bool operator==(const SourceKey& k) const { return (std::memcmp(&k, this, sizeof(SourceKey)) == 0); } + ALWAYS_INLINE bool operator!=(const SourceKey& k) const { return (std::memcmp(&k, this, sizeof(SourceKey)) != 0); } +}; +static_assert(sizeof(SourceKey) == 4); + +// TODO: Pool objects +struct Source +{ + SourceKey key; + u32 num_page_refs; + GPUTexture* texture; + HashCacheEntry* from_hash_cache; + GSVector4i texture_rect; + GSVector4i palette_rect; + HashType texture_hash; + HashType palette_hash; + GSVector4i active_uv_rect; + PaletteRecordFlags palette_record_flags; + + std::array, MAX_PAGE_REFS_PER_SOURCE> page_refs; + TListNode hash_cache_ref; +}; + +bool Initialize(); +void UpdateSettings(const Settings& old_settings); +bool DoState(StateWrapper& sw, bool skip); +void Shutdown(); + +void Invalidate(); + +void AddWrittenRectangle(const GSVector4i rect, bool update_vram_writes = false); +void AddDrawnRectangle(const GSVector4i rect, const GSVector4i clip_rect); + +void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, bool check_mask, + const GSVector4i src_bounds, const GSVector4i dst_bounds); +void WriteVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask, + const 
GSVector4i bounds); +void UpdateVRAMTrackingState(); + +const Source* LookupSource(SourceKey key, const GSVector4i uv_rect, PaletteRecordFlags flags); + +bool IsPageDrawn(u32 page_index); +bool IsPageDrawn(u32 page_index, const GSVector4i rect); +bool IsRectDrawn(const GSVector4i rect); +bool AreSourcePagesDrawn(SourceKey key, const GSVector4i rect); + +void InvalidatePageSources(u32 pn); +void InvalidatePageSources(u32 pn, const GSVector4i rc); +void DestroySource(Source* src); + +void Compact(); + +void DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u32* dest, u32 dest_stride, u32 width, + u32 height); +HashType HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode, u32 min, u32 max); +HashType HashRect(const GSVector4i rc); + +void SetGameID(std::string game_id); +void ReloadTextureReplacements(); + +// VRAM Write Replacements +const TextureReplacementImage* GetVRAMReplacement(u32 width, u32 height, const void* pixels); +void DumpVRAMWrite(u32 width, u32 height, const void* pixels); +bool ShouldDumpVRAMWrite(u32 width, u32 height); + +} // namespace GPUTextureCache diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index db87f2944..3e93cc960 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_sw.h" +#include "gpu_hw_texture_cache.h" #include "settings.h" #include "system.h" @@ -70,7 +71,11 @@ bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di m_backend.Sync(true); // ignore the host texture for software mode, since we want to save vram here - return GPU::DoState(sw, nullptr, update_display); + if (!GPU::DoState(sw, nullptr, update_display)) + return false; + + // need to still call the TC, to toss any data in the state + return GPUTextureCache::DoState(sw, true); } void GPU_SW::Reset(bool clear_vram) diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index 95e153e5e..6d884835a 100644 --- 
a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -26,7 +26,15 @@ enum : u32 GPU_MAX_DISPLAY_WIDTH = 720, GPU_MAX_DISPLAY_HEIGHT = 576, - DITHER_MATRIX_SIZE = 4 + DITHER_MATRIX_SIZE = 4, + + VRAM_PAGE_WIDTH = 64, + VRAM_PAGE_HEIGHT = 256, + VRAM_PAGES_WIDE = VRAM_WIDTH / VRAM_PAGE_WIDTH, + VRAM_PAGES_HIGH = VRAM_HEIGHT / VRAM_PAGE_HEIGHT, + VRAM_PAGE_X_MASK = 0xf, // 16 pages wide + VRAM_PAGE_Y_MASK = 0x10, // 2 pages high + NUM_VRAM_PAGES = VRAM_PAGES_WIDE * VRAM_PAGES_HIGH, }; enum : s32 @@ -61,6 +69,11 @@ enum class GPUTextureMode : u8 IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode); +ALWAYS_INLINE static constexpr bool TextureModeHasPalette(GPUTextureMode mode) +{ + return (mode < GPUTextureMode::Direct16Bit); +} + enum class GPUTransparencyMode : u8 { HalfBackgroundPlusHalfForeground = 0, @@ -169,7 +182,7 @@ static constexpr s32 TruncateGPUVertexPosition(s32 x) union GPUDrawModeReg { static constexpr u16 MASK = 0b1111111111111; - static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111); + static constexpr u16 TEXTURE_MODE_AND_PAGE_MASK = UINT16_C(0b0000000110011111); // Polygon texpage commands only affect bits 0-8, 11 static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111; @@ -177,11 +190,9 @@ union GPUDrawModeReg // Bits 0..5 are returned in the GPU status register, latched at E1h/polygon draw time. static constexpr u32 GPUSTAT_MASK = 0b11111111111; - static constexpr std::array texture_page_widths = { - {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}}; - u16 bits; + BitField texture_page; BitField texture_page_x_base; BitField texture_page_y_base; BitField transparency_mode; @@ -197,15 +208,6 @@ union GPUDrawModeReg /// Returns true if the texture mode requires a palette. ALWAYS_INLINE bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; } - - /// Returns a rectangle comprising the texture page area. 
- ALWAYS_INLINE_RELEASE GSVector4i GetTexturePageRectangle() const - { - const u32 base_x = GetTexturePageBaseX(); - const u32 base_y = GetTexturePageBaseY(); - return GSVector4i(base_x, base_y, base_x + texture_page_widths[static_cast(texture_mode.GetValue())], - base_y + TEXTURE_PAGE_HEIGHT); - } }; union GPUTexturePaletteReg @@ -217,17 +219,8 @@ union GPUTexturePaletteReg BitField x; BitField y; - ALWAYS_INLINE u32 GetXBase() const { return static_cast(x) * 16u; } - ALWAYS_INLINE u32 GetYBase() const { return static_cast(y); } - - /// Returns a rectangle comprising the texture palette area. - ALWAYS_INLINE_RELEASE GSVector4i GetRectangle(GPUTextureMode mode) const - { - static constexpr std::array palette_widths = {{16, 256, 0, 0}}; - const u32 base_x = GetXBase(); - const u32 base_y = GetYBase(); - return GSVector4i(base_x, base_y, base_x + palette_widths[static_cast(mode)], base_y + 1); - } + ALWAYS_INLINE constexpr u32 GetXBase() const { return static_cast(x) * 16u; } + ALWAYS_INLINE constexpr u32 GetYBase() const { return static_cast(y); } }; struct GPUTextureWindow @@ -238,6 +231,119 @@ struct GPUTextureWindow u8 or_y; }; +ALWAYS_INLINE static constexpr u32 VRAMPageIndex(u32 px, u32 py) +{ + return ((py * VRAM_PAGES_WIDE) + px); +} +ALWAYS_INLINE static constexpr GSVector4i VRAMPageRect(u32 px, u32 py) +{ + return GSVector4i::cxpr(px * VRAM_PAGE_WIDTH, py * VRAM_PAGE_HEIGHT, (px + 1) * VRAM_PAGE_WIDTH, + (py + 1) * VRAM_PAGE_HEIGHT); +} +ALWAYS_INLINE static constexpr GSVector4i VRAMPageRect(u32 pn) +{ + // TODO: Put page rects in a LUT instead? 
+ return VRAMPageRect(pn % VRAM_PAGES_WIDE, pn / VRAM_PAGES_WIDE); +} + +ALWAYS_INLINE static constexpr u32 VRAMCoordinateToPage(u32 x, u32 y) +{ + return VRAMPageIndex(x / VRAM_PAGE_WIDTH, y / VRAM_PAGE_HEIGHT); +} + +ALWAYS_INLINE static constexpr u32 VRAMPageStartX(u32 pn) +{ + return (pn % VRAM_PAGES_WIDE) * VRAM_PAGE_WIDTH; +} + +ALWAYS_INLINE static constexpr u32 VRAMPageStartY(u32 pn) +{ + return (pn / VRAM_PAGES_WIDE) * VRAM_PAGE_HEIGHT; +} + +ALWAYS_INLINE static constexpr u8 GetTextureModeShift(GPUTextureMode mode) +{ + return ((mode < GPUTextureMode::Direct16Bit) ? (2 - static_cast(mode)) : 0); +} + +ALWAYS_INLINE static constexpr u32 ApplyTextureModeShift(GPUTextureMode mode, u32 vram_width) +{ + return vram_width << GetTextureModeShift(mode); +} + +ALWAYS_INLINE static GSVector4i ApplyTextureModeShift(GPUTextureMode mode, const GSVector4i rect) +{ + return rect.sll32(GetTextureModeShift(mode)); +} + +ALWAYS_INLINE static constexpr u32 TexturePageCountForMode(GPUTextureMode mode) +{ + return ((mode < GPUTextureMode::Direct16Bit) ? (1 + static_cast(mode)) : 4); +} + +ALWAYS_INLINE static constexpr u32 TexturePageWidthForMode(GPUTextureMode mode) +{ + return TEXTURE_PAGE_WIDTH >> GetTextureModeShift(mode); +} + +ALWAYS_INLINE static constexpr bool TexturePageIsWrapping(GPUTextureMode mode, u32 pn) +{ + return ((VRAMPageStartX(pn) + TexturePageWidthForMode(mode)) > VRAM_WIDTH); +} + +ALWAYS_INLINE static constexpr u32 PalettePageCountForMode(GPUTextureMode mode) +{ + return (mode == GPUTextureMode::Palette4Bit) ? 
1 : 4; +} + +ALWAYS_INLINE static constexpr u32 PalettePageNumber(GPUTexturePaletteReg reg) +{ + return VRAMCoordinateToPage(reg.GetXBase(), reg.GetYBase()); +} + +ALWAYS_INLINE static constexpr GSVector4i GetTextureRect(u32 pn, GPUTextureMode mode) +{ + u32 left = VRAMPageStartX(pn); + u32 top = VRAMPageStartY(pn); + u32 right = left + TexturePageWidthForMode(mode); + u32 bottom = top + VRAM_PAGE_HEIGHT; + if (right > VRAM_WIDTH) [[unlikely]] + { + left = 0; + right = VRAM_WIDTH; + } + if (bottom > VRAM_HEIGHT) [[unlikely]] + { + top = 0; + bottom = VRAM_HEIGHT; + } + + return GSVector4i::cxpr(left, top, right, bottom); +} + +/// Returns the number of palette entries (the palette's width in VRAM) for a texture mode, or 0 if unpaletted. +ALWAYS_INLINE static constexpr u32 GetPaletteWidth(GPUTextureMode mode) +{ + return (mode == GPUTextureMode::Palette4Bit ? 16 : ((mode == GPUTextureMode::Palette8Bit) ? 256 : 0)); +} + +/// Returns a rectangle comprising the texture palette area. +ALWAYS_INLINE static constexpr GSVector4i GetPaletteRect(GPUTexturePaletteReg palette, GPUTextureMode mode, + bool clamp_instead_of_wrapping = false) +{ + const u32 width = GetPaletteWidth(mode); + u32 left = palette.GetXBase(); + u32 top = palette.GetYBase(); + u32 right = left + width; + u32 bottom = top + 1; + if (right > VRAM_WIDTH) [[unlikely]] + { + right = VRAM_WIDTH; + left = clamp_instead_of_wrapping ? left : 0; + } + return GSVector4i::cxpr(left, top, right, bottom); +} + // 4x4 dither matrix. 
static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1}, // row 0 {+2, -2, +3, -1}, // row 1 diff --git a/src/core/hotkeys.cpp b/src/core/hotkeys.cpp index 83de4396d..9897ff7ae 100644 --- a/src/core/hotkeys.cpp +++ b/src/core/hotkeys.cpp @@ -7,12 +7,12 @@ #include "cpu_pgxp.h" #include "fullscreen_ui.h" #include "gpu.h" +#include "gpu_hw_texture_cache.h" #include "host.h" #include "imgui_overlays.h" #include "settings.h" #include "spu.h" #include "system.h" -#include "texture_replacements.h" #include "util/gpu_device.h" #include "util/input_manager.h" @@ -22,8 +22,8 @@ #include "common/file_system.h" #include "common/timer.h" -#include "IconsFontAwesome5.h" #include "IconsEmoji.h" +#include "IconsFontAwesome5.h" #include "fmt/format.h" #include @@ -441,7 +441,7 @@ DEFINE_HOTKEY("ReloadTextureReplacements", TRANSLATE_NOOP("Hotkeys", "Graphics") { Host::AddKeyedOSDMessage("ReloadTextureReplacements", TRANSLATE_STR("OSDMessage", "Texture replacements reloaded."), 10.0f); - TextureReplacements::Reload(); + GPUTextureCache::ReloadTextureReplacements(); } }) @@ -554,7 +554,8 @@ DEFINE_HOTKEY("AudioCDAudioMute", TRANSLATE_NOOP("Hotkeys", "Audio"), TRANSLATE_ { g_settings.cdrom_mute_cd_audio = !g_settings.cdrom_mute_cd_audio; Host::AddIconOSDMessage( - "AudioControlHotkey", g_settings.cdrom_mute_cd_audio ? ICON_EMOJI_MUTED_SPEAKER : ICON_EMOJI_MEDIUM_VOLUME_SPEAKER, + "AudioControlHotkey", + g_settings.cdrom_mute_cd_audio ? ICON_EMOJI_MUTED_SPEAKER : ICON_EMOJI_MEDIUM_VOLUME_SPEAKER, g_settings.cdrom_mute_cd_audio ? 
TRANSLATE_STR("OSDMessage", "CD Audio Muted.") : TRANSLATE_STR("OSDMessage", "CD Audio Unmuted."), 2.0f); diff --git a/src/core/save_state_version.h b/src/core/save_state_version.h index c6e6aceae..58c53690f 100644 --- a/src/core/save_state_version.h +++ b/src/core/save_state_version.h @@ -6,7 +6,7 @@ #include "common/types.h" static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; -static constexpr u32 SAVE_STATE_VERSION = 72; +static constexpr u32 SAVE_STATE_VERSION = 73; static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42; static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 01662e5d4..1f7bb59be 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -228,6 +228,7 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si) si.GetStringValue("GPU", "ForceVideoTiming", GetForceVideoTimingName(DEFAULT_FORCE_VIDEO_TIMING_MODE)).c_str()) .value_or(DEFAULT_FORCE_VIDEO_TIMING_MODE); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); + gpu_texture_cache = si.GetBoolValue("GPU", "EnableTextureCache", false); display_24bit_chroma_smoothing = si.GetBoolValue("GPU", "ChromaSmoothing24Bit", false); gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false); gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); @@ -433,16 +434,43 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si) debugging.show_mdec_state = si.GetBoolValue("Debug", "ShowMDECState"); debugging.show_dma_state = si.GetBoolValue("Debug", "ShowDMAState"); + texture_replacements.enable_texture_replacements = + si.GetBoolValue("TextureReplacements", "EnableTextureReplacements", false); texture_replacements.enable_vram_write_replacements = si.GetBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false); texture_replacements.preload_textures = si.GetBoolValue("TextureReplacements", "PreloadTextures", false); + 
texture_replacements.dump_textures = si.GetBoolValue("TextureReplacements", "DumpTextures", false); + texture_replacements.dump_replaced_textures = si.GetBoolValue("TextureReplacements", "DumpReplacedTextures", true); texture_replacements.dump_vram_writes = si.GetBoolValue("TextureReplacements", "DumpVRAMWrites", false); - texture_replacements.dump_vram_write_force_alpha_channel = + + texture_replacements.config.dump_texture_pages = si.GetBoolValue("TextureReplacements", "DumpTexturePages", false); + texture_replacements.config.dump_full_texture_pages = + si.GetBoolValue("TextureReplacements", "DumpFullTexturePages", false); + texture_replacements.config.dump_texture_force_alpha_channel = + si.GetBoolValue("TextureReplacements", "DumpTextureForceAlphaChannel", false); + texture_replacements.config.dump_vram_write_force_alpha_channel = si.GetBoolValue("TextureReplacements", "DumpVRAMWriteForceAlphaChannel", true); - texture_replacements.dump_vram_write_width_threshold = - si.GetIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", 128); - texture_replacements.dump_vram_write_height_threshold = - si.GetIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", 128); + texture_replacements.config.dump_c16_textures = si.GetBoolValue("TextureReplacements", "DumpC16Textures", false); + texture_replacements.config.reduce_palette_range = si.GetBoolValue("TextureReplacements", "ReducePaletteRange", true); + texture_replacements.config.convert_copies_to_writes = + si.GetBoolValue("TextureReplacements", "ConvertCopiesToWrites", false); + texture_replacements.config.replacement_scale_linear_filter = + si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", true); + + texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); + texture_replacements.config.max_vram_write_coalesce_width = + si.GetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", 0u); + 
texture_replacements.config.max_vram_write_coalesce_height = + si.GetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceHeight", 0u); + + texture_replacements.config.texture_dump_width_threshold = + si.GetUIntValue("TextureReplacements", "DumpTextureWidthThreshold", 16); + texture_replacements.config.texture_dump_height_threshold = + si.GetUIntValue("TextureReplacements", "DumpTextureHeightThreshold", 16); + texture_replacements.config.vram_write_dump_width_threshold = + si.GetUIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", 128); + texture_replacements.config.vram_write_dump_height_threshold = + si.GetUIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", 128); #ifdef __ANDROID__ // Android users are incredibly silly and don't understand that stretch is in the aspect ratio list... @@ -526,6 +554,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetStringValue("GPU", "WireframeMode", GetGPUWireframeModeName(gpu_wireframe_mode)); si.SetStringValue("GPU", "ForceVideoTiming", GetForceVideoTimingName(gpu_force_video_timing)); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); + si.SetBoolValue("GPU", "EnableTextureCache", gpu_texture_cache); si.SetBoolValue("GPU", "ChromaSmoothing24Bit", display_24bit_chroma_smoothing); si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable); si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); @@ -677,16 +706,41 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("Debug", "ShowDMAState", debugging.show_dma_state); } + si.SetBoolValue("TextureReplacements", "EnableTextureReplacements", texture_replacements.enable_texture_replacements); si.SetBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", texture_replacements.enable_vram_write_replacements); si.SetBoolValue("TextureReplacements", "PreloadTextures", texture_replacements.preload_textures); si.SetBoolValue("TextureReplacements", "DumpVRAMWrites", 
texture_replacements.dump_vram_writes); + si.SetBoolValue("TextureReplacements", "DumpTextures", texture_replacements.dump_textures); + si.SetBoolValue("TextureReplacements", "DumpReplacedTextures", texture_replacements.dump_replaced_textures); + + si.SetBoolValue("TextureReplacements", "DumpTexturePages", texture_replacements.config.dump_texture_pages); + si.SetBoolValue("TextureReplacements", "DumpFullTexturePages", texture_replacements.config.dump_full_texture_pages); + si.SetBoolValue("TextureReplacements", "DumpTextureForceAlphaChannel", + texture_replacements.config.dump_texture_force_alpha_channel); + si.SetBoolValue("TextureReplacements", "DumpVRAMWriteForceAlphaChannel", - texture_replacements.dump_vram_write_force_alpha_channel); - si.SetIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", - texture_replacements.dump_vram_write_width_threshold); - si.SetIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", - texture_replacements.dump_vram_write_height_threshold); + texture_replacements.config.dump_vram_write_force_alpha_channel); + si.SetBoolValue("TextureReplacements", "DumpC16Textures", texture_replacements.config.dump_c16_textures); + si.SetBoolValue("TextureReplacements", "ReducePaletteRange", texture_replacements.config.reduce_palette_range); + si.SetBoolValue("TextureReplacements", "ConvertCopiesToWrites", texture_replacements.config.convert_copies_to_writes); + si.SetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", + texture_replacements.config.replacement_scale_linear_filter); + + si.SetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", texture_replacements.config.max_vram_write_splits); + si.SetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", + texture_replacements.config.max_vram_write_coalesce_width); + si.SetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceHeight", + texture_replacements.config.max_vram_write_coalesce_height); + + si.SetUIntValue("TextureReplacements", 
"DumpTextureWidthThreshold", + texture_replacements.config.texture_dump_width_threshold); + si.SetUIntValue("TextureReplacements", "DumpTextureHeightThreshold", + texture_replacements.config.texture_dump_height_threshold); + si.SetUIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", + texture_replacements.config.vram_write_dump_width_threshold); + si.SetUIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", + texture_replacements.config.vram_write_dump_height_threshold); } void Settings::Clear(SettingsInterface& si) @@ -716,6 +770,146 @@ void Settings::Clear(SettingsInterface& si) si.ClearSection("TextureReplacements"); } +bool Settings::TextureReplacementSettings::Configuration::operator==(const Configuration& rhs) const +{ + return (dump_texture_pages == rhs.dump_texture_pages && dump_full_texture_pages == rhs.dump_full_texture_pages && + dump_texture_force_alpha_channel == rhs.dump_texture_force_alpha_channel && + dump_vram_write_force_alpha_channel == rhs.dump_vram_write_force_alpha_channel && + dump_c16_textures == rhs.dump_c16_textures && reduce_palette_range == rhs.reduce_palette_range && + convert_copies_to_writes == rhs.convert_copies_to_writes && + replacement_scale_linear_filter == rhs.replacement_scale_linear_filter && + max_vram_write_splits == rhs.max_vram_write_splits && + max_vram_write_coalesce_width == rhs.max_vram_write_coalesce_width && + max_vram_write_coalesce_height == rhs.max_vram_write_coalesce_height && + texture_dump_width_threshold == rhs.texture_dump_width_threshold && + texture_dump_height_threshold == rhs.texture_dump_height_threshold && + vram_write_dump_width_threshold == rhs.vram_write_dump_width_threshold && + vram_write_dump_height_threshold == rhs.vram_write_dump_height_threshold); +} + +bool Settings::TextureReplacementSettings::Configuration::operator!=(const Configuration& rhs) const +{ + return !operator==(rhs); +} + +bool Settings::TextureReplacementSettings::operator==(const TextureReplacementSettings& 
rhs) const +{ + return (enable_texture_replacements == rhs.enable_texture_replacements && + enable_vram_write_replacements == rhs.enable_vram_write_replacements && + preload_textures == rhs.preload_textures && dump_textures == rhs.dump_textures && + dump_replaced_textures == rhs.dump_replaced_textures && dump_vram_writes == rhs.dump_vram_writes && + config == rhs.config); +} + +bool Settings::TextureReplacementSettings::operator!=(const TextureReplacementSettings& rhs) const +{ + return !operator==(rhs); +} + +std::string Settings::TextureReplacementSettings::Configuration::ExportToYAML(bool comment) const +{ + static constexpr const char CONFIG_TEMPLATE[] = R"(# DuckStation Texture Replacement Configuration +# This file allows you to set a per-game configuration for the dumping and +# replacement system, avoiding the need to use the normal per-game settings +# when moving files to a different computer. It also allows for the definition +# of texture aliases, for reducing duplicate files. +# +# All options are commented out by default. If an option is commented, the user's +# current setting will be used instead. If an option is defined in this file, it +# will always take precedence over the user's choice. + +# Enables texture page dumping mode. +# Instead of tracking VRAM writes and attempting to identify the "real" size of +# textures, create sub-rectangles from pages based on how they are drawn. In +# most games, this will lead to significant duplication in dumps, and reduce +# replacement reliability. However, some games are incompatible with write +# tracking, and must use page mode. +{}DumpTexturePages: {} + +# Dumps full texture pages instead of sub-rectangles. +# 256x256 pages will be dumped/replaced instead. +{}DumpFullTexturePages: {} + +# Enables the dumping of direct textures (i.e. C16 format). +# Most games do not use direct textures, and when they do, it is usually for +# post-processing or FMVs. 
Ignoring C16 textures typically reduces garbage/false +# positive texture dumps, however, some games may require it. +{}DumpC16Textures: {} + +# Reduces the size of palettes (i.e. CLUTs) to only those indices that are used. +# This can help reduce duplication and improve replacement reliability in games +# that use 8-bit textures, but do not reserve or use the full 1x256 region in +# video memory for storage of the palette. When replacing textures dumped with +# this option enabled, CPU usage on the GPU thread does increase trivially, +# however, generally it is worthwhile for the reliability improvement. Games +# that require this option include Metal Gear Solid. +{}ReducePaletteRange: {} + +# Converts VRAM copies to VRAM writes, when a copy is performed into a previously +# tracked VRAM write. This is required for some games that construct animated +# textures by copying and replacing small portions of the texture with the parts +# that are animated. Generally this option will cause duplication when dumping, +# but it is required in some games, such as Final Fantasy VIII. +{}ConvertCopiesToWrites: {} + +# Determines the maximum number of times a VRAM write/upload can be split, before +# it is discarded and no longer tracked. This is required for games that partially +# overwrite texture data, such as Gran Turismo. +{}MaxVRAMWriteSplits: {} + +# Determines the maximum size of an incoming VRAM write that will be merged with +# another write to the left/above of the incoming write. Needed for games that +# upload textures one line at a time. These games will log "tracking VRAM write +# of Nx1" repeatedly during loading. If the upload size is 1, then you can set +# the corresponding maximum coalesce dimension to 1 to merge these uploads, +# which should enable these textures to be dumped/replaced. +{}MaxVRAMWriteCoalesceWidth: {} +{}MaxVRAMWriteCoalesceHeight: {} + +# Determines the minimum size of a texture that will be dumped. 
Textures with a +# width/height smaller than this value will be ignored. +{}DumpTextureWidthThreshold: {} +{}DumpTextureHeightThreshold: {} + +# Determines the minimum size of a VRAM write that will be dumped, in background +# dumping mode. Uploads smaller than this size will be ignored. +{}DumpVRAMWriteWidthThreshold: {} +{}DumpVRAMWriteHeightThreshold: {} + +# Enables the use of a bilinear filter when scaling replacement textures. +# If more than one replacement texture in a 256x256 texture page has a different +# scaling over the native resolution, or the texture page is not covered, a +# bilinear filter will be used to resize/stretch the replacement texture, and/or +# the original native data. +{}ReplacementScaleLinearFilter: {} + +# Use this section to define replacement aliases. One line per replacement +# texture, with the key set to the source ID, and the value set to the filename +# which should be loaded as a replacement. For example, without the newline, +# or keep the multi-line separator. +#Aliases: + # Alias-Texture-Name: Path-To-Texture + # texupload-P4-AAAAAAAAAAAAAAAA-BBBBBBBBBBBBBBBB-64x256-0-192-64x64-P0-14: | + # texupload-P4-BBBBBBBBBBBBBBBB-BBBBBBBBBBBBBBBB-64x256-0-64-64x64-P0-13.png + # texupload-P4-AAAAAAAAAAAAAAAA-BBBBBBBBBBBBBBBB-64x256-0-192-64x64-P0-14: mytexture.png +)"; + + const std::string_view comment_str = comment ? 
"#" : ""; + return fmt::format(CONFIG_TEMPLATE, comment_str, dump_texture_pages, // DumpTexturePages + comment_str, dump_full_texture_pages, // DumpFullTexturePages + comment_str, dump_c16_textures, // DumpC16Textures + comment_str, reduce_palette_range, // ReducePaletteRange + comment_str, convert_copies_to_writes, // ConvertCopiesToWrites + comment_str, max_vram_write_splits, // MaxVRAMWriteSplits + comment_str, max_vram_write_coalesce_width, // MaxVRAMWriteCoalesceWidth + comment_str, max_vram_write_coalesce_height, // MaxVRAMWriteCoalesceHeight + comment_str, texture_dump_width_threshold, // DumpTextureWidthThreshold + comment_str, texture_dump_height_threshold, // DumpTextureHeightThreshold + comment_str, vram_write_dump_width_threshold, // DumpVRAMWriteWidthThreshold + comment_str, vram_write_dump_height_threshold, // DumpVRAMWriteHeightThreshold + comment_str, replacement_scale_linear_filter); // ReplacementScaleLinearFilter +} + void Settings::FixIncompatibleSettings(bool display_osd_messages) { if (g_settings.disable_all_enhancements) @@ -2043,7 +2237,6 @@ bool EmuFolders::EnsureFoldersExist() result = FileSystem::EnsureDirectoryExists(Covers.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(Dumps.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(Path::Combine(Dumps, "audio").c_str(), false) && result; - result = FileSystem::EnsureDirectoryExists(Path::Combine(Dumps, "textures").c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(GameIcons.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(GameSettings.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(InputProfiles.c_str(), false) && result; diff --git a/src/core/settings.h b/src/core/settings.h index 5ddcfc381..d1c9568b4 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -117,6 +117,7 @@ struct Settings bool gpu_force_round_texcoords : 1 = false; bool gpu_accurate_blending : 1 = false; bool 
gpu_widescreen_hack : 1 = false; + bool gpu_texture_cache : 1 = false; bool gpu_pgxp_enable : 1 = false; bool gpu_pgxp_culling : 1 = true; bool gpu_pgxp_texture_correction : 1 = true; @@ -239,20 +240,46 @@ struct Settings // texture replacements struct TextureReplacementSettings { + struct Configuration + { + constexpr Configuration() = default; + + bool dump_texture_pages : 1 = false; + bool dump_full_texture_pages : 1 = false; + bool dump_texture_force_alpha_channel : 1 = false; + bool dump_vram_write_force_alpha_channel : 1 = true; + bool dump_c16_textures : 1 = false; + bool reduce_palette_range : 1 = true; + bool convert_copies_to_writes : 1 = false; + bool replacement_scale_linear_filter = true; + + u32 max_vram_write_splits = 0; + u32 max_vram_write_coalesce_width = 0; + u32 max_vram_write_coalesce_height = 0; + u32 texture_dump_width_threshold = 16; + u32 texture_dump_height_threshold = 16; + + u32 vram_write_dump_width_threshold = 128; + u32 vram_write_dump_height_threshold = 128; + + bool operator==(const Configuration& rhs) const; + bool operator!=(const Configuration& rhs) const; + + std::string ExportToYAML(bool comment) const; + }; + + bool enable_texture_replacements : 1 = false; bool enable_vram_write_replacements : 1 = false; bool preload_textures : 1 = false; + bool dump_textures : 1 = false; + bool dump_replaced_textures : 1 = true; bool dump_vram_writes : 1 = false; - bool dump_vram_write_force_alpha_channel : 1 = true; - u32 dump_vram_write_width_threshold = 128; - u32 dump_vram_write_height_threshold = 128; - ALWAYS_INLINE bool AnyReplacementsEnabled() const { return enable_vram_write_replacements; } + Configuration config; - ALWAYS_INLINE bool ShouldDumpVRAMWrite(u32 width, u32 height) - { - return dump_vram_writes && width >= dump_vram_write_width_threshold && height >= dump_vram_write_height_threshold; - } + bool operator==(const TextureReplacementSettings& rhs) const; + bool operator!=(const TextureReplacementSettings& rhs) const; } 
texture_replacements; bool bios_tty_logging : 1 = false; @@ -345,8 +372,6 @@ struct Settings DEFAULT_DMA_HALT_TICKS = 100, DEFAULT_GPU_FIFO_SIZE = 16, DEFAULT_GPU_MAX_RUN_AHEAD = 128, - DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD = 128, - DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD = 128, }; void Load(SettingsInterface& si, SettingsInterface& controller_si); diff --git a/src/core/system.cpp b/src/core/system.cpp index 0a432dd5d..4bb2f300c 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -16,6 +16,7 @@ #include "game_database.h" #include "game_list.h" #include "gpu.h" +#include "gpu_hw_texture_cache.h" #include "gte.h" #include "host.h" #include "host_interface_progress_callback.h" @@ -30,7 +31,6 @@ #include "save_state_version.h" #include "sio.h" #include "spu.h" -#include "texture_replacements.h" #include "timers.h" #include "scmversion/scmversion.h" @@ -1794,7 +1794,7 @@ bool System::BootSystem(SystemBootParameters parameters, Error* error) // Texture replacement preloading. // TODO: Move this and everything else below OnSystemStarted(). - TextureReplacements::SetGameID(s_running_game_serial); + GPUTextureCache::SetGameID(s_running_game_serial); // Good to go. s_state = State::Running; @@ -1969,8 +1969,6 @@ void System::DestroySystem() ClearMemorySaveStates(); - TextureReplacements::Shutdown(); - PCDrv::Shutdown(); SIO::Shutdown(); MDEC::Shutdown(); @@ -1984,6 +1982,7 @@ void System::DestroySystem() CPU::Shutdown(); Bus::Shutdown(); TimingEvents::Shutdown(); + GPUTextureCache::Shutdown(); ClearRunningGame(); // Restore present-all-frames behavior. 
@@ -4078,7 +4077,7 @@ void System::UpdateRunningGame(const std::string_view path, CDImage* image, bool } if (!booting) - TextureReplacements::SetGameID(s_running_game_serial); + GPUTextureCache::SetGameID(s_running_game_serial); if (booting) Achievements::ResetHardcoreMode(true); @@ -4389,6 +4388,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale || g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode || + g_settings.gpu_texture_cache != old_settings.gpu_texture_cache || g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing || g_settings.display_crop_mode != old_settings.display_crop_mode || @@ -4404,7 +4404,8 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_line_start_offset != old_settings.display_line_start_offset || g_settings.display_line_end_offset != old_settings.display_line_end_offset || g_settings.rewind_enable != old_settings.rewind_enable || - g_settings.runahead_frames != old_settings.runahead_frames) + g_settings.runahead_frames != old_settings.runahead_frames || + g_settings.texture_replacements.config != old_settings.texture_replacements.config) { g_gpu->UpdateSettings(old_settings); if (IsPaused()) @@ -4455,13 +4456,6 @@ void System::CheckForSettingsChanges(const Settings& old_settings) UpdateMemorySaveStateSettings(); } - if (g_settings.texture_replacements.enable_vram_write_replacements != - old_settings.texture_replacements.enable_vram_write_replacements || - g_settings.texture_replacements.preload_textures != old_settings.texture_replacements.preload_textures) - { - TextureReplacements::Reload(); - } - if (g_settings.audio_backend != old_settings.audio_backend || g_settings.emulation_speed != 
old_settings.emulation_speed || g_settings.fast_forward_speed != old_settings.fast_forward_speed || @@ -4576,53 +4570,66 @@ void System::WarnAboutUnsafeSettings() LargeString messages; auto append = [&messages](const char* icon, std::string_view msg) { messages.append_format("{} {}\n", icon, msg); }; - if (!g_settings.disable_all_enhancements && ImGuiManager::IsShowingOSDMessages()) + if (!g_settings.disable_all_enhancements) { - if (g_settings.cpu_overclock_active) + if (ImGuiManager::IsShowingOSDMessages()) { - append(ICON_EMOJI_WARNING, - SmallString::from_format( - TRANSLATE_FS("System", "CPU clock speed is set to {}% ({} / {}). This may crash games."), - g_settings.GetCPUOverclockPercent(), g_settings.cpu_overclock_numerator, - g_settings.cpu_overclock_denominator)); - } - if (g_settings.cdrom_read_speedup > 1) - { - append(ICON_EMOJI_WARNING, - SmallString::from_format( - TRANSLATE_FS("System", "CD-ROM read speedup set to {}x (effective speed {}x). This may crash games."), - g_settings.cdrom_read_speedup, g_settings.cdrom_read_speedup * 2)); - } - if (g_settings.cdrom_seek_speedup != 1) - { - append(ICON_EMOJI_WARNING, - SmallString::from_format(TRANSLATE_FS("System", "CD-ROM seek speedup set to {}. This may crash games."), - (g_settings.cdrom_seek_speedup == 0) ? - TinyString(TRANSLATE_SV("System", "Instant")) : - TinyString::from_format("{}x", g_settings.cdrom_seek_speedup))); - } - if (g_settings.gpu_force_video_timing != ForceVideoTimingMode::Disabled) - { - append(ICON_FA_TV, TRANSLATE_SV("System", "Force frame timings is enabled. Games may run at incorrect speeds.")); - } - if (!g_settings.IsUsingSoftwareRenderer()) - { - if (g_settings.gpu_multisamples != 1) + if (g_settings.cpu_overclock_active) { append(ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "Multisample anti-aliasing is enabled, some games may not render correctly.")); + SmallString::from_format( + TRANSLATE_FS("System", "CPU clock speed is set to {}% ({} / {}). 
This may crash games."), + g_settings.GetCPUOverclockPercent(), g_settings.cpu_overclock_numerator, + g_settings.cpu_overclock_denominator)); } - if (g_settings.gpu_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) + if (g_settings.cdrom_read_speedup > 1) { - append( - ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "Round upscaled texture coordinates is enabled. This may cause rendering errors.")); + append(ICON_EMOJI_WARNING, + SmallString::from_format( + TRANSLATE_FS("System", "CD-ROM read speedup set to {}x (effective speed {}x). This may crash games."), + g_settings.cdrom_read_speedup, g_settings.cdrom_read_speedup * 2)); + } + if (g_settings.cdrom_seek_speedup != 1) + { + append(ICON_EMOJI_WARNING, + SmallString::from_format(TRANSLATE_FS("System", "CD-ROM seek speedup set to {}. This may crash games."), + (g_settings.cdrom_seek_speedup == 0) ? + TinyString(TRANSLATE_SV("System", "Instant")) : + TinyString::from_format("{}x", g_settings.cdrom_seek_speedup))); + } + if (g_settings.gpu_force_video_timing != ForceVideoTimingMode::Disabled) + { + append(ICON_FA_TV, + TRANSLATE_SV("System", "Force frame timings is enabled. Games may run at incorrect speeds.")); + } + if (!g_settings.IsUsingSoftwareRenderer()) + { + if (g_settings.gpu_multisamples != 1) + { + append(ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "Multisample anti-aliasing is enabled, some games may not render correctly.")); + } + if (g_settings.gpu_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) + { + append( + ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "Round upscaled texture coordinates is enabled. This may cause rendering errors.")); + } + } + if (g_settings.enable_8mb_ram) + { + append(ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "8MB RAM is enabled, this may be incompatible with some games.")); } } - if (g_settings.enable_8mb_ram) + + // Always display TC warning. 
+ if (g_settings.gpu_texture_cache) { - append(ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "8MB RAM is enabled, this may be incompatible with some games.")); + append( + ICON_FA_PAINT_ROLLER, + TRANSLATE_SV("System", + "Texture cache is enabled. This feature is experimental, some games may not render correctly.")); } } diff --git a/src/core/texture_replacements.cpp b/src/core/texture_replacements.cpp deleted file mode 100644 index 8b518e749..000000000 --- a/src/core/texture_replacements.cpp +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#include "texture_replacements.h" -#include "gpu_types.h" -#include "host.h" -#include "settings.h" - -#include "common/bitutils.h" -#include "common/file_system.h" -#include "common/hash_combine.h" -#include "common/log.h" -#include "common/path.h" -#include "common/string_util.h" -#include "common/timer.h" - -#include "fmt/format.h" -#include "xxhash.h" -#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) -#include "xxh_x86dispatch.h" -#endif - -#include -#include -#include -#include - -LOG_CHANNEL(TextureReplacements); - -namespace TextureReplacements { -namespace { -struct VRAMReplacementHash -{ - u64 low; - u64 high; - - TinyString ToString() const; - bool ParseString(std::string_view sv); - - bool operator<(const VRAMReplacementHash& rhs) const { return std::tie(low, high) < std::tie(rhs.low, rhs.high); } - bool operator==(const VRAMReplacementHash& rhs) const { return low == rhs.low && high == rhs.high; } - bool operator!=(const VRAMReplacementHash& rhs) const { return low != rhs.low || high != rhs.high; } -}; - -struct VRAMReplacementHashMapHash -{ - size_t operator()(const VRAMReplacementHash& hash) const; -}; -} // namespace - -using VRAMWriteReplacementMap = std::unordered_map; -using TextureCache = std::unordered_map; - -static bool ParseReplacementFilename(const std::string& filename, VRAMReplacementHash* replacement_hash, - 
ReplacmentType* replacement_type); - -static std::string GetSourceDirectory(); -static std::string GetDumpDirectory(); - -static VRAMReplacementHash GetVRAMWriteHash(u32 width, u32 height, const void* pixels); -static std::string GetVRAMWriteDumpFilename(u32 width, u32 height, const void* pixels); - -static void FindTextures(const std::string& dir); - -static const ReplacementImage* LoadTexture(const std::string& filename); -static void PreloadTextures(); -static void PurgeUnreferencedTexturesFromCache(); - -static std::string s_game_id; - -// TODO: Check the size, purge some when it gets too large. -static TextureCache s_texture_cache; - -static VRAMWriteReplacementMap s_vram_write_replacements; -} // namespace TextureReplacements - -size_t TextureReplacements::VRAMReplacementHashMapHash::operator()(const VRAMReplacementHash& hash) const -{ - size_t hash_hash = std::hash{}(hash.low); - hash_combine(hash_hash, hash.high); - return hash_hash; -} - -TinyString TextureReplacements::VRAMReplacementHash::ToString() const -{ - return TinyString::from_format("{:08X}{:08X}", high, low); -} - -bool TextureReplacements::VRAMReplacementHash::ParseString(std::string_view sv) -{ - if (sv.length() != 32) - return false; - - std::optional high_value = StringUtil::FromChars(sv.substr(0, 16), 16); - std::optional low_value = StringUtil::FromChars(sv.substr(16), 16); - if (!high_value.has_value() || !low_value.has_value()) - return false; - - low = low_value.value(); - high = high_value.value(); - return true; -} - -void TextureReplacements::SetGameID(std::string game_id) -{ - if (s_game_id == game_id) - return; - - s_game_id = game_id; - Reload(); -} - -const TextureReplacements::ReplacementImage* TextureReplacements::GetVRAMReplacement(u32 width, u32 height, - const void* pixels) -{ - const VRAMReplacementHash hash = GetVRAMWriteHash(width, height, pixels); - - const auto it = s_vram_write_replacements.find(hash); - if (it == s_vram_write_replacements.end()) - return nullptr; - - 
return LoadTexture(it->second); -} - -void TextureReplacements::DumpVRAMWrite(u32 width, u32 height, const void* pixels) -{ - const std::string filename = GetVRAMWriteDumpFilename(width, height, pixels); - if (filename.empty()) - return; - - RGBA8Image image; - image.SetSize(width, height); - - const u16* src_pixels = reinterpret_cast(pixels); - - for (u32 y = 0; y < height; y++) - { - for (u32 x = 0; x < width; x++) - { - image.SetPixel(x, y, VRAMRGBA5551ToRGBA8888(*src_pixels)); - src_pixels++; - } - } - - if (g_settings.texture_replacements.dump_vram_write_force_alpha_channel) - { - for (u32 y = 0; y < height; y++) - { - for (u32 x = 0; x < width; x++) - image.SetPixel(x, y, image.GetPixel(x, y) | 0xFF000000u); - } - } - - INFO_LOG("Dumping {}x{} VRAM write to '{}'", width, height, Path::GetFileName(filename)); - if (!image.SaveToFile(filename.c_str())) [[unlikely]] - ERROR_LOG("Failed to dump {}x{} VRAM write to '{}'", width, height, filename); -} - -void TextureReplacements::Shutdown() -{ - s_texture_cache.clear(); - s_vram_write_replacements.clear(); - s_game_id.clear(); -} - -// TODO: Organize into PCSX2-style. 
-std::string TextureReplacements::GetSourceDirectory() -{ - return Path::Combine(EmuFolders::Textures, s_game_id); -} - -std::string TextureReplacements::GetDumpDirectory() -{ - return Path::Combine(EmuFolders::Dumps, Path::Combine("textures", s_game_id)); -} - -TextureReplacements::VRAMReplacementHash TextureReplacements::GetVRAMWriteHash(u32 width, u32 height, - const void* pixels) -{ - XXH128_hash_t hash = XXH3_128bits(pixels, width * height * sizeof(u16)); - return {hash.low64, hash.high64}; -} - -std::string TextureReplacements::GetVRAMWriteDumpFilename(u32 width, u32 height, const void* pixels) -{ - if (s_game_id.empty()) - return {}; - - const VRAMReplacementHash hash = GetVRAMWriteHash(width, height, pixels); - const std::string dump_directory(GetDumpDirectory()); - std::string filename(Path::Combine(dump_directory, fmt::format("vram-write-{}.png", hash.ToString()))); - - if (FileSystem::FileExists(filename.c_str())) - return {}; - - if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false)) - return {}; - - return filename; -} - -void TextureReplacements::Reload() -{ - s_vram_write_replacements.clear(); - - if (g_settings.texture_replacements.AnyReplacementsEnabled()) - FindTextures(GetSourceDirectory()); - - if (g_settings.texture_replacements.preload_textures) - PreloadTextures(); - - PurgeUnreferencedTexturesFromCache(); -} - -void TextureReplacements::PurgeUnreferencedTexturesFromCache() -{ - TextureCache old_map = std::move(s_texture_cache); - s_texture_cache = {}; - - for (const auto& it : s_vram_write_replacements) - { - auto it2 = old_map.find(it.second); - if (it2 != old_map.end()) - { - s_texture_cache[it.second] = std::move(it2->second); - old_map.erase(it2); - } - } -} - -bool TextureReplacements::ParseReplacementFilename(const std::string& filename, VRAMReplacementHash* replacement_hash, - ReplacmentType* replacement_type) -{ - const std::string_view file_title = Path::GetFileTitle(filename); - if 
(!file_title.starts_with("vram-write-")) - return false; - - const std::string_view hashpart = file_title.substr(11); - if (!replacement_hash->ParseString(hashpart)) - return false; - - const std::string_view file_extension = Path::GetExtension(filename); - bool valid_extension = false; - for (const char* test_extension : {"png", "jpg", "webp"}) - { - if (StringUtil::EqualNoCase(file_extension, test_extension)) - { - valid_extension = true; - break; - } - } - - *replacement_type = ReplacmentType::VRAMWrite; - return valid_extension; -} - -void TextureReplacements::FindTextures(const std::string& dir) -{ - FileSystem::FindResultsArray files; - FileSystem::FindFiles(dir.c_str(), "*", FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE, &files); - - for (FILESYSTEM_FIND_DATA& fd : files) - { - if (fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) - continue; - - VRAMReplacementHash hash; - ReplacmentType type; - if (!ParseReplacementFilename(fd.FileName, &hash, &type)) - continue; - - switch (type) - { - case ReplacmentType::VRAMWrite: - { - auto it = s_vram_write_replacements.find(hash); - if (it != s_vram_write_replacements.end()) - { - WARNING_LOG("Duplicate VRAM write replacement: '{}' and '{}'", it->second, fd.FileName); - continue; - } - - s_vram_write_replacements.emplace(hash, std::move(fd.FileName)); - } - break; - } - } - - INFO_LOG("Found {} replacement VRAM writes for '{}'", s_vram_write_replacements.size(), s_game_id); -} - -const TextureReplacements::ReplacementImage* TextureReplacements::LoadTexture(const std::string& filename) -{ - auto it = s_texture_cache.find(filename); - if (it != s_texture_cache.end()) - return &it->second; - - RGBA8Image image; - if (!image.LoadFromFile(filename.c_str())) - { - ERROR_LOG("Failed to load '{}'", Path::GetFileName(filename)); - return nullptr; - } - - INFO_LOG("Loaded '{}': {}x{}", Path::GetFileName(filename), image.GetWidth(), image.GetHeight()); - it = s_texture_cache.emplace(filename, std::move(image)).first; - 
return &it->second; -} - -void TextureReplacements::PreloadTextures() -{ - static constexpr float UPDATE_INTERVAL = 1.0f; - - Common::Timer last_update_time; - u32 num_textures_loaded = 0; - const u32 total_textures = static_cast(s_vram_write_replacements.size()); - -#define UPDATE_PROGRESS() \ - if (last_update_time.GetTimeSeconds() >= UPDATE_INTERVAL) \ - { \ - Host::DisplayLoadingScreen("Preloading replacement textures...", 0, static_cast(total_textures), \ - static_cast(num_textures_loaded)); \ - last_update_time.Reset(); \ - } - - for (const auto& it : s_vram_write_replacements) - { - UPDATE_PROGRESS(); - - LoadTexture(it.second); - num_textures_loaded++; - } - -#undef UPDATE_PROGRESS -} diff --git a/src/core/texture_replacements.h b/src/core/texture_replacements.h deleted file mode 100644 index acac05a36..000000000 --- a/src/core/texture_replacements.h +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -#pragma once - -#include "types.h" - -#include "util/image.h" - -#include - -namespace TextureReplacements { - -using ReplacementImage = RGBA8Image; - -enum class ReplacmentType -{ - VRAMWrite, -}; - -void SetGameID(std::string game_id); - -void Reload(); - -const ReplacementImage* GetVRAMReplacement(u32 width, u32 height, const void* pixels); -void DumpVRAMWrite(u32 width, u32 height, const void* pixels); - -void Shutdown(); - -} // namespace TextureReplacements diff --git a/src/duckstation-qt/CMakeLists.txt b/src/duckstation-qt/CMakeLists.txt index fbf9ae3f4..034050eeb 100644 --- a/src/duckstation-qt/CMakeLists.txt +++ b/src/duckstation-qt/CMakeLists.txt @@ -143,6 +143,7 @@ set(SRCS setupwizarddialog.cpp setupwizarddialog.h setupwizarddialog.ui + texturereplacementsettingsdialog.ui ) set(TS_FILES diff --git a/src/duckstation-qt/duckstation-qt.vcxproj b/src/duckstation-qt/duckstation-qt.vcxproj index c4c424dfa..9a5d77201 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj 
+++ b/src/duckstation-qt/duckstation-qt.vcxproj @@ -339,6 +339,9 @@ Document + + Document + diff --git a/src/duckstation-qt/duckstation-qt.vcxproj.filters b/src/duckstation-qt/duckstation-qt.vcxproj.filters index 7417689d1..c46c6434f 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj.filters +++ b/src/duckstation-qt/duckstation-qt.vcxproj.filters @@ -284,6 +284,7 @@ + diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index a07b13ef4..b8a13a383 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -5,13 +5,21 @@ #include "qtutils.h" #include "settingswindow.h" #include "settingwidgetbinder.h" +#include "ui_texturereplacementsettingsdialog.h" #include "core/game_database.h" #include "core/gpu.h" #include "core/settings.h" +#include "util/ini_settings_interface.h" #include "util/media_capture.h" +#include "common/error.h" + +#include +#include +#include +#include #include static QVariant GetMSAAModeValue(uint multisamples, bool ssaa) @@ -232,26 +240,50 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* // Texture Replacements Tab - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteReplacement, "TextureReplacements", - "EnableVRAMWriteReplacements", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureCache, "GPU", "EnableTextureCache", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", "UseOldMDECRoutines", false); + + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureReplacements, "TextureReplacements", + "EnableTextureReplacements", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.preloadTextureReplacements, "TextureReplacements", "PreloadTextures", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", "UseOldMDECRoutines", false); + + 
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureDumping, "TextureReplacements", "DumpTextures", + false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.dumpReplacedTextures, "TextureReplacements", + "DumpReplacedTextures", true); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteReplacement, "TextureReplacements", + "EnableVRAMWriteReplacements", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteDumping, "TextureReplacements", "DumpVRAMWrites", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.setVRAMWriteAlphaChannel, "TextureReplacements", - "DumpVRAMWriteForceAlphaChannel", true); - SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.minDumpedVRAMWriteWidth, "TextureReplacements", - "DumpVRAMWriteWidthThreshold", - Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD); - SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.minDumpedVRAMWriteHeight, "TextureReplacements", - "DumpVRAMWriteHeightThreshold", - Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD); + if (!m_dialog->isPerGameSettings()) + { + SettingWidgetBinder::BindWidgetToFolderSetting(sif, m_ui.texturesDirectory, m_ui.texturesDirectoryBrowse, + tr("Select Textures Directory"), m_ui.texturesDirectoryOpen, + m_ui.texturesDirectoryReset, "Folders", "Textures", + Path::Combine(EmuFolders::DataRoot, "textures")); + } + else + { + m_ui.tabTextureReplacementsLayout->removeWidget(m_ui.texturesDirectoryGroup); + delete m_ui.texturesDirectoryGroup; + m_ui.texturesDirectoryGroup = nullptr; + m_ui.texturesDirectoryBrowse = nullptr; + m_ui.texturesDirectoryOpen = nullptr; + m_ui.texturesDirectoryReset = nullptr; + m_ui.texturesDirectoryLabel = nullptr; + m_ui.texturesDirectory = nullptr; + } + + connect(m_ui.enableTextureCache, &QCheckBox::checkStateChanged, this, + &GraphicsSettingsWidget::onEnableTextureCacheChanged); + connect(m_ui.enableTextureReplacements, &QCheckBox::checkStateChanged, this, + 
&GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged); connect(m_ui.vramWriteReplacement, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged); - connect(m_ui.vramWriteDumping, &QCheckBox::checkStateChanged, this, - &GraphicsSettingsWidget::onEnableVRAMWriteDumpingChanged); + connect(m_ui.textureReplacementOptions, &QPushButton::clicked, this, + &GraphicsSettingsWidget::onTextureReplacementOptionsClicked); // Debugging Tab @@ -274,8 +306,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* onMediaCaptureBackendChanged(); onMediaCaptureAudioEnabledChanged(); onMediaCaptureVideoEnabledChanged(); + onEnableTextureCacheChanged(); onEnableAnyTextureReplacementsChanged(); - onEnableVRAMWriteDumpingChanged(); onShowDebugSettingsChanged(QtHost::ShouldShowDebugOptions()); // Rendering Tab @@ -534,23 +566,26 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* "separate two pairs from each other.
For example: \"compression_level = 4 : joint_stereo = 1\"")); // Texture Replacements Tab - - dialog->registerWidgetHelp(m_ui.vramWriteReplacement, tr("Enable VRAM Write Replacement"), tr("Unchecked"), - tr("Enables the replacement of background textures in supported games. This is " - "not general texture replacement.")); - dialog->registerWidgetHelp(m_ui.preloadTextureReplacements, tr("Preload Texture Replacements"), tr("Unchecked"), - tr("Loads all replacement texture to RAM, reducing stuttering at runtime.")); + dialog->registerWidgetHelp(m_ui.enableTextureCache, tr("Enable Texture Cache"), tr("Unchecked"), + tr("Enables caching of guest textures, required for texture replacement.")); dialog->registerWidgetHelp(m_ui.useOldMDECRoutines, tr("Use Old MDEC Routines"), tr("Unchecked"), tr("Enables the older, less accurate MDEC decoding routines. May be required for old " "replacement backgrounds to match/load.")); - dialog->registerWidgetHelp(m_ui.setVRAMWriteAlphaChannel, tr("Set Alpha Channel"), tr("Checked"), - tr("Clears the mask/transparency bit in VRAM write dumps.")); + + dialog->registerWidgetHelp(m_ui.enableTextureReplacements, tr("Enable Texture Replacements"), tr("Unchecked"), + tr("Enables loading of replacement textures. Not compatible with all games.")); + dialog->registerWidgetHelp(m_ui.preloadTextureReplacements, tr("Preload Texture Replacements"), tr("Unchecked"), + tr("Loads all replacement texture to RAM, reducing stuttering at runtime.")); + dialog->registerWidgetHelp( + m_ui.enableTextureDumping, tr("Enable Texture Dumping"), tr("Unchecked"), + tr("Enables dumping of textures to image files, which can be replaced. 
Not compatible with all games.")); + dialog->registerWidgetHelp(m_ui.dumpReplacedTextures, tr("Dump Replaced Textures"), tr("Checked"), + tr("Dumps textures that have replacements already loaded.")); + + dialog->registerWidgetHelp(m_ui.vramWriteReplacement, tr("Enable VRAM Write Replacement"), tr("Unchecked"), + tr("Enables the replacement of background textures in supported games.")); dialog->registerWidgetHelp(m_ui.vramWriteDumping, tr("Enable VRAM Write Dumping"), tr("Unchecked"), tr("Writes backgrounds that can be replaced to the dump directory.")); - dialog->registerWidgetHelp(m_ui.minDumpedVRAMWriteWidth, tr("Dump Size Threshold"), tr("128px"), - tr("Determines the threshold that triggers a VRAM write to be dumped.")); - dialog->registerWidgetHelp(m_ui.minDumpedVRAMWriteHeight, tr("Dump Size Threshold"), tr("128px"), - tr("Determines the threshold that triggers a VRAM write to be dumped.")); // Debugging Tab @@ -1097,19 +1132,120 @@ void GraphicsSettingsWidget::onMediaCaptureAudioEnabledChanged() m_ui.audioCaptureArguments->setEnabled(enabled); } +void GraphicsSettingsWidget::onEnableTextureCacheChanged() +{ + const bool tc_enabled = m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false); + m_ui.enableTextureReplacements->setEnabled(tc_enabled); + m_ui.enableTextureDumping->setEnabled(tc_enabled); +} + void GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged() { const bool any_replacements_enabled = - m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false); + (m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false) || + (m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false) && + m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableTextureReplacements", false))); m_ui.preloadTextureReplacements->setEnabled(any_replacements_enabled); } -void GraphicsSettingsWidget::onEnableVRAMWriteDumpingChanged() +void
GraphicsSettingsWidget::onTextureReplacementOptionsClicked() { - const bool enabled = m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpVRAMWrites", false); - m_ui.setVRAMWriteAlphaChannel->setEnabled(enabled); - m_ui.minDumpedVRAMWriteWidth->setEnabled(enabled); - m_ui.minDumpedVRAMWriteHeight->setEnabled(enabled); - m_ui.vramWriteDumpThresholdLabel->setEnabled(enabled); - m_ui.vramWriteDumpThresholdSeparator->setEnabled(enabled); + QDialog dlg(QtUtils::GetRootWidget(this)); + + Ui::TextureReplacementSettingsDialog dlgui; + dlgui.setupUi(&dlg); + dlgui.icon->setPixmap(QIcon::fromTheme(QStringLiteral("image-fill")).pixmap(32, 32)); + + constexpr Settings::TextureReplacementSettings::Configuration default_replacement_config; + SettingsInterface* const sif = m_dialog->getSettingsInterface(); + + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpTexturePages, "TextureReplacements", "DumpTexturePages", + default_replacement_config.dump_texture_pages); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpFullTexturePages, "TextureReplacements", + "DumpFullTexturePages", + default_replacement_config.dump_full_texture_pages); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpC16Textures, "TextureReplacements", "DumpC16Textures", + default_replacement_config.dump_c16_textures); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.reducePaletteRange, "TextureReplacements", + "ReducePaletteRange", default_replacement_config.reduce_palette_range); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.convertCopiesToWrites, "TextureReplacements", + "ConvertCopiesToWrites", + default_replacement_config.convert_copies_to_writes); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteSplits, "TextureReplacements", + "MaxVRAMWriteSplits", default_replacement_config.max_vram_write_splits); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteCoalesceWidth, "TextureReplacements", + 
"MaxVRAMWriteCoalesceWidth", + default_replacement_config.max_vram_write_coalesce_width); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteCoalesceHeight, "TextureReplacements", + "MaxVRAMWriteCoalesceHeight", + default_replacement_config.max_vram_write_coalesce_height); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedTextureWidth, "TextureReplacements", + "DumpTextureWidthThreshold", + default_replacement_config.texture_dump_width_threshold); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedTextureHeight, "TextureReplacements", + "DumpTextureHeightThreshold", + default_replacement_config.texture_dump_height_threshold); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.setTextureDumpAlphaChannel, "TextureReplacements", + "DumpTextureForceAlphaChannel", + default_replacement_config.dump_texture_force_alpha_channel); + + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedVRAMWriteWidth, "TextureReplacements", + "DumpVRAMWriteWidthThreshold", + default_replacement_config.vram_write_dump_width_threshold); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedVRAMWriteHeight, "TextureReplacements", + "DumpVRAMWriteHeightThreshold", + default_replacement_config.vram_write_dump_height_threshold); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.setVRAMWriteAlphaChannel, "TextureReplacements", + "DumpVRAMWriteForceAlphaChannel", + default_replacement_config.dump_vram_write_force_alpha_channel); + + dlgui.dumpFullTexturePages->setEnabled( + m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpTexturePages", false)); + connect(dlgui.dumpTexturePages, &QCheckBox::checkStateChanged, this, [this, full_cb = dlgui.dumpFullTexturePages]() { + full_cb->setEnabled(m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpTexturePages", false)); + }); + connect(dlgui.closeButton, &QPushButton::clicked, &dlg, &QDialog::accept); + connect(dlgui.exportButton, &QPushButton::clicked, 
&dlg, [&dlg, &dlgui]() { + Settings::TextureReplacementSettings::Configuration config; + + config.dump_texture_pages = dlgui.dumpTexturePages->isChecked(); + config.dump_full_texture_pages = dlgui.dumpFullTexturePages->isChecked(); + config.dump_c16_textures = dlgui.dumpC16Textures->isChecked(); + config.reduce_palette_range = dlgui.reducePaletteRange->isChecked(); + config.convert_copies_to_writes = dlgui.convertCopiesToWrites->isChecked(); + config.max_vram_write_splits = dlgui.maxVRAMWriteSplits->value(); + config.max_vram_write_coalesce_width = dlgui.maxVRAMWriteCoalesceWidth->value(); + config.max_vram_write_coalesce_height = dlgui.maxVRAMWriteCoalesceHeight->value(); + config.texture_dump_width_threshold = dlgui.minDumpedTextureWidth->value(); + config.texture_dump_height_threshold = dlgui.minDumpedTextureHeight->value(); + config.dump_texture_force_alpha_channel = dlgui.setTextureDumpAlphaChannel->isChecked(); + config.vram_write_dump_width_threshold = dlgui.minDumpedVRAMWriteWidth->value(); + config.vram_write_dump_height_threshold = dlgui.minDumpedVRAMWriteHeight->value(); + config.dump_vram_write_force_alpha_channel = dlgui.setTextureDumpAlphaChannel->isChecked(); + + QInputDialog idlg(&dlg); + idlg.resize(600, 400); + idlg.setWindowTitle(tr("Texture Replacement Configuration")); + idlg.setInputMode(QInputDialog::TextInput); + idlg.setOption(QInputDialog::UsePlainTextEditForTextInput); + idlg.setLabelText(tr("Texture Replacement Configuration (config.yaml)")); + idlg.setTextValue(QString::fromStdString(config.ExportToYAML(false))); + idlg.setOkButtonText(tr("Save")); + if (idlg.exec()) + { + const QString path = QFileDialog::getSaveFileName(&dlg, tr("Save Configuration"), QString(), + tr("Configuration Files (config.yaml)")); + if (path.isEmpty()) + return; + + Error error; + if (!FileSystem::WriteStringToFile(QDir::toNativeSeparators(path).toUtf8().constData(), + idlg.textValue().toStdString(), &error)) + { + QMessageBox::critical(&dlg, tr("Write 
Failed"), QString::fromStdString(error.GetDescription())); + } + } + }); + + dlg.exec(); } diff --git a/src/duckstation-qt/graphicssettingswidget.h b/src/duckstation-qt/graphicssettingswidget.h index 6752ac2f1..d698742a2 100644 --- a/src/duckstation-qt/graphicssettingswidget.h +++ b/src/duckstation-qt/graphicssettingswidget.h @@ -39,8 +39,9 @@ private Q_SLOTS: void onMediaCaptureVideoAutoResolutionChanged(); void onMediaCaptureAudioEnabledChanged(); + void onEnableTextureCacheChanged(); void onEnableAnyTextureReplacementsChanged(); - void onEnableVRAMWriteDumpingChanged(); + void onTextureReplacementOptionsClicked(); private: static constexpr int TAB_INDEX_RENDERING = 0; diff --git a/src/duckstation-qt/graphicssettingswidget.ui b/src/duckstation-qt/graphicssettingswidget.ui index fb3f2ce20..618ca4720 100644 --- a/src/duckstation-qt/graphicssettingswidget.ui +++ b/src/duckstation-qt/graphicssettingswidget.ui @@ -1054,9 +1054,9 @@ - Texture Replacements + Texture Replacement - + 0 @@ -1067,31 +1067,85 @@ 0 - + General Settings + + + + + Enable Texture Cache + + + + + + + The texture cache is currently experimental, and may cause rendering errors in some games. + + + true + + + + + + + Use Old MDEC Routines + + + + + + + + + + Texture Replacement + - + - + - Enable VRAM Write Replacement + Enable Texture Replacements - + Preload Texture Replacements - - + + + + + + + + + Enable Texture Dumping + + + + + + + + + + + + + + - Use Old MDEC Routines + Dump Replaced Textures @@ -1103,70 +1157,66 @@ - VRAM Write Dumping + VRAM Write (Background) Replacement + + + Enable VRAM Write Replacement + + + + Enable VRAM Write Dumping - - - - Set Alpha Channel - - - - - + + + + + + + Textures Directory + + + + - Dump Size Threshold: + Open... + + + + + + + Directory to load replacement textures from, and save dumps to. - - - - - px - - - 1 - - - 1024 - - - - - - - x - - - - - - - px - - - 1 - - - 512 - - - - + + + Browse... 
+ + + + + + + + + + Reset + + diff --git a/src/duckstation-qt/mainwindow.cpp b/src/duckstation-qt/mainwindow.cpp index 805de498a..5b047d4c2 100644 --- a/src/duckstation-qt/mainwindow.cpp +++ b/src/duckstation-qt/mainwindow.cpp @@ -1814,6 +1814,7 @@ void MainWindow::updateEmulationActions(bool starting, bool running, bool cheevo m_ui.menuCheats->setDisabled(cheevos_challenge_mode); m_ui.actionCPUDebugger->setDisabled(cheevos_challenge_mode); m_ui.actionMemoryScanner->setDisabled(cheevos_challenge_mode); + m_ui.actionReloadTextureReplacements->setDisabled(starting || !running); m_ui.actionDumpRAM->setDisabled(starting || !running || cheevos_challenge_mode); m_ui.actionDumpVRAM->setDisabled(starting || !running || cheevos_challenge_mode); m_ui.actionDumpSPURAM->setDisabled(starting || !running || cheevos_challenge_mode); @@ -2095,6 +2096,8 @@ void MainWindow::connectSignals() connect(m_ui.actionCPUDebugger, &QAction::triggered, this, &MainWindow::openCPUDebugger); SettingWidgetBinder::BindWidgetToBoolSetting(nullptr, m_ui.actionEnableGDBServer, "Debug", "EnableGDBServer", false); connect(m_ui.actionOpenDataDirectory, &QAction::triggered, this, &MainWindow::onToolsOpenDataDirectoryTriggered); + connect(m_ui.actionOpenTextureDirectory, &QAction::triggered, this, &MainWindow::onToolsOpenTextureDirectoryTriggered); + connect(m_ui.actionReloadTextureReplacements, &QAction::triggered, g_emu_thread, &EmuThread::reloadTextureReplacements); connect(m_ui.actionMergeDiscSets, &QAction::triggered, m_game_list_widget, &GameListWidget::setMergeDiscSets); connect(m_ui.actionShowGameIcons, &QAction::triggered, m_game_list_widget, &GameListWidget::setShowGameIcons); connect(m_ui.actionGridViewShowTitles, &QAction::triggered, m_game_list_widget, &GameListWidget::setShowCoverTitles); @@ -2815,6 +2818,17 @@ void MainWindow::onToolsOpenDataDirectoryTriggered() QtUtils::OpenURL(this, QUrl::fromLocalFile(QString::fromStdString(EmuFolders::DataRoot))); } +/* Passes EmuFolders::Textures to QtUtils::OpenURL() as a local-file URL; while a system is valid and a game serial is set, the per-serial subdirectory is passed instead. */ +void
MainWindow::onToolsOpenTextureDirectoryTriggered() +{ + QString dir = QString::fromStdString(EmuFolders::Textures); + /* Multi-argument arg() substitutes both values in a single pass, so a "%N" sequence inside the directory path is never mistaken for a placeholder. */ + if (s_system_valid && !s_current_game_serial.isEmpty()) + dir = QStringLiteral("%1" FS_OSPATH_SEPARATOR_STR "%2").arg(dir, s_current_game_serial); + + QtUtils::OpenURL(this, QUrl::fromLocalFile(dir)); +} + void MainWindow::onSettingsTriggeredFromToolbar() { if (s_system_valid) diff --git a/src/duckstation-qt/mainwindow.h b/src/duckstation-qt/mainwindow.h index c14bf4411..039ebddff 100644 --- a/src/duckstation-qt/mainwindow.h +++ b/src/duckstation-qt/mainwindow.h @@ -178,6 +178,7 @@ private Q_SLOTS: void onToolsCoverDownloaderTriggered(); void onToolsMediaCaptureToggled(bool checked); void onToolsOpenDataDirectoryTriggered(); + void onToolsOpenTextureDirectoryTriggered(); void onSettingsTriggeredFromToolbar(); void onGameListRefreshComplete(); diff --git a/src/duckstation-qt/mainwindow.ui b/src/duckstation-qt/mainwindow.ui index 9668983d0..ac295481a 100644 --- a/src/duckstation-qt/mainwindow.ui +++ b/src/duckstation-qt/mainwindow.ui @@ -208,6 +208,8 @@ &Tools + + @@ -215,7 +217,8 @@ - + + @@ -917,6 +920,16 @@ Media Ca&pture + + + Open Texture Directory...
+ + + + + Reload Texture Replacements + + + diff --git a/src/duckstation-qt/qthost.cpp b/src/duckstation-qt/qthost.cpp index 0a4a68a12..ea39688e2 100644 --- a/src/duckstation-qt/qthost.cpp +++ b/src/duckstation-qt/qthost.cpp @@ -19,6 +19,7 @@ #include "core/game_list.h" #include "core/gdb_server.h" #include "core/gpu.h" +#include "core/gpu_hw_texture_cache.h" #include "core/host.h" #include "core/imgui_overlays.h" #include "core/memory_card.h" @@ -1351,6 +1352,20 @@ void EmuThread::clearInputBindStateFromSource(InputBindingKey key) InputManager::ClearBindStateFromSource(key); } +/* Reloads texture replacements via GPUTextureCache when isOnThread() holds and System::IsValid(); otherwise queues this same slot on this object and returns. */ +void EmuThread::reloadTextureReplacements() +{ + if (isOnThread()) + { + if (System::IsValid()) + GPUTextureCache::ReloadTextureReplacements(); + return; + } + + /* Not on this object's thread: re-dispatch by slot name with a queued connection. */ + QMetaObject::invokeMethod(this, "reloadTextureReplacements", Qt::QueuedConnection); +} + void EmuThread::runOnEmuThread(std::function callback) { callback(); diff --git a/src/duckstation-qt/qthost.h b/src/duckstation-qt/qthost.h index 7c72db7be..f2881b168 100644 --- a/src/duckstation-qt/qthost.h +++ b/src/duckstation-qt/qthost.h @@ -199,6 +199,7 @@ public Q_SLOTS: void reloadPostProcessingShaders(); void updatePostProcessingSettings(); void clearInputBindStateFromSource(InputBindingKey key); + void reloadTextureReplacements(); private Q_SLOTS: void stopInThread(); diff --git a/src/duckstation-qt/texturereplacementsettingsdialog.ui b/src/duckstation-qt/texturereplacementsettingsdialog.ui new file mode 100644 index 000000000..ac42db411 --- /dev/null +++ b/src/duckstation-qt/texturereplacementsettingsdialog.ui @@ -0,0 +1,380 @@ + + + TextureReplacementSettingsDialog + + + + 0 + 0 + 646 + 587 + + + + Texture Replacement Settings + + + + + + 10 + + + + + + 32 + 32 + + + + + 32 + 16777215 + + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + + <html><head/><body><p><span style=" font-weight:700;">Texture Replacement Settings</span><br/>These settings fine-tune the behavior of the
texture replacement system. You can also export a game-specific configuration file. Each of the options is explained in the configuration file, and at <a href="https://github.com/stenzek/duckstation/wiki/Texture-Replacement"><span style=" text-decoration: underline; color:#0078d4;">https://github.com/stenzek/duckstation/wiki/Texture-Replacement</span></a>.</p></body></html> + + + Qt::TextFormat::RichText + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + true + + + + + + + + + Texture Dumping Mode + + + + + + Reduce Palette Range + + + + + + + Dump Texture Pages + + + + + + + Dump C16 Textures + + + + + + + Dump Full Texture Pages + + + + + + + The texture dumping system can either operate in page mode, or write-tracking mode. Replacements can be loaded from either dump method. + + + true + + + + + + + + + + Write Tracking Options + + + + + + Maximum Write Coalesce Size: + + + + + + + + + px + + + 0 + + + 1024 + + + 0 + + + + + + + x + + + + + + + px + + + 0 + + + 512 + + + 0 + + + + + + + + + Maximum Write Splits: + + + + + + + 0 + + + 32 + + + + + + + Convert Copies To Writes + + + + + + + + + + Texture Dumping Options + + + + + + Dump Size Threshold: + + + + + + + + + px + + + 1 + + + 1024 + + + + + + + x + + + + + + + px + + + 1 + + + 512 + + + + + + + Set Alpha Channel + + + + + + + + + Determines the minimum size of a texture that will be dumped. Textures with a size smaller than this value will be ignored. + + + true + + + + + + + + + + Background Dumping Options + + + + + + Dump Size Threshold: + + + + + + + + + px + + + 1 + + + 1024 + + + + + + + x + + + + + + + px + + + 1 + + + 512 + + + + + + + Set Alpha Channel + + + + + + + + + Determines the minimum size of a VRAM write that will be dumped, in background dumping mode. Uploads smaller than this size will be ignored. 
+ + + true + + + + + + + + + + Qt::Orientation::Vertical + + + + 20 + 40 + + + + + + + + + + Qt::Orientation::Horizontal + + + + 198 + 20 + + + + + + + + Export... + + + + + + + Close + + + true + + + + + + + + + +