From 1e93201e179ef91afbe33a818970aed210baf3fc Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 31 Dec 2023 19:40:10 +1000 Subject: [PATCH] GPU: Add hardware texture cache --- src/core/CMakeLists.txt | 2 + src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 2 + src/core/gpu.cpp | 4 +- src/core/gpu_commands.cpp | 2 +- src/core/gpu_hw.cpp | 354 ++- src/core/gpu_hw.h | 26 +- src/core/gpu_hw_shadergen.cpp | 118 +- src/core/gpu_hw_shadergen.h | 6 +- src/core/gpu_hw_texture_cache.cpp | 1915 +++++++++++++++++ src/core/gpu_hw_texture_cache.h | 137 ++ src/core/gpu_sw.cpp | 7 +- src/core/gpu_types.h | 156 +- src/core/save_state_version.h | 2 +- src/core/settings.cpp | 98 +- src/core/settings.h | 40 +- src/core/system.cpp | 102 +- src/core/texture_replacements.cpp | 1251 ++++++++++- src/core/texture_replacements.h | 52 +- src/duckstation-qt/CMakeLists.txt | 1 + src/duckstation-qt/duckstation-qt.vcxproj | 3 + .../duckstation-qt.vcxproj.filters | 1 + src/duckstation-qt/graphicssettingswidget.cpp | 158 +- src/duckstation-qt/graphicssettingswidget.h | 3 +- src/duckstation-qt/graphicssettingswidget.ui | 130 +- .../texturereplacementsettingsdialog.ui | 359 +++ 26 files changed, 4530 insertions(+), 401 deletions(-) create mode 100644 src/core/gpu_hw_texture_cache.cpp create mode 100644 src/core/gpu_hw_texture_cache.h create mode 100644 src/duckstation-qt/texturereplacementsettingsdialog.ui diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index a0a9662e7..2c098860d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -51,6 +51,8 @@ add_library(core gpu_hw.h gpu_hw_shadergen.cpp gpu_hw_shadergen.h + gpu_hw_texture_cache.cpp + gpu_hw_texture_cache.h gpu_shadergen.cpp gpu_shadergen.h gpu_sw.cpp diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index 2144c2873..7cf309b9d 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -47,6 +47,7 @@ + @@ -131,6 +132,7 @@ + diff --git a/src/core/core.vcxproj.filters 
b/src/core/core.vcxproj.filters index f623ed9f2..ffa65a8a7 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -69,6 +69,7 @@ + @@ -143,6 +144,7 @@ + diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index bc0743296..9c8a4f908 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1719,8 +1719,8 @@ void GPU::SetDrawMode(u16 value) if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) return; - m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK)); + m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK) != + (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_MODE_AND_PAGE_MASK)); m_draw_mode.mode_reg.bits = new_mode_reg.bits; if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 6bc3effee..ad12627c3 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -532,7 +532,7 @@ void GPU::FinishVRAMWrite() m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, m_blit_buffer.data(), true); } - if (g_settings.texture_replacements.ShouldDumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height)) + if (TextureReplacements::ShouldDumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height)) { TextureReplacements::DumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height, reinterpret_cast(m_blit_buffer.data())); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 2f18e44c5..dfa378caa 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -188,6 +188,8 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { + GPUTextureCache::Shutdown(); + if (m_sw_renderer) { m_sw_renderer->Shutdown(); @@ -256,6 +258,8 @@ bool GPU_HW::Initialize() m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); m_compute_uv_range = m_clamp_uvs; 
m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); + m_use_texture_cache = g_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; CheckSettings(); @@ -276,13 +280,27 @@ bool GPU_HW::Initialize() return false; } + if (m_use_texture_cache) + { + if (!GPUTextureCache::Initialize()) + { + ERROR_LOG("Failed to initialize texture cache, disabling."); + m_use_texture_cache = false; + } + } + UpdateDownsamplingLevels(); + RestoreDeviceContext(); return true; } void GPU_HW::Reset(bool clear_vram) { + // Texture cache needs to be invalidated before we load, otherwise we dump black. + if (m_use_texture_cache) + GPUTextureCache::Invalidate(); + if (m_batch_vertex_ptr) UnmapGPUBuffer(0, 0); @@ -360,6 +378,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di else if (sw.IsReading()) { // Need to update the VRAM copy on the GPU with the state data. + // Would invalidate the TC, but base DoState() calls Reset(). 
UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); } @@ -369,10 +388,12 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); ClearVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle(); + UpdateVRAMReadTexture(true, false); + ClearVRAMDirtyRectangle(); ResetBatchVertexDepth(); } - return true; + return GPUTextureCache::DoState(sw, !m_use_texture_cache); } void GPU_HW::RestoreDeviceContext() @@ -464,6 +485,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); + m_use_texture_cache = g_settings.gpu_texture_cache; + m_texture_dumping = m_use_texture_cache && g_settings.texture_replacements.dump_textures; m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); @@ -517,6 +540,23 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDepthBufferFromMaskBit(); } + if (m_use_texture_cache && !old_settings.gpu_texture_cache) + { + if (!GPUTextureCache::Initialize()) + { + ERROR_LOG("Failed to initialize texture cache, disabling."); + m_use_texture_cache = false; + } + } + else if (!m_use_texture_cache && old_settings.gpu_texture_cache) + { + GPUTextureCache::Shutdown(); + } + else if (m_use_texture_cache) + { + GPUTextureCache::UpdateSettings(old_settings); + } + if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) @@ -722,6 +762,9 @@ void GPU_HW::AddWrittenRectangle(const GSVector4i rect) { m_vram_dirty_write_rect = m_vram_dirty_write_rect.runion(rect); 
SetTexPageChangedOnOverlap(m_vram_dirty_write_rect); + + if (m_use_texture_cache) + GPUTextureCache::AddWrittenRectangle(rect); } void GPU_HW::AddDrawnRectangle(const GSVector4i rect) @@ -729,13 +772,22 @@ void GPU_HW::AddDrawnRectangle(const GSVector4i rect) // Normally, we would check for overlap here. But the GPU's texture cache won't actually reload until the page // changes, or it samples a larger region, so we can get away without doing so. This reduces copies considerably in // games like Mega Man Legends 2. - m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); + if (m_current_draw_rect.rcontains(rect)) + return; + + m_current_draw_rect = m_current_draw_rect.runion(rect); + m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(m_current_draw_rect); + + if (m_use_texture_cache) + GPUTextureCache::AddDrawnRectangle(m_current_draw_rect); } void GPU_HW::AddUnclampedDrawnRectangle(const GSVector4i rect) { m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); SetTexPageChangedOnOverlap(m_vram_dirty_draw_rect); + if (m_use_texture_cache) + GPUTextureCache::AddDrawnRectangle(rect); } void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) @@ -743,9 +795,9 @@ void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) // the vram area can include the texture page, but the game can leave it as-is. 
in this case, set it as dirty so the // shadow texture is updated if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && - (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(update_rect) || + (GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect) || (m_draw_mode.mode_reg.IsUsingPalette() && - m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) + GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) { m_draw_mode.SetTexturePageChanged(); } @@ -886,6 +938,8 @@ void GPU_HW::ClearFramebuffer() g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); } ClearVRAMDirtyRectangle(); + if (m_use_texture_cache) + GPUTextureCache::Invalidate(); m_last_depth_z = 1.0f; } @@ -990,7 +1044,7 @@ bool GPU_HW::CompilePipelines(Error* error) m_allow_sprite_mode ? NUM_TEXTURE_MODES : (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast(BatchTextureMode::SpriteStart))); const u32 total_pipelines = - (m_allow_sprite_mode ? 5 : 3) + // vertex shaders + (m_allow_sprite_mode ? 7 : 4) + // vertex shaders (active_texture_modes * 5 * 9 * 2 * 2 * 2 * (1 + BoolToUInt32(needs_rov_depth))) + // fragment shaders ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 
1 : 0) + // wireframe @@ -1009,7 +1063,7 @@ bool GPU_HW::CompilePipelines(Error* error) // vertex shaders - [textured/palette/sprite] // fragment shaders - [depth_test][render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; - DimensionalArray, 2, 2, 2> batch_vertex_shaders{}; + DimensionalArray, 2, 3, 2> batch_vertex_shaders{}; DimensionalArray, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); @@ -1018,13 +1072,13 @@ bool GPU_HW::CompilePipelines(Error* error) for (u8 textured = 0; textured < 2; textured++) { - for (u8 palette = 0; palette < (textured ? 2 : 1); palette++) + for (u8 palette = 0; palette < (textured ? 3 : 1); palette++) { for (u8 sprite = 0; sprite < (textured ? 2 : 1); sprite++) { const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); const std::string vs = shadergen.GenerateBatchVertexShader( - textured != 0, palette != 0, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer); + textured != 0, palette == 1, palette == 2, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer); if (!(batch_vertex_shaders[textured][palette][sprite] = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error))) { @@ -1175,6 +1229,8 @@ bool GPU_HW::CompilePipelines(Error* error) static_cast(texture_mode) == BatchTextureMode::Palette8Bit || static_cast(texture_mode) == BatchTextureMode::SpritePalette4Bit || static_cast(texture_mode) == BatchTextureMode::SpritePalette8Bit); + const bool page_texture = + (static_cast(texture_mode) == BatchTextureMode::PageTexture); const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); const bool uv_limits = ShouldClampUVs(sprite ? 
m_sprite_texture_filtering : m_texture_filtering); const bool use_shader_blending = (render_mode == static_cast(BatchRenderMode::ShaderBlend)); @@ -1188,7 +1244,9 @@ bool GPU_HW::CompilePipelines(Error* error) std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); plconfig.vertex_shader = - batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get(); + batch_vertex_shaders[BoolToUInt8(textured)][page_texture ? 2 : BoolToUInt8(palette)] + [BoolToUInt8(sprite)] + .get(); plconfig.fragment_shader = batch_fragment_shaders[BoolToUInt8(depth_test && needs_rov_depth)][render_mode] [use_shader_blending ? transparency_mode : @@ -1817,19 +1875,26 @@ void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices) } ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, - u32 base_vertex) + u32 base_vertex, const GPUTextureCache::Source* texture) { // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] - const u8 texture_mode = static_cast(m_batch.texture_mode) + - ((m_batch.texture_mode != BatchTextureMode::Disabled && m_batch.sprite_mode) ? - static_cast(BatchTextureMode::SpriteStart) : - 0); + const u8 texture_mode = texture ? static_cast(BatchTextureMode::PageTexture) : + (static_cast(m_batch.texture_mode) + + ((m_batch.texture_mode < BatchTextureMode::PageTexture && m_batch.sprite_mode) ? + static_cast(BatchTextureMode::SpriteStart) : + 0)); const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer); const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw); g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast(m_batch.transparency_mode)][static_cast( render_mode)][texture_mode][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)][check_mask] .get()); + // TODO: Totally not optimized. 
+ if (texture) + g_gpu_device->SetTextureSampler(0, texture->texture, g_gpu_device->GetNearestSampler()); + else if (texture_mode != static_cast(BatchTextureMode::Disabled)) + g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); + GL_INS_FMT("Texture mode: {}", s_batch_texture_modes[texture_mode]); GL_INS_FMT("Transparency mode: {}", s_transparency_modes[static_cast(m_batch.transparency_mode)]); GL_INS_FMT("Render mode: {}", s_batch_render_modes[static_cast(render_mode)]); @@ -2180,7 +2245,7 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) for (u32 i = 0; i < num_vertices; i++) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); - if (m_texpage_dirty != 0) + if (ShouldCheckForTexPageOverlap()) CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32()); } @@ -2601,7 +2666,7 @@ void GPU_HW::LoadVertices() const u32 tex_right = tex_left + quad_width; const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - if (rc.texture_enable && m_texpage_dirty != 0) + if (rc.texture_enable && ShouldCheckForTexPageOverlap()) { CheckForTexPageOverlap(GSVector4i(static_cast(tex_left), static_cast(tex_top), static_cast(tex_right), static_cast(tex_bottom))); @@ -2825,7 +2890,7 @@ bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacements::ReplacementIm ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) { - DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != BatchTextureMode::Disabled); + DebugAssert((m_texpage_dirty != 0 || m_texture_dumping) && m_batch.texture_mode != BatchTextureMode::Disabled); if (m_texture_window_active) { @@ -2852,6 +2917,34 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) m_current_uv_rect = new_uv_rect; bool update_drawn = false, update_written = false; + if (m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) + { + DebugAssert(!(m_texpage_dirty & 
(TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT))); + DebugAssert(m_batch.texture_mode == BatchTextureMode::PageTexture && + m_batch.texture_cache_key.page < NUM_VRAM_PAGES); + + if (GPUTextureCache::AreSourcePagesDrawn(m_batch.texture_cache_key, m_current_uv_rect)) + { + // UVs intersect with drawn area, can't use TC + if (m_batch_index_count > 0) + { + FlushRender(); + EnsureVertexBufferSpaceForCurrentCommand(); + } + + // We need to swap the dirty tracking over to drawn/written. + const GSVector4i page_rect = GetTextureRect(m_batch.texture_cache_key.page, m_batch.texture_cache_key.mode); + m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) | + (m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0); + m_compute_uv_range = (ShouldCheckForTexPageOverlap() || m_clamp_uvs); + m_batch.texture_mode = static_cast(m_draw_mode.mode_reg.texture_mode.GetValue()); + } + else + { + // Page isn't drawn, we're done. + return; + } + } if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) { DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT)); @@ -2886,6 +2979,11 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) } } +bool GPU_HW::ShouldCheckForTexPageOverlap() const +{ + return (m_texpage_dirty != 0); +} + ALWAYS_INLINE bool GPU_HW::IsFlushed() const { return (m_batch_index_count == 0); @@ -2982,8 +3080,9 @@ ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) { + // TODO: SW-for-readbacks is currently incompatible with the texture cache, due to threading races. 
const bool current_enabled = (m_sw_renderer != nullptr); - const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks; + const bool new_enabled = !m_use_texture_cache && g_settings.gpu_use_software_renderer_for_readbacks; if (current_enabled == new_enabled) return; @@ -3061,7 +3160,21 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) GL_INS_FMT("Dirty draw area before: {}", m_vram_dirty_draw_rect); const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); - AddUnclampedDrawnRectangle(bounds); + + // If TC is enabled, we have to update local memory. + if (m_use_texture_cache && !IsInterlacedRenderingEnabled()) + { + AddWrittenRectangle(bounds); + + if (m_sw_renderer) + m_sw_renderer->Sync(true); + else + GPU::FillVRAM(x, y, width, height, color); + } + else + { + AddUnclampedDrawnRectangle(bounds); + } GL_INS_FMT("Dirty draw area after: {}", m_vram_dirty_draw_rect); @@ -3107,6 +3220,8 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) return; } + // TODO: Only read if it's in the drawn area + // Get bounds with wrap-around handled. GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height); @@ -3158,7 +3273,21 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b { GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height); - if (m_sw_renderer) + // TODO: Handle wrapped transfers... break them up or something + const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); + DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); + AddWrittenRectangle(bounds); + + // We want to dump *before* the write goes through, otherwise we dump bad data. 
+ if (m_use_texture_cache) + { + if (m_sw_renderer) + m_sw_renderer->Sync(true); + + GPU::UpdateVRAM(x, y, width, height, data, set_mask, check_mask); + GPUTextureCache::TrackVRAMWrite(bounds); + } + else if (m_sw_renderer) { const u32 num_words = width * height; GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words); @@ -3173,10 +3302,6 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b m_sw_renderer->PushCommand(cmd); } - const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height); - DebugAssert(bounds.right <= static_cast(VRAM_WIDTH) && bounds.bottom <= static_cast(VRAM_HEIGHT)); - AddWrittenRectangle(bounds); - if (check_mask) { // set new vertex counter since we want this to take into consideration previous masked pixels @@ -3264,7 +3389,32 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 { GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y); - if (m_sw_renderer) + // masking enabled, oversized, or overlapping + const bool use_shader = + (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); + const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + + // If we're copying a region that hasn't been drawn to, and we're using the TC, we can do it in local memory. 
+ if (m_use_texture_cache && !GPUTextureCache::IsRectDrawn(src_bounds)) + { + GL_INS("Performed in local memory."); + + if (m_sw_renderer) + m_sw_renderer->Sync(true); + + GPUTextureCache::AddWrittenRectangle(dst_bounds); + // GPUTextureCache::AddCopiedRectanglePart1(dst_bounds); // needed for FF8 because it animates textures by copying + GPU::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + UpdateVRAMOnGPU(dst_bounds.left, dst_bounds.top, dst_bounds.width(), dst_bounds.height(), + &g_vram[dst_bounds.top * VRAM_WIDTH + dst_bounds.left], VRAM_WIDTH * sizeof(u16), false, false, + dst_bounds); + // GPUTextureCache::AddCopiedRectanglePart2(dst_bounds); + return; + } + else if (m_sw_renderer) { GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand(); FillBackendCommandParameters(cmd); @@ -3277,16 +3427,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 m_sw_renderer->PushCommand(cmd); } - // masking enabled, oversized, or overlapping - const bool use_shader = - (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || - ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT); - const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); const bool intersect_with_draw = m_vram_dirty_draw_rect.rintersects(src_bounds); const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); - if (use_shader || IsUsingMultisampling()) { if (intersect_with_draw || intersect_with_write) @@ -3324,6 +3466,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); + 
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -3343,7 +3486,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); } - if (intersect_with_draw) + // We don't have it in local memory, so TC can't read it. + if (intersect_with_draw || m_use_texture_cache) { AddUnclampedDrawnRectangle(dst_bounds); } @@ -3379,77 +3523,112 @@ void GPU_HW::DispatchRenderCommand() { const GPURenderCommand rc{m_render_command.bits}; - BatchTextureMode texture_mode = BatchTextureMode::Disabled; + // TODO: avoid all this for vertex loading, only do when the type of draw changes + BatchTextureMode texture_mode = rc.IsTexturingEnabled() ? m_batch.texture_mode : BatchTextureMode::Disabled; + GPUTextureCache::SourceKey texture_cache_key = m_batch.texture_cache_key; if (rc.IsTexturingEnabled()) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged()) + if (m_draw_mode.IsTexturePageChanged() || texture_mode == BatchTextureMode::Disabled) { m_draw_mode.ClearTexturePageChangedFlag(); -#if 0 - if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) - { - GL_INS_FMT("VRAM DIRTY: {} {}", m_vram_dirty_draw_rect, m_vram_dirty_write_rect); - GL_INS_FMT("PAGE RECT: {}", m_draw_mode.mode_reg.GetTexturePageRectangle()); - if (m_draw_mode.mode_reg.IsUsingPalette()) - GL_INS_FMT("PALETTE RECT: {}", m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode)); - } -#endif + // start by assuming we can use the TC + bool use_texture_cache = m_use_texture_cache; + // check that the palette isn't in a drawn area if (m_draw_mode.mode_reg.IsUsingPalette()) { - const GSVector4i palette_rect = 
m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); - const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); - const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); - if (update_drawn || update_written) + const GSVector4i palette_rect = + GetPaletteRect(m_draw_mode.palette_reg, m_draw_mode.mode_reg.texture_mode, use_texture_cache); + if (!use_texture_cache || GPUTextureCache::IsRectDrawn(palette_rect)) { - GL_INS("Palette in VRAM dirty area, flushing cache"); - if (!IsFlushed()) - FlushRender(); + if (use_texture_cache) + GL_INS_FMT("Palette at {} is in drawn area, can't use TC", palette_rect); + use_texture_cache = false; - UpdateVRAMReadTexture(update_drawn, update_written); + const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); + const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); + if (update_drawn || update_written) + { + GL_INS("Palette in VRAM dirty area, flushing cache"); + if (!IsFlushed()) + FlushRender(); + + UpdateVRAMReadTexture(update_drawn, update_written); + } } } - const GSVector4i page_rect = m_draw_mode.mode_reg.GetTexturePageRectangle(); - GSVector4i::storel(m_current_texture_page_offset, page_rect); + m_compute_uv_range = (m_clamp_uvs || m_texture_dumping); - u8 new_texpage_dirty = m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0; - new_texpage_dirty |= m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0; + const GPUTextureMode gpu_texture_mode = + (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? GPUTextureMode::Direct16Bit : + m_draw_mode.mode_reg.texture_mode; + const GSVector4i page_rect = GetTextureRect(m_draw_mode.mode_reg.texture_page, m_draw_mode.mode_reg.texture_mode); - if (new_texpage_dirty != 0) + // TODO: This will result in incorrect global-space UVs when the texture page wraps around. 
+ // Need to deal with it if it becomes a problem. + m_current_texture_page_offset[0] = static_cast(m_draw_mode.mode_reg.GetTexturePageBaseX()); + m_current_texture_page_offset[1] = static_cast(m_draw_mode.mode_reg.GetTexturePageBaseY()); + + if (use_texture_cache) { - GL_INS("Texpage is in dirty area, checking UV ranges"); - m_texpage_dirty = new_texpage_dirty; - m_compute_uv_range = true; - m_current_uv_rect = INVALID_RECT; + texture_mode = BatchTextureMode::PageTexture; + texture_cache_key = + GPUTextureCache::SourceKey(m_draw_mode.mode_reg.texture_page, m_draw_mode.palette_reg, gpu_texture_mode); + + const bool is_drawn = GPUTextureCache::IsRectDrawn(page_rect); + if (is_drawn) + GL_INS_FMT("Texpage [{}] {} is drawn in TC, checking UV ranges", texture_cache_key.page, page_rect); + + m_texpage_dirty = + (is_drawn ? TEXPAGE_DIRTY_PAGE_RECT : 0) | (m_texture_dumping ? TEXPAGE_DIRTY_ONLY_UV_RECT : 0); + m_compute_uv_range |= ShouldCheckForTexPageOverlap(); } else { - m_compute_uv_range = m_clamp_uvs; - if (m_texpage_dirty) - GL_INS("Texpage is no longer dirty"); - m_texpage_dirty = 0; + texture_mode = static_cast(gpu_texture_mode); + m_texpage_dirty = (m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0) | + (m_vram_dirty_write_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_WRITTEN_RECT : 0); + if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT) + GL_INS_FMT("Texpage {} is in dirty DRAWN area {}", page_rect, m_vram_dirty_draw_rect); + if (m_texpage_dirty & TEXPAGE_DIRTY_WRITTEN_RECT) + GL_INS_FMT("Texpage {} is in dirty WRITTEN area {}", page_rect, m_vram_dirty_write_rect); + + // Current UV rect _must_ be cleared here, because we only check for texpage intersection when it grows in + // size; a switch from a non-contained page to a contained page would go undetected otherwise. 
+ if (m_texpage_dirty != 0) + { + m_compute_uv_range = true; + m_current_uv_rect = INVALID_RECT; + } } } - - texture_mode = (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? - BatchTextureMode::Direct16Bit : - static_cast(m_draw_mode.mode_reg.texture_mode.GetValue()); } + DebugAssert((rc.IsTexturingEnabled() && (texture_mode == BatchTextureMode::PageTexture && + texture_cache_key.mode == m_draw_mode.mode_reg.texture_mode) || + texture_mode == static_cast(m_draw_mode.mode_reg.texture_mode.GetValue())) || + (!rc.IsTexturingEnabled() && texture_mode == BatchTextureMode::Disabled)); + DebugAssert(!(m_texpage_dirty & TEXPAGE_DIRTY_PAGE_RECT) || texture_mode == BatchTextureMode::PageTexture || + !rc.IsTexturingEnabled()); + // has any state changed which requires a new batch? // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows. const GPUTransparencyMode transparency_mode = rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? 
m_GPUSTAT.dither_enable : false; - if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || - (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || - dithering_enable != m_batch.dithering) + if (!IsFlushed()) { - FlushRender(); + if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || + (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || + dithering_enable != m_batch.dithering || + (texture_mode == BatchTextureMode::PageTexture && m_batch.texture_cache_key != texture_cache_key)) + { + FlushRender(); + } } EnsureVertexBufferSpaceForCurrentCommand(); @@ -3493,6 +3672,7 @@ void GPU_HW::DispatchRenderCommand() m_batch.texture_mode = texture_mode; m_batch.transparency_mode = transparency_mode; m_batch.dithering = dithering_enable; + m_batch.texture_cache_key = texture_cache_key; if (m_draw_mode.IsTextureWindowChanged()) { @@ -3558,10 +3738,21 @@ void GPU_HW::FlushRender() return; #ifdef _DEBUG - GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number); + GL_SCOPE_FMT("Hardware Draw {}: {}", ++s_draw_number, m_current_draw_rect); #endif GL_INS_FMT("Dirty draw area: {}", m_vram_dirty_draw_rect); + if (m_compute_uv_range) + GL_INS_FMT("UV rect: {}", m_current_uv_rect); + + const GPUTextureCache::Source* texture = nullptr; + if (m_batch.texture_mode == BatchTextureMode::PageTexture) + { + texture = LookupSource(m_batch.texture_cache_key, m_current_uv_rect, + m_batch.transparency_mode != GPUTransparencyMode::Disabled ? 
+ GPUTextureCache::PaletteRecordFlags::HasSemiTransparentDraws : + GPUTextureCache::PaletteRecordFlags::None); + } if (m_batch_ubo_dirty) { @@ -3570,21 +3761,24 @@ void GPU_HW::FlushRender() m_batch_ubo_dirty = false; } + m_current_draw_rect = INVALID_RECT; + m_current_uv_rect = INVALID_RECT; + if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) { if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.texture_mode, m_batch.check_mask_before_draw) || m_rov_active || (m_use_rov_for_shader_blend && m_pgxp_depth_buffer)) { - DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); + DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex, texture); } else if (NeedsTwoPassRendering()) { - DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); - DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); + DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex, texture); + DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex, texture); } else { - DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex); + DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex, texture); } } @@ -3604,6 +3798,8 @@ void GPU_HW::UpdateDisplay() GL_SCOPE("UpdateDisplay()"); + GPUTextureCache::Compact(); + if (g_settings.debugging.show_vram) { if (IsUsingMultisampling()) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 1ada1e17f..d94c75cfd 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -4,6 +4,7 @@ #pragma once #include "gpu.h" +#include "gpu_hw_texture_cache.h" #include "texture_replacements.h" #include "util/gpu_device.h" @@ -38,6 +39,7 @@ public: Palette4Bit, Palette8Bit, Direct16Bit, + PageTexture, Disabled, SpritePalette4Bit, @@ -52,6 +54,11 @@ public: static_cast(BatchTextureMode::Palette8Bit) == 
static_cast(GPUTextureMode::Palette8Bit) && static_cast(BatchTextureMode::Direct16Bit) == static_cast(GPUTextureMode::Direct16Bit)); + static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + static constexpr GSVector4i INVALID_RECT = + GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), + std::numeric_limits::min()); + GPU_HW(); ~GPU_HW() override; @@ -83,6 +90,8 @@ private: { TEXPAGE_DIRTY_DRAWN_RECT = (1 << 0), TEXPAGE_DIRTY_WRITTEN_RECT = (1 << 1), + TEXPAGE_DIRTY_PAGE_RECT = (1 << 2), + TEXPAGE_DIRTY_ONLY_UV_RECT = (1 << 3), }; static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT)); @@ -116,6 +125,8 @@ private: bool use_depth_buffer = false; bool sprite_mode = false; + GPUTextureCache::SourceKey texture_cache_key = {}; + // Returns the render mode for this batch. BatchRenderMode GetRenderMode() const; }; @@ -136,11 +147,6 @@ private: u32 num_uniform_buffer_updates; }; - static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - static constexpr GSVector4i INVALID_RECT = - GSVector4i::cxpr(std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::min(), - std::numeric_limits::min()); - /// Returns true if a depth buffer should be created. 
GPUTexture::Format GetDepthBufferFormat() const; @@ -165,7 +171,8 @@ private: void DeactivateROV(); void MapGPUBuffer(u32 required_vertices, u32 required_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); - void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); + void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex, + const GPUTextureCache::Source* texture); u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; @@ -182,6 +189,7 @@ private: void SetTexPageChangedOnOverlap(const GSVector4i update_rect); void CheckForTexPageOverlap(GSVector4i uv_rect); + bool ShouldCheckForTexPageOverlap() const; bool IsFlushed() const; void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); @@ -286,6 +294,9 @@ private: bool m_texture_window_active : 1 = false; bool m_rov_active : 1 = false; + bool m_use_texture_cache : 1 = false; + bool m_texture_dumping : 1 = false; + u8 m_texpage_dirty = 0; BatchConfig m_batch; @@ -296,8 +307,9 @@ private: // Bounding box of VRAM area that the GPU has drawn into. GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; - GSVector4i m_vram_dirty_write_rect = INVALID_RECT; + GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero. 
GSVector4i m_current_uv_rect = INVALID_RECT; + GSVector4i m_current_draw_rect = INVALID_RECT; s32 m_current_texture_page_offset[2] = {}; std::unique_ptr m_wireframe_pipeline; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 5e8c2852a..739fb260b 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -60,13 +60,14 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss) false); } -std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits, +std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette, bool page_texture, bool uv_limits, bool force_round_texcoords, bool pgxp_depth) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", palette); + DefineMacro(ss, "PAGE_TEXTURE", page_texture); DefineMacro(ss, "UV_LIMITS", uv_limits); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); @@ -74,7 +75,22 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale WriteCommonFunctions(ss); WriteBatchUniformBuffer(ss); - if (textured) + if (textured && page_texture) + { + if (uv_limits) + { + DeclareVertexEntryPoint( + ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1, + {{"nointerpolation", "float4 v_uv_limits"}}, false, "", UsingMSAA(), UsingPerSampleShading(), + m_disable_color_perspective); + } + else + { + DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, {}, + false, "", UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective); + } + } + else if (textured) { if (uv_limits) { @@ -132,16 +148,18 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale v_col0 = a_col0; #if TEXTURED v_tex0 = float2(uint2(a_texcoord & 0xFFFFu, a_texcoord >> 16)); - #if !PALETTE + 
#if !PALETTE && !PAGE_TEXTURE v_tex0 *= float(RESOLUTION_SCALE); #endif - // base_x,base_y,palette_x,palette_y - v_texpage.x = (a_texpage & 15u) * 64u; - v_texpage.y = ((a_texpage >> 4) & 1u) * 256u; - #if PALETTE - v_texpage.z = ((a_texpage >> 16) & 63u) * 16u; - v_texpage.w = ((a_texpage >> 22) & 511u); + #if !PAGE_TEXTURE + // base_x,base_y,palette_x,palette_y + v_texpage.x = (a_texpage & 15u) * 64u; + v_texpage.y = ((a_texpage >> 4) & 1u) * 256u; + #if PALETTE + v_texpage.z = ((a_texpage >> 16) & 63u) * 16u; + v_texpage.w = ((a_texpage >> 22) & 511u); + #endif #endif #if UV_LIMITS @@ -151,7 +169,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale // Add 0.5 to the upper bounds when upscaling, to work around interpolation differences. // Limited to force-round-texcoord hack, to avoid breaking other games. v_uv_limits.zw += 0.5; - #elif !PALETTE + #elif !PAGE_TEXTURE && !PALETTE // Treat coordinates as being in upscaled space, and extend the UV range to all "upscaled" // pixels. This means 1-pixel-high polygon-based framebuffer effects won't be downsampled. // (e.g. 
Mega Man Legends 2 haze effect) @@ -710,6 +728,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader( const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled); const bool palette = (texture_mode == GPU_HW::BatchTextureMode::Palette4Bit || texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); + const bool page_texture = (texture_mode == GPU_HW::BatchTextureMode::PageTexture); const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend); const bool use_dual_source = (!shader_blending && !use_rov && m_supports_dual_source_blend && ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && @@ -728,6 +747,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader( DefineMacro(ss, "PALETTE", palette); DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette4Bit); DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); + DefineMacro(ss, "PAGE_TEXTURE", page_texture); DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering); // Debanding requires true color to work correctly. @@ -810,6 +830,8 @@ uint2 FloatToIntegerCoords(float2 coords) return uint2((RESOLUTION_SCALE == 1u || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords)); } +#if !PAGE_TEXTURE + float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) { #if PALETTE @@ -863,7 +885,24 @@ float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords) #endif } +#else + +float4 SampleFromPageTexture(float2 coords) +{ + // Cached textures. 
+#if FORCE_ROUND_TEXCOORDS + float2 fpart = coords - roundEven(coords); +#else + float2 fpart = frac(coords); #endif + uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); + coords = (float2(icoord) + fpart) * (1.0f / 256.0f); + return SAMPLE_TEXTURE(samp0, coords); +} + +#endif + +#endif // TEXTURED // From https://alex.vlachos.com/graphics/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf // and https://www.shadertoy.com/view/MslGR8 (5th one starting from the bottom) @@ -885,7 +924,22 @@ float3 ApplyDebanding(float2 frag_coord) )"; const u32 num_fragment_outputs = use_rov ? 0 : (use_dual_source ? 2 : 1); - if (textured) + if (textured && page_texture) + { + if (uv_limits) + { + DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "float4 v_uv_limits"}}, true, num_fragment_outputs, + use_dual_source, m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false, + m_disable_color_perspective, shader_blending && !use_rov, use_rov); + } + else + { + DeclareFragmentEntryPoint(ss, 1, 1, {}, true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth, + UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, + shader_blending && !use_rov, use_rov); + } + } + else if (textured) { if (texture_filtering != GPUTextureFilter::Nearest) WriteBatchTextureFilter(ss, texture_filtering); @@ -931,7 +985,17 @@ float3 ApplyDebanding(float2 frag_coord) #if TEXTURED float4 texcol; - #if TEXTURE_FILTERING + #if PAGE_TEXTURE + #if UV_LIMITS + texcol = SampleFromPageTexture(clamp(v_tex0, v_uv_limits.xy, v_uv_limits.zw)); + #else + texcol = SampleFromPageTexture(v_tex0); + #endif + if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR)) + discard; + + ialpha = 1.0; + #elif TEXTURE_FILTERING FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha); if (ialpha < 0.5) discard; @@ -1730,3 +1794,33 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact return ss.str(); } + +std::string 
GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent) +{ + std::stringstream ss; + WriteHeader(ss); + DefineMacro(ss, "SEMITRANSPARENT", semitransparent); + DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 0, 1); + + ss << R"( +{ + float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; + float4 color = SAMPLE_TEXTURE(samp0, coords); + o_col0.rgb = color.rgb; + + // Alpha processing. + #if SEMITRANSPARENT + // Map anything not 255 to 1 for semitransparent, otherwise zero for opaque. + o_col0.a = (color.a <= 0.95f) ? 1.0f : 0.0f; + o_col0.a = VECTOR_EQ(color, float4(0.0, 0.0, 0.0, 0.0)) ? 0.0f : o_col0.a; + #else + // Leave (0,0,0,0) as 0000 for opaque replacements for cutout alpha. + o_col0.a = color.a; + #endif +} +)"; + + return ss.str(); +} \ No newline at end of file diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 455b75e31..b4387d6c8 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -15,8 +15,8 @@ public: bool supports_dual_source_blend, bool supports_framebuffer_fetch, bool debanding); ~GPU_HW_ShaderGen(); - std::string GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits, bool force_round_texcoords, - bool pgxp_depth); + std::string GenerateBatchVertexShader(bool textured, bool palette, bool page_texture, bool uv_limits, + bool force_round_texcoords, bool pgxp_depth); std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing, @@ -36,6 +36,8 @@ public: std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor); + std::string GenerateReplacementMergeFragmentShader(bool semitransparent); + private: ALWAYS_INLINE bool 
UsingMSAA() const { return m_multisamples > 1; } ALWAYS_INLINE bool UsingPerSampleShading() const { return m_multisamples > 1 && m_per_sample_shading; } diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp new file mode 100644 index 000000000..117353425 --- /dev/null +++ b/src/core/gpu_hw_texture_cache.cpp @@ -0,0 +1,1915 @@ +// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin +// SPDX-License-Identifier: CC-BY-NC-ND-4.0 + +#include "gpu_hw_texture_cache.h" +#include "gpu_hw.h" +#include "gpu_hw_shadergen.h" +#include "settings.h" +#include "system.h" + +#include "util/gpu_device.h" +#include "util/state_wrapper.h" + +#include "common/gsvector_formatter.h" +#include "common/log.h" +#include "common/string_util.h" + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" +#ifdef CPU_ARCH_SSE +#include "xxh_x86dispatch.h" +#endif + +#include +#include +#include + +Log_SetChannel(GPUTextureCache); + +// TODO: Fix copy-as-write. +// TODO: Write coalescing, xenogears. + +// #define ALWAYS_TRACK_VRAM_WRITES 1 + +namespace GPUTextureCache { +static constexpr u32 MAX_CLUT_SIZE = 256; + +struct VRAMWrite +{ + GSVector4i active_rect; + GSVector4i write_rect; + HashType hash; + + struct PaletteRecord + { + // TODO: Texture window, for sub texture dumping. + GSVector4i rect; + SourceKey key; + PaletteRecordFlags flags; + + // Awkward to store, but we need to keep a backup copy of each CLUT, because if the CLUT gets overwritten + // before the VRAM write, when we go to dump the texture, it'll be incorrect. + HashType palette_hash; + u16 palette[MAX_CLUT_SIZE]; + }; + + // List of palettes and rectangles drawn for dumping. + // TODO: Keep these in texel-local space, not global space, that way texture sizes aren't aligned to 4 pixels. + // But realistically, that probably isn't super common, and also requires modifying the renderer side of things. 
+ std::vector palette_records; + + u32 num_splits; + u32 num_page_refs; + std::array, MAX_PAGE_REFS_PER_WRITE> page_refs; +}; + +struct PageEntry +{ + TList sources; + TList writes; // TODO: Split to own list + GSVector4i draw_rect; // NOTE: In global VRAM space. + bool is_drawn = false; // TODO: Split to bitset +}; + +struct HashCacheKey +{ + HashType texture_hash; + HashType palette_hash; + HashType mode; + + ALWAYS_INLINE bool operator==(const HashCacheKey& k) const + { + return (std::memcmp(&k, this, sizeof(HashCacheKey)) == 0); + } + ALWAYS_INLINE bool operator!=(const HashCacheKey& k) const + { + return (std::memcmp(&k, this, sizeof(HashCacheKey)) != 0); + } +}; +struct HashCacheKeyHash +{ + size_t operator()(const HashCacheKey& k) const; +}; + +struct HashCacheEntry +{ + std::unique_ptr texture; + u32 ref_count; + u32 last_used_frame; + TList sources; +}; + +using HashCache = std::unordered_map; + +template +ALWAYS_INLINE_RELEASE static void ListPrepend(TList* list, T* item, TListNode* item_node) +{ + item_node->ref = item; + item_node->list = list; + item_node->prev = nullptr; + if (list->tail) + { + item_node->next = list->head; + list->head->prev = item_node; + list->head = item_node; + } + else + { + item_node->next = nullptr; + list->head = item_node; + list->tail = item_node; + } +} + +template +ALWAYS_INLINE_RELEASE static void ListAppend(TList* list, T* item, TListNode* item_node) +{ + item_node->ref = item; + item_node->list = list; + item_node->next = nullptr; + if (list->tail) + { + item_node->prev = list->tail; + list->tail->next = item_node; + list->tail = item_node; + } + else + { + item_node->prev = nullptr; + list->head = item_node; + list->tail = item_node; + } +} + +template +ALWAYS_INLINE_RELEASE static void ListMoveToFront(TList* list, TListNode* item_node) +{ + DebugAssert(list->head); + if (!item_node->prev) + return; + + item_node->prev->next = item_node->next; + if (item_node->next) + item_node->next->prev = item_node->prev; + else + 
list->tail = item_node->prev; + + item_node->prev = nullptr; + list->head->prev = item_node; + item_node->next = list->head; + list->head = item_node; +} + +template +ALWAYS_INLINE_RELEASE static void ListUnlink(const TListNode& node) +{ + if (node.prev) + node.prev->next = node.next; + else + node.list->head = node.next; + if (node.next) + node.next->prev = node.prev; + else + node.list->tail = node.prev; +} + +template +ALWAYS_INLINE_RELEASE static void ListIterate(const TList& list, const F& f) +{ + for (const GPUTextureCache::TListNode* n = list.head; n;) + { + const GPUTextureCache::TListNode* tn = n; + n = n->next; + f(tn->ref); + } +} + +template +ALWAYS_INLINE_RELEASE static void ListIterateWithEarlyExit(const TList& list, const F& f) +{ + for (const GPUTextureCache::TListNode* n = list.head; n; n = n->next) + { + if (!f(n->ref)) + break; + } +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPages(u32 left, u32 top, u32 right, u32 bottom, const F& f) +{ + DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT); + DebugAssert((right - left) > 0 && (bottom - top) > 0); + + const u32 start_x = left / VRAM_PAGE_WIDTH; + const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH; + const u32 start_y = top / VRAM_PAGE_HEIGHT; + const u32 end_y = (bottom - 1) / VRAM_PAGE_HEIGHT; + + u32 page_number = VRAMPageIndex(start_x, start_y); + for (u32 page_y = start_y; page_y <= end_y; page_y++) + { + u32 y_page_number = page_number; + + for (u32 page_x = start_x; page_x <= end_x; page_x++) + f(y_page_number++); + + page_number += VRAM_PAGES_WIDE; + } +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPagesWithEarlyExit(u32 left, u32 top, u32 right, u32 bottom, const F& f) +{ + DebugAssert(right <= VRAM_WIDTH && bottom <= VRAM_HEIGHT); + DebugAssert((right - left) > 0 && (bottom - top) > 0); + + const u32 start_x = left / VRAM_PAGE_WIDTH; + const u32 end_x = (right - 1) / VRAM_PAGE_WIDTH; + const u32 start_y = top / VRAM_PAGE_HEIGHT; + const u32 end_y = (bottom - 1) 
/ VRAM_PAGE_HEIGHT; + + u32 page_number = VRAMPageIndex(start_x, start_y); + for (u32 page_y = start_y; page_y <= end_y; page_y++) + { + u32 y_page_number = page_number; + + for (u32 page_x = start_x; page_x <= end_x; page_x++) + { + if (!f(y_page_number++)) + return; + } + + page_number += VRAM_PAGES_WIDE; + } +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPages(const GSVector4i& rc, const F& f) +{ + LoopRectPages(rc.left, rc.top, rc.right, rc.bottom, f); +} + +template +ALWAYS_INLINE_RELEASE static void LoopRectPagesWithEarlyExit(const GSVector4i& rc, const F& f) +{ + LoopRectPagesWithEarlyExit(rc.left, rc.top, rc.right, rc.bottom, f); +} + +template +ALWAYS_INLINE_RELEASE static void LoopXWrappedPages(u32 page, u32 num_pages, const F& f) +{ + for (u32 i = 0; i < num_pages; i++) + f((page & VRAM_PAGE_Y_MASK) | ((page + i) & VRAM_PAGE_X_MASK)); +} + +ALWAYS_INLINE void DoStateVector(StateWrapper& sw, GSVector4i* vec) +{ + sw.DoBytes(vec->S32, sizeof(vec->S32)); +} + +static bool ShouldTrackVRAMWrites(); +static bool IsDumpingVRAMWriteTextures(); + +static bool CompilePipelines(); +static void DestroyPipelines(); + +static const Source* ReturnSource(Source* source, const GSVector4i uv_rect, PaletteRecordFlags flags); +static Source* CreateSource(SourceKey key); + +static HashCacheEntry* LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash); +static void ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash, HashCacheEntry* entry); +static void RemoveFromHashCache(HashCache::iterator it); +static void ClearHashCache(); + +static HashType HashPage(u8 page, GPUTextureMode mode); +static HashType HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode); +static HashType HashPartialPalette(const u16* palette, u32 min, u32 max); + +static std::pair ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode, + GPUTexturePaletteReg palette); +static void SyncVRAMWritePaletteRecords(VRAMWrite* entry); +static void 
InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key, + const GSVector4i rect, PaletteRecordFlags flags); +static void UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect, + PaletteRecordFlags flags); +static void SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect); +static void RemoveVRAMWrite(VRAMWrite* entry); +static void DumpTexturesFromVRAMWrite(VRAMWrite* entry); +static void DumpTextureFromPage(const Source* src); + +static void DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture16(const u16* page, u32 width, u32 height, u32* dest, u32 dest_stride); +static void DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture); + +static constexpr const GSVector4i& INVALID_RECT = GPU_HW::INVALID_RECT; +static constexpr const GPUTexture::Format REPLACEMENT_TEXTURE_FORMAT = GPUTexture::Format::RGBA8; + +// TODO: Pack in struct + +static HashCache s_hash_cache; +static size_t s_hash_cache_memory_usage = 0; +static size_t s_max_hash_cache_memory_usage = 1ULL * 1024ULL * 1024ULL * 1024ULL; // 1GB + +static std::array s_pages = {}; + +/// List of candidates for purging when the hash cache gets too large. +static std::vector> s_hash_cache_purge_list; + +/// List of VRAM writes collected when saving state. +static std::vector s_temp_vram_write_list; + +static std::unique_ptr s_replacement_texture_render_target; +static std::unique_ptr s_replacement_init_pipeline; +static std::unique_ptr s_replacement_draw_pipeline; // copies alpha as-is +static std::unique_ptr s_replacement_semitransparent_draw_pipeline; // inverts alpha (i.e.
semitransparent) + +static bool s_track_vram_writes = false; + +} // namespace GPUTextureCache + +bool GPUTextureCache::ShouldTrackVRAMWrites() +{ +#ifdef ALWAYS_TRACK_VRAM_WRITES + return true; +#else + return (IsDumpingVRAMWriteTextures() || (g_settings.texture_replacements.enable_texture_replacements && + TextureReplacements::HasVRAMWriteTextureReplacements())); +#endif +} + +bool GPUTextureCache::IsDumpingVRAMWriteTextures() +{ + return (g_settings.texture_replacements.dump_textures && !TextureReplacements::GetConfig().dump_texture_pages); +} + +bool GPUTextureCache::Initialize() +{ + UpdateVRAMTrackingState(); + if (!CompilePipelines()) + return false; + + return true; +} + +void GPUTextureCache::UpdateSettings(const Settings& old_settings) +{ + UpdateVRAMTrackingState(); + + if (g_settings.texture_replacements.enable_texture_replacements != + old_settings.texture_replacements.enable_texture_replacements) + { + Invalidate(); + + DestroyPipelines(); + if (!CompilePipelines()) + Panic("Failed to compile pipelines on TC settings change"); + } +} + +bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) +{ + if (sw.GetVersion() < 72) + { + if (!skip) + WARNING_LOG("Texture cache not in save state due to old version."); + + Invalidate(); + return true; + } + + if (!sw.DoMarker("GPUTextureCache")) + return false; + + if (sw.IsReading()) + { + if (!skip) + Invalidate(); + + u32 num_vram_writes = 0; + sw.Do(&num_vram_writes); + + const bool skip_writes = (skip || !s_track_vram_writes); + + for (u32 i = 0; i < num_vram_writes; i++) + { + static constexpr u32 PALETTE_RECORD_SIZE = sizeof(GSVector4i) + sizeof(SourceKey) + sizeof(PaletteRecordFlags) + + sizeof(HashType) + sizeof(u16) * MAX_CLUT_SIZE; + + if (skip_writes) + { + sw.SkipBytes(sizeof(GSVector4i) * 2 + sizeof(HashType)); + + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); + } + else + { + VRAMWrite* vrw = new VRAMWrite(); + 
DoStateVector(sw, &vrw->active_rect); + DoStateVector(sw, &vrw->write_rect); + sw.Do(&vrw->hash); + + u32 num_palette_records = 0; + sw.Do(&num_palette_records); + + // Skip palette records if we're not dumping now. + if (g_settings.texture_replacements.dump_textures) + { + vrw->palette_records.reserve(num_palette_records); + for (u32 j = 0; j < num_palette_records; j++) + { + VRAMWrite::PaletteRecord& rec = vrw->palette_records.emplace_back(); + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } + } + else + { + sw.SkipBytes(num_palette_records * PALETTE_RECORD_SIZE); + } + + if (sw.HasError()) + { + delete vrw; + Invalidate(); + return false; + } + + vrw->num_page_refs = 0; + LoopRectPages(vrw->active_rect, [vrw](u32 pn) { + DebugAssert(vrw->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, vrw, &vrw->page_refs[vrw->num_page_refs++]); + return true; + }); + } + } + } + else + { + s_temp_vram_write_list.clear(); + + if (!skip && s_track_vram_writes) + { + for (PageEntry& page : s_pages) + { + ListIterate(page.writes, [](VRAMWrite* vrw) { + if (std::find(s_temp_vram_write_list.begin(), s_temp_vram_write_list.end(), vrw) != + s_temp_vram_write_list.end()) + { + return; + } + + // try not to lose data... 
pull it from the sources + if (g_settings.texture_replacements.dump_textures) + SyncVRAMWritePaletteRecords(vrw); + + s_temp_vram_write_list.push_back(vrw); + }); + } + } + + u32 num_vram_writes = static_cast(s_temp_vram_write_list.size()); + sw.Do(&num_vram_writes); + for (VRAMWrite* vrw : s_temp_vram_write_list) + { + DoStateVector(sw, &vrw->active_rect); + DoStateVector(sw, &vrw->write_rect); + sw.Do(&vrw->hash); + + u32 num_palette_records = static_cast(vrw->palette_records.size()); + sw.Do(&num_palette_records); + for (VRAMWrite::PaletteRecord& rec : vrw->palette_records) + { + DoStateVector(sw, &rec.rect); + sw.DoBytes(&rec.key, sizeof(rec.key)); + sw.Do(&rec.flags); + sw.Do(&rec.palette_hash); + sw.DoBytes(rec.palette, sizeof(rec.palette)); + } + } + } + + return !sw.HasError(); +} + +void GPUTextureCache::Shutdown() +{ + Invalidate(); + ClearHashCache(); + DestroyPipelines(); + s_replacement_texture_render_target.reset(); + s_hash_cache_purge_list = {}; + s_temp_vram_write_list = {}; + s_track_vram_writes = false; +} + +bool GPUTextureCache::CompilePipelines() +{ + if (!g_settings.texture_replacements.enable_texture_replacements) + return true; + + GPUPipeline::GraphicsConfig plconfig = {}; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.input_layout.vertex_attributes = {}; + plconfig.input_layout.vertex_stride = 0; + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.geometry_shader = nullptr; + plconfig.SetTargetFormats(REPLACEMENT_TEXTURE_FORMAT); + + // Most flags don't matter here. 
+ const GPUDevice::Features features = g_gpu_device->GetFeatures(); + GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), 1, 1, false, false, false, false, false, + features.dual_source_blend, features.framebuffer_fetch, false); + std::unique_ptr fullscreen_quad_vertex_shader = g_gpu_device->CreateShader( + GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateScreenQuadVertexShader()); + if (!fullscreen_quad_vertex_shader) + return false; + + plconfig.vertex_shader = fullscreen_quad_vertex_shader.get(); + + std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateCopyFragmentShader()); + if (!fs) + return false; + plconfig.fragment_shader = fs.get(); + if (!(s_replacement_init_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateReplacementMergeFragmentShader(false)); // must be stored in fs, otherwise the draw pipeline reuses the copy shader + if (!fs) + return false; + plconfig.fragment_shader = fs.get(); + if (!(s_replacement_draw_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateReplacementMergeFragmentShader(true)); + if (!fs) + return false; + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.fragment_shader = fs.get(); + if (!(s_replacement_semitransparent_draw_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + return true; +} + +void GPUTextureCache::DestroyPipelines() +{ + s_replacement_init_pipeline.reset(); + s_replacement_draw_pipeline.reset(); + s_replacement_semitransparent_draw_pipeline.reset(); +} + +void GPUTextureCache::AddDrawnRectangle(const GSVector4i rect) +{ + // TODO: This might be a bit slow...
+ LoopRectPages(rect, [&rect](u32 pn) { + PageEntry& page = s_pages[pn]; + const GSVector4i rc = rect.rintersect(VRAMPageRect(pn)); + if (page.is_drawn) + { + if (!page.draw_rect.rcontains(rc)) + { + page.draw_rect = page.draw_rect.runion(rc); + GL_INS_FMT("Page {} drawn rect is now {}", pn, page.draw_rect); + InvalidatePageSources(pn, page.draw_rect); + } + } + else + { + GL_INS_FMT("Page {} drawn rect is now {}", pn, rc); + page.draw_rect = rc; + page.is_drawn = true; + + // remove all sources, let them re-lookup if needed + InvalidatePageSources(pn, rc); + } + + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + if (it->active_rect.rintersects(rect)) + RemoveVRAMWrite(it); + } + }); +} + +void GPUTextureCache::AddWrittenRectangle(const GSVector4i rect) +{ + LoopRectPages(rect, [&rect](u32 pn) { + PageEntry& page = s_pages[pn]; + InvalidatePageSources(pn, rect); + if (page.is_drawn) + { + const GSVector4i intersection = page.draw_rect.rintersect(rect); + if (intersection.eq(page.draw_rect)) + { + GL_INS_FMT("Clearing page {} draw rect due to write cover", pn); + page.is_drawn = false; + page.draw_rect = INVALID_RECT; + } + else if (!intersection.rempty()) + { + // I hate this. It's a hack for FF8, where it copies the framebuffer behind the HUD below the framebuffer, + // and copies it back to redraw the UI over it. If we toss the draw on any intersection, we lose the copied + // portion, since local memory is stale. 
+ GSVector4i new_draw_rect = page.draw_rect; + if (((static_cast(intersection.width()) * 100) / static_cast(page.draw_rect.width())) >= 90) + new_draw_rect.y = intersection.w; + else if (((static_cast(intersection.height()) * 100) / static_cast(page.draw_rect.height())) >= 90) + new_draw_rect.x = intersection.z; + if (new_draw_rect.rempty()) + { + GL_INS_FMT("Clearing page {} draw rect due to write overlap", pn); + page.is_drawn = false; + page.draw_rect = INVALID_RECT; + } + else + { + GL_INS_FMT("Change page {} draw rect from {} to {} due to write overlap", pn, page.draw_rect, new_draw_rect); + page.draw_rect = new_draw_rect; + } + } + } + + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + + const GSVector4i intersection = it->active_rect.rintersect(rect); + if (!intersection.rempty()) + { + if (it->num_splits < TextureReplacements::GetConfig().max_vram_write_splits && + !it->active_rect.eq(intersection)) + { + SplitVRAMWrite(it, intersection); + } + else + { + RemoveVRAMWrite(it); + } + } + } + }); +} + +void GPUTextureCache::AddCopiedRectanglePart1(const GSVector4i rect) +{ + LoopRectPages(rect, [&rect](u32 pn) { + PageEntry& page = s_pages[pn]; + InvalidatePageSources(pn, rect); + if (page.is_drawn) + { + if (page.draw_rect.rintersects(rect)) + { + GL_INS_FMT("Clearing page {} draw rect due to copy overlap", pn); + page.is_drawn = false; + page.draw_rect = INVALID_RECT; + } + } + + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + if (it->active_rect.rintersects(rect)) + DumpTexturesFromVRAMWrite(it); + } + }); +} + +void GPUTextureCache::AddCopiedRectanglePart2(const GSVector4i rect) +{ + LoopRectPages(rect, [&rect](u32 pn) { + PageEntry& page = s_pages[pn]; + for (TListNode* n = page.writes.head; n;) + { + VRAMWrite* it = n->ref; + n = n->next; + if (it->write_rect.rintersects(rect)) + { + const HashType new_hash = HashRect(it->write_rect); + DEV_LOG("VRAM Copy {:016X} => 
{:016X}", it->hash, new_hash); + it->hash = new_hash; + } + } + }); +} + +[[maybe_unused]] ALWAYS_INLINE static TinyString SourceKeyToString(const GPUTextureCache::SourceKey& key) +{ + static constexpr const std::array texture_modes = { + {"Palette4Bit", "Palette8Bit", "Direct16Bit", "Reserved_Direct16Bit"}}; + + TinyString ret; + if (key.mode < GPUTextureMode::Direct16Bit) + { + ret.format("{} Page[{}] CLUT@[{},{}]", texture_modes[static_cast(key.mode)], key.page, key.palette.GetXBase(), + key.palette.GetYBase()); + } + else + { + ret.format("{} Page[{}]", texture_modes[static_cast(key.mode)], key.page); + } + return ret; +} + +[[maybe_unused]] ALWAYS_INLINE static TinyString SourceToString(const GPUTextureCache::Source* src) +{ + return SourceKeyToString(src->key); +} + +ALWAYS_INLINE_RELEASE static const u16* VRAMPagePointer(u32 pn) +{ + const u32 start_y = VRAMPageStartY(pn); + const u32 start_x = VRAMPageStartX(pn); + return &g_vram[start_y * VRAM_WIDTH + start_x]; +} + +ALWAYS_INLINE_RELEASE static const u16* VRAMPalettePointer(GPUTexturePaletteReg palette) +{ + return &g_vram[VRAM_WIDTH * palette.GetYBase() + palette.GetXBase()]; +} + +// TODO: Vectorize these with gather. 
+void GPUTextureCache::DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, + u32 dest_stride) +{ + if ((width % 4u) == 0) + { + const u32 vram_width = width / 4; + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < vram_width; x++) + { + const u32 pp = *(page_ptr++); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[(pp >> 4) & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[(pp >> 8) & 0x0F]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp >> 12]); + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } + else + { + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + u32 offs = 0; + u16 texel = 0; + for (u32 x = 0; x < width; x++) + { + if (offs == 0) + texel = *(page_ptr++); + + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[texel & 0x0F]); + texel >>= 4; + + offs = (offs + 1) % 4; + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } +} +void GPUTextureCache::DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, + u32 dest_stride) +{ + if ((width % 2u) == 0) + { + const u32 vram_width = width / 2; + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < vram_width; x++) + { + const u32 pp = *(page_ptr++); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp & 0xFF]); + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(palette[pp >> 8]); + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } + else + { + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + u32 offs = 0; + u16 texel = 0; + for (u32 x = 0; x < width; x++) + { + if (offs == 0) + texel = *(page_ptr++); + + *(dest_ptr++) = 
VRAMRGBA5551ToRGBA8888(palette[texel & 0xFF]); + texel >>= 8; + + offs ^= 1; + } + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } + } +} + +void GPUTextureCache::DecodeTexture16(const u16* page, u32 width, u32 height, u32* dest, u32 dest_stride) +{ + for (u32 y = 0; y < height; y++) + { + const u16* page_ptr = page; + u32* dest_ptr = dest; + + for (u32 x = 0; x < width; x++) + *(dest_ptr++) = VRAMRGBA5551ToRGBA8888(*(page_ptr++)); + + page += VRAM_WIDTH; + dest = reinterpret_cast(reinterpret_cast(dest) + dest_stride); + } +} + +void GPUTextureCache::DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u32* dest, + u32 dest_stride, u32 width, u32 height) +{ + switch (mode) + { + case GPUTextureMode::Palette4Bit: + DecodeTexture4(page_ptr, palette, width, height, dest, dest_stride); + break; + case GPUTextureMode::Palette8Bit: + DecodeTexture8(page_ptr, palette, width, height, dest, dest_stride); + break; + case GPUTextureMode::Direct16Bit: + case GPUTextureMode::Reserved_Direct16Bit: + DecodeTexture16(page_ptr, width, height, dest, dest_stride); + break; + + DefaultCaseIsUnreachable() + } +} + +void GPUTextureCache::DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture) +{ + alignas(16) static u32 s_temp_buffer[TEXTURE_PAGE_WIDTH * TEXTURE_PAGE_HEIGHT]; + + static constexpr bool DUMP = false; + + u32* tex_map; + u32 tex_stride; + const bool mapped = !DUMP && texture->Map(reinterpret_cast(&tex_map), &tex_stride, 0, 0, TEXTURE_PAGE_WIDTH, + TEXTURE_PAGE_HEIGHT); + if (!mapped) + { + tex_map = s_temp_buffer; + tex_stride = sizeof(u32) * TEXTURE_PAGE_WIDTH; + } + + const u16* page_ptr = VRAMPagePointer(page); + const u16* palette_ptr = TextureModeHasPalette(mode) ? 
VRAMPalettePointer(palette) : nullptr; + DecodeTexture(mode, page_ptr, palette_ptr, tex_map, tex_stride, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); + + if constexpr (DUMP) + { + static u32 n = 0; + RGBA8Image image(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, tex_map); + image.SaveToFile(TinyString::from_format("D:\\dump\\hc_{}.png", ++n)); + } + + if (mapped) + texture->Unmap(); + else + texture->Update(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, tex_map, tex_stride); +} + +const GPUTextureCache::Source* GPUTextureCache::LookupSource(SourceKey key, const GSVector4i rect, + PaletteRecordFlags flags) +{ + GL_SCOPE_FMT("TC: Lookup source {}", SourceKeyToString(key)); + + TList& list = s_pages[key.page].sources; + for (TListNode* n = list.head; n; n = n->next) + { + if (n->ref->key == key) + { + GL_INS("TC: Source hit"); + ListMoveToFront(&list, n); + return ReturnSource(n->ref, rect, flags); + } + } + + return ReturnSource(CreateSource(key), rect, flags); +} + +const GPUTextureCache::Source* GPUTextureCache::ReturnSource(Source* source, const GSVector4i uv_rect, + PaletteRecordFlags flags) +{ +#ifdef _DEBUG + // GL_INS_FMT("Tex hash: {:016X}", source->texture_hash); + // GL_INS_FMT("Palette hash: {:016X}", source->palette_hash); + if (!uv_rect.eq(INVALID_RECT)) + { + LoopXWrappedPages(source->key.page, TexturePageCountForMode(source->key.mode), [&uv_rect](u32 pn) { + const PageEntry& pe = s_pages[pn]; + ListIterate(pe.writes, [&uv_rect](const VRAMWrite* vrw) { + if (const GSVector4i intersection = uv_rect.rintersect(vrw->write_rect); !intersection.rempty()) + GL_INS_FMT("TC: VRAM write was {:016X} ({})", vrw->hash, intersection); + }); + }); + if (TextureModeHasPalette(source->key.mode)) + GL_INS_FMT("TC: Palette was {:016X}", source->palette_hash); + } +#endif + + DebugAssert(source->from_hash_cache); + source->from_hash_cache->last_used_frame = System::GetFrameNumber(); + + // TODO: Cache var. 
+ if (g_settings.texture_replacements.dump_textures) + { + source->active_uv_rect = source->active_uv_rect.runion(uv_rect); + source->palette_record_flags |= flags; + } + + return source; +} + +bool GPUTextureCache::IsPageDrawn(u32 page_index) +{ + return s_pages[page_index].is_drawn; +} + +bool GPUTextureCache::IsPageDrawn(u32 page_index, const GSVector4i rect) +{ + return s_pages[page_index].is_drawn && s_pages[page_index].draw_rect.rintersects(rect); +} + +bool GPUTextureCache::IsRectDrawn(const GSVector4i rect) +{ + // TODO: This is potentially hot, so replace it with an explicit loop over the pages instead. + bool drawn = false; + LoopRectPagesWithEarlyExit(rect, [&rect, &drawn](u32 pn) { + if (IsPageDrawn(pn, rect)) + { + drawn = true; + return false; + } + return true; + }); + return drawn; +} + +bool GPUTextureCache::AreSourcePagesDrawn(SourceKey key, const GSVector4i rect) +{ +#ifdef _DEBUG + { + const u32 shift = ((key.mode < GPUTextureMode::Direct16Bit) ? (2 - static_cast(key.mode)) : 0); + const GSVector4i vram_rect = rect.add32(GSVector4i(0, 0, (1 << shift) - 1, 0)).srl32(shift).blend32<0xa>(rect); + for (u32 offset = 0; offset < TexturePageCountForMode(key.mode); offset++) + { + const u32 wrapped_page = ((key.page + offset) & VRAM_PAGE_X_MASK) + (key.page & VRAM_PAGE_Y_MASK); + const GSVector4i page_rect = vram_rect.sub32(GSVector4i(offset * 64, 0, offset * 64, 0)); + if (IsPageDrawn(wrapped_page, page_rect)) + { + GL_INS_FMT("UV rect {} intersects page [{}] dirty rect {}, disabling TC", rect, wrapped_page, + s_pages[wrapped_page].draw_rect); + } + } + } +#endif + + switch (key.mode) + { + case GPUTextureMode::Palette4Bit: + { + const GSVector4i vram_rect = rect.add32(GSVector4i::cxpr(0, 0, 3, 0)).srl32<2>().blend32<0xa>(rect); + return IsPageDrawn(key.page, vram_rect); + } + + case GPUTextureMode::Palette8Bit: + { + // 2 P4 pages per P8 page. 
+ const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK); + const GSVector4i vram_rect = rect.add32(GSVector4i::cxpr(0, 0, 1, 0)).srl32<1>().blend32<0xa>(rect); + return (IsPageDrawn(key.page, vram_rect) || IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, + vram_rect.sub32(GSVector4i::cxpr(64, 0, 64, 0)))); + } + + case GPUTextureMode::Direct16Bit: + case GPUTextureMode::Reserved_Direct16Bit: + { + // 4 P4 pages per C16 page. + const u32 yoffs = (key.page & VRAM_PAGE_Y_MASK); + return (IsPageDrawn(key.page, rect) || + IsPageDrawn(((key.page + 1) & VRAM_PAGE_X_MASK) + yoffs, rect.sub32(GSVector4i::cxpr(64, 0, 64, 0))) || + IsPageDrawn(((key.page + 2) & VRAM_PAGE_X_MASK) + yoffs, rect.sub32(GSVector4i::cxpr(128, 0, 128, 0))) || + IsPageDrawn(((key.page + 3) & VRAM_PAGE_X_MASK) + yoffs, rect.sub32(GSVector4i::cxpr(192, 0, 192, 0)))); + } + + DefaultCaseIsUnreachable() + } +} + +void GPUTextureCache::Invalidate() +{ + for (u32 i = 0; i < NUM_VRAM_PAGES; i++) + { + InvalidatePageSources(i); + + PageEntry& page = s_pages[i]; + page.is_drawn = false; + page.draw_rect = GSVector4i::zero(); + + while (page.writes.tail) + RemoveVRAMWrite(page.writes.tail->ref); + } + + // should all be null +#ifdef _DEBUG + for (u32 i = 0; i < NUM_VRAM_PAGES; i++) + DebugAssert(!s_pages[i].sources.head && !s_pages[i].sources.tail); +#endif + + ClearHashCache(); +} + +void GPUTextureCache::InvalidatePageSources(u32 pn) +{ + DebugAssert(pn < NUM_VRAM_PAGES); + + TList& ps = s_pages[pn].sources; + if (ps.head) + GL_INS_FMT("Invalidate page {} sources", pn); + + for (TListNode* n = ps.head; n;) + { + Source* src = n->ref; + n = n->next; + + DestroySource(src); + } + + DebugAssert(!ps.head && !ps.tail); +} + +void GPUTextureCache::InvalidatePageSources(u32 pn, const GSVector4i rc) +{ + DebugAssert(pn < NUM_VRAM_PAGES); + + TList& ps = s_pages[pn].sources; + for (TListNode* n = ps.head; n;) + { + Source* src = n->ref; + n = n->next; + + // TODO: Make faster? 
+ if (!src->texture_rect.rintersects(rc) && + (src->key.mode == GPUTextureMode::Direct16Bit || !src->palette_rect.rintersects(rc))) + { + continue; + } + + GL_INS_FMT("Invalidate source {} in page {} due to overlapping with {}", SourceToString(src), pn, rc); + DestroySource(src); + } +} + +void GPUTextureCache::DestroySource(Source* src) +{ + GL_INS_FMT("Invalidate source {}", SourceToString(src)); + + if (g_settings.texture_replacements.dump_textures && !src->active_uv_rect.eq(INVALID_RECT)) + { + if (!TextureReplacements::GetConfig().dump_texture_pages) + { + // Find VRAM writes that overlap with this source + LoopRectPages(src->active_uv_rect, [src](const u32 pn) { + PageEntry& pg = s_pages[pn]; + ListIterate(pg.writes, [src](VRAMWrite* vw) { + UpdateVRAMWriteSources(vw, src->key, src->active_uv_rect, src->palette_record_flags); + }); + return true; + }); + } + else + { + DumpTextureFromPage(src); + } + } + + for (u32 i = 0; i < src->num_page_refs; i++) + ListUnlink(src->page_refs[i]); + + DebugAssert(src->from_hash_cache && src->from_hash_cache->ref_count > 0); + ListUnlink(src->hash_cache_ref); + src->from_hash_cache->ref_count--; + delete src; +} + +GPUTextureCache::Source* GPUTextureCache::CreateSource(SourceKey key) +{ + GL_INS_FMT("TC: Create source {}", SourceKeyToString(key)); + + const HashType tex_hash = HashPage(key.page, key.mode); + const HashType pal_hash = (key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(key.palette, key.mode) : 0; + HashCacheEntry* hcentry = LookupHashCache(key, tex_hash, pal_hash); + if (!hcentry) + { + GL_INS("TC: Hash cache lookup fail?!"); + return nullptr; + } + + hcentry->ref_count++; + + Source* src = new Source(); + src->key = key; + src->num_page_refs = 0; + src->texture = hcentry->texture.get(); + src->from_hash_cache = hcentry; + ListAppend(&hcentry->sources, src, &src->hash_cache_ref); + src->texture_hash = tex_hash; + src->palette_hash = pal_hash; + + // Textures at front, CLUTs at back. 
+ std::array page_refns; + const auto add_page_ref = [src, &page_refns](u32 pn) { + // Don't double up references + for (u32 i = 0; i < src->num_page_refs; i++) + { + if (page_refns[i] == pn) + return; + } + + const u32 ri = src->num_page_refs++; + page_refns[ri] = pn; + + ListPrepend(&s_pages[pn].sources, src, &src->page_refs[ri]); + }; + const auto add_page_ref_back = [src, &page_refns](u32 pn) { + // Don't double up references + for (u32 i = 0; i < src->num_page_refs; i++) + { + if (page_refns[i] == pn) + return; + } + + const u32 ri = src->num_page_refs++; + page_refns[ri] = pn; + + ListAppend(&s_pages[pn].sources, src, &src->page_refs[ri]); + }; + + src->texture_rect = GetTextureRect(key.page, key.mode); + src->active_uv_rect = INVALID_RECT; + LoopXWrappedPages(key.page, TexturePageCountForMode(key.mode), add_page_ref); + + if (key.mode < GPUTextureMode::Direct16Bit) + { + src->palette_rect = GetPaletteRect(key.palette, key.mode, true); + LoopXWrappedPages(PalettePageNumber(key.palette), PalettePageCountForMode(key.mode), add_page_ref_back); + } + + GL_INS_FMT("Appended new source {} to {} pages", SourceToString(src), src->num_page_refs); + return src; +} + +void GPUTextureCache::TrackVRAMWrite(const GSVector4i rect) +{ + if (!s_track_vram_writes) + return; + + VRAMWrite* it = new VRAMWrite(); + it->active_rect = rect; + it->write_rect = rect; + it->hash = HashRect(rect); + it->num_page_refs = 0; + LoopRectPages(rect, [it](u32 pn) { + DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, it, &it->page_refs[it->num_page_refs++]); + return true; + }); + + DEV_LOG("New VRAM write {:016X} at {} touching {} pages", it->hash, rect, it->num_page_refs); +} + +void GPUTextureCache::UpdateVRAMTrackingState() +{ + s_track_vram_writes = ShouldTrackVRAMWrites(); +} + +std::pair GPUTextureCache::ReducePaletteBounds(const GSVector4i rect, GPUTextureMode mode, + GPUTexturePaletteReg palette) +{ + DebugAssert(TextureModeHasPalette(mode)); 
+  // Seed with an inverted range (largest index as min, 0 as max) so the scan below can only tighten it.
+  u32 pal_min = GetPaletteWidth(mode) - 1;
+  u32 pal_max = 0;
+
+  const u32 rect_width = rect.width();
+  const u32 rect_height = rect.height();
+
+  if (mode == GPUTextureMode::Palette4Bit)
+  {
+    // Each 16-bit VRAM word packs four 4-bit palette indices.
+    // The row pointer has to advance by VRAM_WIDTH per row so that every row of the rectangle is scanned, not just
+    // the first one.
+    const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x];
+    for (u32 y = 0; y < rect_height; y++)
+    {
+      const u16* ptr = row_ptr;
+      row_ptr += VRAM_WIDTH;
+
+      for (u32 x = 0; x < rect_width; x++)
+      {
+        const u16 val = *(ptr++);
+        const u32 p0 = val & 0xf;
+        const u32 p1 = (val >> 4) & 0xf;
+        const u32 p2 = (val >> 8) & 0xf;
+        const u32 p3 = (val >> 12) & 0xf;
+        pal_min = std::min(pal_min, std::min(p0, std::min(p1, std::min(p2, p3))));
+        pal_max = std::max(pal_max, std::max(p0, std::max(p1, std::max(p2, p3))));
+      }
+    }
+  }
+  else // if (mode == GPUTextureMode::Palette8Bit)
+  {
+    // Each 16-bit VRAM word packs two 8-bit palette indices. Rows are scanned eight words at a time with byte-wise
+    // vector min/max, and whatever is left over is handled one word at a time.
+    const u32 aligned_width = Common::AlignDownPow2(rect_width, 8);
+    const u16* row_ptr = &g_vram[rect.y * VRAM_WIDTH + rect.x];
+    for (u32 y = 0; y < rect_height; y++)
+    {
+      const u16* ptr = reinterpret_cast(row_ptr);
+      row_ptr += VRAM_WIDTH;
+
+      if (aligned_width > 0) [[likely]]
+      {
+        GSVector4i min = GSVector4i::load(ptr);
+        GSVector4i max = min;
+        ptr += 8;
+
+        for (u32 x = 8; x < aligned_width; x += 8)
+        {
+          const GSVector4i v = GSVector4i::load(ptr);
+          ptr += 8;
+
+          min = min.min_u8(v);
+          max = max.max_u8(v);
+        }
+
+        pal_min = std::min(pal_min, min.minv_u8());
+        pal_max = std::max(pal_max, max.maxv_u8());
+      }
+
+      // Tail: words that did not fill a whole vector.
+      for (u32 x = aligned_width; x < rect_width; x++)
+      {
+        const u16 val = *(ptr++);
+        const u32 p0 = (val & 0xFF);
+        const u32 p1 = (val >> 8);
+        pal_min = std::min(pal_min, std::min(p0, p1));
+        pal_max = std::max(pal_max, std::max(p0, p1));
+      }
+    }
+  }
+
+  // Clamp to VRAM bounds.
+ const u32 x_base = palette.GetXBase(); + if ((x_base + pal_max) >= VRAM_WIDTH) [[unlikely]] + { + WARNING_LOG("Texture with CLUT at {},{} is outside of VRAM bounds, clamping.", x_base, palette.GetYBase()); + pal_min = std::min(pal_min, VRAM_WIDTH - x_base - 1); + pal_max = std::min(pal_max, VRAM_WIDTH - x_base - 1); + } + + return std::make_pair(pal_min, pal_max); +} + +void GPUTextureCache::SyncVRAMWritePaletteRecords(VRAMWrite* entry) +{ + // Have to go through any sources that intersect this write, because they may not have been invalidated yet, in which + // case the active rect also will not have been updated. + if (IsDumpingVRAMWriteTextures()) + { + LoopRectPages(entry->active_rect, [entry](const u32 pn) { + const PageEntry& page = s_pages[pn]; + ListIterate(page.sources, [entry](const Source* src) { + if (!src->active_uv_rect.eq(INVALID_RECT)) + UpdateVRAMWriteSources(entry, src->key, src->active_uv_rect, src->palette_record_flags); + }); + + return true; + }); + } +} + +void GPUTextureCache::UpdateVRAMWriteSources(VRAMWrite* entry, SourceKey source_key, const GSVector4i global_uv_rect, + PaletteRecordFlags flags) +{ + // convert to VRAM write space + const GSVector4i write_intersection = entry->active_rect.rintersect(global_uv_rect); + if (write_intersection.rempty()) + return; + + // Add to the palette tracking list + auto iter = std::find_if(entry->palette_records.begin(), entry->palette_records.end(), + [&source_key](const auto& it) { return (it.key == source_key); }); + if (iter != entry->palette_records.end()) + { + iter->rect = iter->rect.runion(write_intersection); + iter->flags |= flags; + } + else + { + InitializeVRAMWritePaletteRecord(&entry->palette_records.emplace_back(), source_key, write_intersection, flags); + } +} + +void GPUTextureCache::SplitVRAMWrite(VRAMWrite* entry, const GSVector4i written_rect) +{ + SyncVRAMWritePaletteRecords(entry); + + const s32 to_left = (written_rect.left - entry->active_rect.left); + const s32 to_right = 
(entry->active_rect.right - written_rect.right); + const s32 to_top = (written_rect.top - entry->active_rect.top); + const s32 to_bottom = (entry->active_rect.bottom - written_rect.bottom); + DebugAssert(to_left > 0 || to_right > 0 || to_top > 0 || to_bottom > 0); + + entry->num_splits++; + + GSVector4i rects[4]; + + // TODO: more efficient vector swizzle + if (std::max(to_top, to_bottom) > std::max(to_left, to_right)) + { + // split top/bottom, then left/right + rects[0] = GSVector4i(entry->active_rect.left, entry->active_rect.top, entry->active_rect.right, written_rect.top); + rects[1] = + GSVector4i(entry->active_rect.left, written_rect.bottom, entry->active_rect.right, entry->active_rect.bottom); + rects[2] = GSVector4i(entry->active_rect.left, entry->active_rect.top + to_top, entry->active_rect.left + to_left, + entry->active_rect.bottom - to_bottom); + rects[3] = GSVector4i(entry->active_rect.right - to_right, entry->active_rect.top + to_top, + entry->active_rect.right, entry->active_rect.bottom - to_bottom); + } + else + { + // split left/right, then top/bottom + rects[0] = + GSVector4i(entry->active_rect.left, entry->active_rect.top, written_rect.left, entry->active_rect.bottom); + rects[1] = + GSVector4i(written_rect.right, entry->active_rect.top, entry->active_rect.right, entry->active_rect.bottom); + rects[2] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.top + to_top, + written_rect.right - to_right, entry->active_rect.top - to_top); + rects[3] = GSVector4i(entry->active_rect.left + to_left, entry->active_rect.bottom - to_bottom, + written_rect.right - to_right, entry->active_rect.bottom); + } + + for (size_t i = 0; i < std::size(rects); i++) + { + const GSVector4i splitr = rects[i]; + if (splitr.rempty()) + continue; + + VRAMWrite* it = new VRAMWrite(); + it->write_rect = entry->write_rect; + it->active_rect = splitr; + it->hash = entry->hash; + it->num_splits = entry->num_splits; + it->num_page_refs = 0; + + // TODO: We probably 
want to share this... + it->palette_records.reserve(entry->palette_records.size()); + for (const VRAMWrite::PaletteRecord& prec : it->palette_records) + { + if (prec.rect.rintersects(splitr)) + it->palette_records.push_back(prec); + } + + LoopRectPages(splitr, [it](u32 pn) { + DebugAssert(it->num_page_refs < MAX_PAGE_REFS_PER_WRITE); + ListAppend(&s_pages[pn].writes, it, &it->page_refs[it->num_page_refs++]); + return true; + }); + + DEV_LOG("Split VRAM write {:016X} at {} in direction {} => {}", it->hash, entry->active_rect, i, splitr); + } + + for (u32 i = 0; i < entry->num_page_refs; i++) + ListUnlink(entry->page_refs[i]); + + delete entry; +} + +void GPUTextureCache::RemoveVRAMWrite(VRAMWrite* entry) +{ + DEV_LOG("Remove VRAM write {:016X} at {}", entry->hash, entry->write_rect); + + SyncVRAMWritePaletteRecords(entry); + + if (entry->num_splits > 0 && !entry->palette_records.empty()) + { + // Combine palette records with another write. + VRAMWrite* other_write = nullptr; + LoopRectPagesWithEarlyExit(entry->write_rect, [&entry, &other_write](u32 pn) { + PageEntry& pg = s_pages[pn]; + ListIterateWithEarlyExit(pg.writes, [&entry, &other_write](VRAMWrite* cur) { + if (cur->hash != entry->hash) + return true; + + other_write = cur; + return false; + }); + return (other_write == nullptr); + }); + if (other_write) + { + for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) + { + const auto iter = std::find_if(other_write->palette_records.begin(), other_write->palette_records.end(), + [&prec](const VRAMWrite::PaletteRecord& it) { return it.key == prec.key; }); + if (iter != other_write->palette_records.end()) + iter->rect = iter->rect.runion(prec.rect); + else + other_write->palette_records.push_back(prec); + } + + // No dumping from here! 
+ entry->palette_records.clear(); + } + } + + for (u32 i = 0; i < entry->num_page_refs; i++) + ListUnlink(entry->page_refs[i]); + + DumpTexturesFromVRAMWrite(entry); + + delete entry; +} + +void GPUTextureCache::DumpTexturesFromVRAMWrite(VRAMWrite* entry) +{ + if (g_settings.texture_replacements.dump_textures && !TextureReplacements::GetConfig().dump_texture_pages) + { + for (const VRAMWrite::PaletteRecord& prec : entry->palette_records) + { + if (prec.key.mode == GPUTextureMode::Direct16Bit && !TextureReplacements::GetConfig().dump_c16_textures) + continue; + + HashType pal_hash = + (prec.key.mode < GPUTextureMode::Direct16Bit) ? HashPalette(prec.key.palette, prec.key.mode) : 0; + + // If it's 8-bit, try reducing the range of the palette. + u32 pal_min = 0, pal_max = prec.key.HasPalette() ? (GetPaletteWidth(prec.key.mode) - 1) : 0; + if (prec.key.HasPalette() && TextureReplacements::GetConfig().reduce_palette_range) + { + std::tie(pal_min, pal_max) = ReducePaletteBounds(prec.rect, prec.key.mode, prec.key.palette); + pal_hash = HashPartialPalette(prec.palette, pal_min, pal_max); + } + + const u32 offset_x = ApplyTextureModeShift(prec.key.mode, prec.rect.left - entry->write_rect.left); + const u32 offset_y = prec.rect.top - entry->write_rect.top; + + TextureReplacements::DumpTexture(TextureReplacements::ReplacementType::TextureFromVRAMWrite, offset_x, offset_y, + entry->write_rect.width(), entry->write_rect.height(), prec.key.mode, + entry->hash, pal_hash, pal_min, pal_max, prec.palette, prec.rect, prec.flags); + } + } +} + +void GPUTextureCache::DumpTextureFromPage(const Source* src) +{ + // C16 filter + if (!TextureReplacements::GetConfig().dump_c16_textures && src->key.mode >= GPUTextureMode::Direct16Bit) + return; + + const bool dump_full_page = TextureReplacements::GetConfig().dump_full_texture_pages; + + // Dump active area from page + HashType pal_hash = src->palette_hash; + const u16* pal_ptr = src->key.HasPalette() ? 
VRAMPalettePointer(src->key.palette) : nullptr; + + // We don't want to dump the wraparound + const GSVector4i unwrapped_texture_rect = + (TexturePageIsWrapping(src->key.mode, src->key.page) ? + GSVector4i(VRAMPageStartX(src->key.page), src->texture_rect.y, VRAM_WIDTH, src->texture_rect.w) : + src->texture_rect); + const GSVector4i dump_rect = + dump_full_page ? unwrapped_texture_rect : src->active_uv_rect.rintersect(unwrapped_texture_rect); + if (dump_rect.rempty()) + return; + + // Need to hash only the active area. + const HashType tex_hash = HashRect(dump_rect); + + // Source rect needs the offset, but we still only want to hash the active area when replacing + const GSVector4i dump_offset_in_page = dump_rect.sub32(unwrapped_texture_rect); + + // If it's 8-bit, try reducing the range of the palette. + u32 pal_min = 0, pal_max = src->key.HasPalette() ? (GetPaletteWidth(src->key.mode) - 1) : 0; + if (src->key.HasPalette() && TextureReplacements::GetConfig().reduce_palette_range) + { + std::tie(pal_min, pal_max) = ReducePaletteBounds(dump_rect, src->key.mode, src->key.palette); + pal_hash = HashPartialPalette(pal_ptr, pal_min, pal_max); + } + + TextureReplacements::DumpTexture(TextureReplacements::ReplacementType::TextureFromPage, + ApplyTextureModeShift(src->key.mode, dump_offset_in_page.x), dump_offset_in_page.y, + unwrapped_texture_rect.width(), unwrapped_texture_rect.height(), src->key.mode, + tex_hash, pal_hash, pal_min, pal_max, pal_ptr, dump_rect, src->palette_record_flags); +} + +GPUTextureCache::HashType GPUTextureCache::HashPage(u8 page, GPUTextureMode mode) +{ + XXH3_state_t state; + XXH3_64bits_reset(&state); + + // Pages aren't contiguous in memory :( + const u16* page_ptr = VRAMPagePointer(page); + + switch (mode) + { + case GPUTextureMode::Palette4Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + case 
GPUTextureMode::Palette8Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 2 * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + case GPUTextureMode::Direct16Bit: + { + for (u32 y = 0; y < VRAM_PAGE_HEIGHT; y++) + { + XXH3_64bits_update(&state, page_ptr, VRAM_PAGE_WIDTH * 4 * sizeof(u16)); + page_ptr += VRAM_WIDTH; + } + } + break; + + DefaultCaseIsUnreachable() + } + + return XXH3_64bits_digest(&state); +} + +GPUTextureCache::HashType GPUTextureCache::HashPalette(GPUTexturePaletteReg palette, GPUTextureMode mode) +{ + const u32 x_base = palette.GetXBase(); + const u16* base = VRAMPalettePointer(palette); + + switch (mode) + { + case GPUTextureMode::Palette4Bit: + return XXH3_64bits(base, sizeof(u16) * 16); + + case GPUTextureMode::Palette8Bit: + { + // If the palette wraps around, chances are we aren't using those indices. + // Games that do this: Metal Gear Solid. + if ((x_base + 256) > VRAM_WIDTH) [[unlikely]] + return XXH3_64bits(base, sizeof(u16) * (VRAM_WIDTH - x_base)); + else + return XXH3_64bits(base, sizeof(u16) * 256); + } + + DefaultCaseIsUnreachable() + } +} + +GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode, + u32 min, u32 max) +{ + DebugAssert((palette.GetXBase() + max + 1) <= VRAM_WIDTH); + return HashPartialPalette(VRAMPalettePointer(palette), min, max); +} + +GPUTextureCache::HashType GPUTextureCache::HashPartialPalette(const u16* palette, u32 min, u32 max) +{ + const u32 size = max - min + 1; + return XXH3_64bits(palette, sizeof(u16) * size); +} + +GPUTextureCache::HashType GPUTextureCache::HashRect(const GSVector4i rc) +{ + XXH3_state_t state; + XXH3_64bits_reset(&state); + + const u32 width = rc.width(); + const u32 height = rc.height(); + const u16* ptr = &g_vram[rc.top * VRAM_WIDTH + rc.left]; + for (u32 y = 0; y < height; y++) + { + XXH3_64bits_update(&state, ptr, width * sizeof(u16)); + ptr += 
VRAM_WIDTH; + } + + return XXH3_64bits_digest(&state); +} + +void GPUTextureCache::InitializeVRAMWritePaletteRecord(VRAMWrite::PaletteRecord* record, SourceKey source_key, + const GSVector4i rect, PaletteRecordFlags flags) +{ + record->rect = rect; + record->key = source_key; + record->flags = flags; + + switch (source_key.mode) + { + case GPUTextureMode::Palette4Bit: + { + // Always has 16 colours. + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 16 * sizeof(u16)); + record->palette_hash = XXH3_64bits(record->palette, 16 * sizeof(u16)); + } + break; + + case GPUTextureMode::Palette8Bit: + { + // Might have less if we're extending over the edge. Clamp it. + const u32 pal_width = std::min(256, VRAM_WIDTH - source_key.palette.GetXBase()); + if (pal_width != 256) + { + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), pal_width * sizeof(u16)); + std::memset(&record->palette[pal_width], 0, sizeof(record->palette) - (pal_width * sizeof(u16))); + record->palette_hash = XXH3_64bits(record->palette, pal_width * sizeof(u16)); + } + else + { + // Whole thing, 2ez. + std::memcpy(record->palette, VRAMPalettePointer(source_key.palette), 256 * sizeof(u16)); + record->palette_hash = XXH3_64bits(record->palette, 256 * sizeof(u16)); + } + } + break; + + case GPUTextureMode::Direct16Bit: + { + // No palette. 
+ std::memset(record->palette, 0, sizeof(record->palette)); + record->palette_hash = 0; + } + break; + + DefaultCaseIsUnreachable() + } +} + +GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key, HashType tex_hash, HashType pal_hash) +{ + const HashCacheKey hkey = {tex_hash, pal_hash, static_cast(key.mode)}; + + const auto it = s_hash_cache.find(hkey); + if (it != s_hash_cache.end()) + { + GL_INS_FMT("TC: Hash cache hit {:X} {:X}", hkey.texture_hash, hkey.palette_hash); + return &it->second; + } + + GL_INS_FMT("TC: Hash cache miss {:X} {:X}", hkey.texture_hash, hkey.palette_hash); + + HashCacheEntry entry; + entry.ref_count = 0; + entry.last_used_frame = 0; + entry.sources = {}; + entry.texture = g_gpu_device->FetchTexture(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, 1, 1, 1, + GPUTexture::Type::Texture, GPUTexture::Format::RGBA8); + if (!entry.texture) + { + ERROR_LOG("Failed to create texture."); + return nullptr; + } + + DecodeTexture(key.page, key.palette, key.mode, entry.texture.get()); + + if (g_settings.texture_replacements.enable_texture_replacements) + ApplyTextureReplacements(key, tex_hash, pal_hash, &entry); + + s_hash_cache_memory_usage += entry.texture->GetVRAMUsage(); + + return &s_hash_cache.emplace(hkey, std::move(entry)).first->second; +} + +void GPUTextureCache::RemoveFromHashCache(HashCache::iterator it) +{ + ListIterate(it->second.sources, [](Source* source) { DestroySource(source); }); + + const size_t vram_usage = it->second.texture->GetVRAMUsage(); + DebugAssert(s_hash_cache_memory_usage >= vram_usage); + s_hash_cache_memory_usage -= vram_usage; + + g_gpu_device->RecycleTexture(std::move(it->second.texture)); + s_hash_cache.erase(it); +} + +void GPUTextureCache::ClearHashCache() +{ + while (!s_hash_cache.empty()) + RemoveFromHashCache(s_hash_cache.begin()); +} + +void GPUTextureCache::Compact() +{ + // Number of frames before unused hash cache entries are evicted. 
+ static constexpr u32 MAX_HASH_CACHE_AGE = 600; + + // Maximum number of textures which are permitted in the hash cache at the end of the frame. + static constexpr u32 MAX_HASH_CACHE_SIZE = 500; + + bool might_need_cache_purge = + (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage); + if (might_need_cache_purge) + s_hash_cache_purge_list.clear(); + + const u32 frame_number = System::GetFrameNumber(); + const u32 min_frame_number = ((frame_number > MAX_HASH_CACHE_AGE) ? (frame_number - MAX_HASH_CACHE_AGE) : 0); + + for (auto it = s_hash_cache.begin(); it != s_hash_cache.end();) + { + HashCacheEntry& e = it->second; + if (e.ref_count == 0 && e.last_used_frame < min_frame_number) + { + RemoveFromHashCache(it++); + continue; + } + + // We might free up enough just with "normal" removals above. + if (might_need_cache_purge) + { + might_need_cache_purge = + (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage); + if (might_need_cache_purge) + s_hash_cache_purge_list.emplace_back(it, static_cast(e.last_used_frame)); + } + + ++it; + } + + // Pushing to a list, sorting, and removing ends up faster than re-iterating the map. 
+  if (might_need_cache_purge)
+  {
+    // Ascending sort on the frame number stored with each candidate: least recently used entries are purged first.
+    std::sort(s_hash_cache_purge_list.begin(), s_hash_cache_purge_list.end(),
+              [](const auto& lhs, const auto& rhs) { return lhs.second < rhs.second; });
+
+    size_t purge_index = 0;
+    while (s_hash_cache.size() > MAX_HASH_CACHE_SIZE || s_hash_cache_memory_usage >= s_max_hash_cache_memory_usage)
+    {
+      // Every candidate has been removed and the cache is still over its limits: log it and stop.
+      if (purge_index == s_hash_cache_purge_list.size())
+      {
+        WARNING_LOG("Cannot find hash cache entries to purge, current hash cache size is {} MB in {} textures.",
+                    static_cast(s_hash_cache_memory_usage) / 1048576.0, s_hash_cache.size());
+        break;
+      }
+
+      RemoveFromHashCache(s_hash_cache_purge_list[purge_index++].first);
+    }
+  }
+}
+
+/// Hash functor for HashCacheKey: combines the texture hash, palette hash and texture mode with hash_combine().
+size_t GPUTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& k) const
+{
+  std::size_t h = 0;
+  hash_combine(h, k.texture_hash, k.palette_hash, k.mode);
+  return h;
+}
+
+/// Overlays replacement images onto the texture held by a hash cache entry.
+///
+/// Sub-images come from two lookups: texture-page replacements (by page, texture hash, palette hash, mode and
+/// palette) and VRAM-write replacements (for each VRAM write recorded on the pages that LoopRectPages() visits for
+/// the rectangle of key.page). When neither lookup yields anything the entry is left unchanged.
+///
+/// Otherwise the entry's current texture is drawn into s_replacement_texture_render_target, every sub-image is
+/// drawn on top at its scaled destination rectangle, and the result is copied into a newly fetched texture which
+/// takes the place of entry->texture; the previous texture is passed to RecycleTexture(). If the render target or
+/// the new texture cannot be created, an error is logged and the entry keeps its original texture.
+void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash,
+                                               HashCacheEntry* entry)
+{
+  std::vector subimages;
+  if (TextureReplacements::HasTexturePageTextureReplacements())
+  {
+    TextureReplacements::GetTexturePageTextureReplacements(subimages, key.page, tex_hash, pal_hash, key.mode,
+                                                           key.palette);
+  }
+
+  if (TextureReplacements::HasVRAMWriteTextureReplacements())
+  {
+    // This lookup is keyed on the hash stored with each VRAM write (vrw->hash); tex_hash is not used here.
+    const GSVector4i page_rect = VRAMPageRect(key.page);
+    LoopRectPages(page_rect, [&key, &pal_hash, &subimages, &page_rect](u32 pn) {
+      const PageEntry& page = s_pages[pn];
+      ListIterate(page.writes, [&key, &pal_hash, &subimages, &page_rect](const VRAMWrite* vrw) {
+        // TODO: Is this needed?
+        if (!vrw->write_rect.rintersects(page_rect))
+          return;
+
+        // Map VRAM write to the start of the page.
+        GSVector2i offset_to_page = page_rect.sub32(vrw->write_rect).xy();
+
+        // Need to apply the texture shift on the X dimension, not Y. No SLLV on SSE4.. :(
+        offset_to_page.x = ApplyTextureModeShift(key.mode, offset_to_page.x);
+
+        TextureReplacements::GetVRAMWriteTextureReplacements(subimages, vrw->hash, pal_hash, key.mode, key.palette,
+                                                             offset_to_page);
+      });
+    });
+  }
+
+  if (subimages.empty())
+    return;
+
+  // The output is sized for the largest X and Y scale found among the sub-images.
+  float max_scale_x = subimages[0].scale_x, max_scale_y = subimages[0].scale_y;
+  for (size_t i = 0; i < subimages.size(); i++)
+  {
+    max_scale_x = std::max(max_scale_x, subimages[i].scale_x);
+    max_scale_y = std::max(max_scale_y, subimages[i].scale_y);
+  }
+
+  // Clamp to max texture size. Both axes are limited by the same ratio, computed from TEXTURE_PAGE_WIDTH.
+  const float max_possible_scale =
+    static_cast(g_gpu_device->GetMaxTextureSize()) / static_cast(TEXTURE_PAGE_WIDTH);
+  max_scale_x = std::min(max_scale_x, max_possible_scale);
+  max_scale_y = std::min(max_scale_y, max_possible_scale);
+
+  const GSVector4 max_scale_v = GSVector4(max_scale_x, max_scale_y).xyxy();
+  GPUSampler* filter = TextureReplacements::GetConfig().replacement_scale_linear_filter ?
+                         g_gpu_device->GetLinearSampler() :
+                         g_gpu_device->GetNearestSampler();
+
+  const u32 new_width = static_cast(std::ceil(static_cast(TEXTURE_PAGE_WIDTH) * max_scale_x));
+  const u32 new_height = static_cast(std::ceil(static_cast(TEXTURE_PAGE_HEIGHT) * max_scale_y));
+  // The shared render target is only recreated when it is missing or too small in either dimension, so it may end
+  // up larger than new_width x new_height.
+  if (!s_replacement_texture_render_target || s_replacement_texture_render_target->GetWidth() < new_width ||
+      s_replacement_texture_render_target->GetHeight() < new_height)
+  {
+    // NOTE: Not recycled, it's unlikely to be reused.
+    s_replacement_texture_render_target.reset();
+    if (!(s_replacement_texture_render_target = g_gpu_device->CreateTexture(
+            new_width, new_height, 1, 1, 1, GPUTexture::Type::RenderTarget, REPLACEMENT_TEXTURE_FORMAT)))
+    {
+      ERROR_LOG("Failed to create {}x{} render target.", new_width, new_height);
+      return;
+    }
+  }
+
+  // Grab the actual texture beforehand, in case we OOM.
+  // (No render target, viewport or pipeline has been set by this function yet when it returns here.)
+  std::unique_ptr replacement_tex =
+    g_gpu_device->FetchTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT);
+  if (!replacement_tex)
+  {
+    ERROR_LOG("Failed to create {}x{} texture.", new_width, new_height);
+    return;
+  }
+
+  // TODO: This is AWFUL. Need a better way.
+  // Linear filtering is also wrong, it should do hard edges for 0000 pixels.
+  // We could just copy this from the original image...
+  // First pass: draw the entry's existing texture across the whole new_width x new_height viewport.
+  // NOTE(review): Draw(3, 0) with no vertex data bound here is presumably a shader-generated full-screen triangle,
+  // and u_src_rect presumably the normalized source rectangle it samples — confirm against the pipeline's shaders.
+  static constexpr const float u_src_rect[4] = {0.0f, 0.0f, 1.0f, 1.0f};
+  g_gpu_device->InvalidateRenderTarget(s_replacement_texture_render_target.get());
+  g_gpu_device->SetRenderTarget(s_replacement_texture_render_target.get());
+  g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height);
+  g_gpu_device->SetPipeline(s_replacement_init_pipeline.get());
+  g_gpu_device->PushUniformBuffer(u_src_rect, sizeof(u_src_rect));
+  g_gpu_device->SetTextureSampler(0, entry->texture.get(), filter);
+  g_gpu_device->Draw(3, 0);
+
+  // Second pass: upload each replacement image to a temporary texture and draw it into its destination rectangle,
+  // scaled by the maximum scale chosen above.
+  for (const TextureReplacements::ReplacementSubImage& si : subimages)
+  {
+    const auto temp_texture = g_gpu_device->FetchAutoRecycleTexture(
+      si.image.GetWidth(), si.image.GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, REPLACEMENT_TEXTURE_FORMAT,
+      si.image.GetPixels(), si.image.GetPitch());
+    // A sub-image whose temporary texture cannot be fetched is skipped without logging.
+    if (!temp_texture)
+      continue;
+
+    const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v);
+    g_gpu_device->SetViewportAndScissor(dst_rect);
+    g_gpu_device->SetTextureSampler(0, temp_texture.get(), filter);
+    g_gpu_device->SetPipeline(si.invert_alpha ? s_replacement_semitransparent_draw_pipeline.get() :
+                                                s_replacement_draw_pipeline.get());
+    g_gpu_device->Draw(3, 0);
+  }
+
+  // Only the new_width x new_height region is copied out of the (possibly larger) render target.
+  g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_replacement_texture_render_target.get(), 0, 0, 0,
+                                  0, new_width, new_height);
+  g_gpu_device->RecycleTexture(std::move(entry->texture));
+  entry->texture = std::move(replacement_tex);
+
+  // Render target, viewport, pipeline and sampler were all changed above.
+  g_gpu->RestoreDeviceContext();
+}
\ No newline at end of file
diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h
new file mode 100644
index 000000000..3e3b78a76
--- /dev/null
+++ b/src/core/gpu_hw_texture_cache.h
@@ -0,0 +1,137 @@
+// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin
+// SPDX-License-Identifier: CC-BY-NC-ND-4.0
+
+#pragma once
+
+#include "gpu_types.h"
+#include "texture_replacements.h"
+
+class GPUTexture;
+class StateWrapper;
+
+struct Settings;
+
+//////////////////////////////////////////////////////////////////////////
+// Texture Cache
+//////////////////////////////////////////////////////////////////////////
+namespace GPUTextureCache {
+
+/// 4 pages in C16 mode, 2+4 pages in P8 mode, 1+1 pages in P4 mode.
+static constexpr u32 MAX_PAGE_REFS_PER_SOURCE = 6;
+
+static constexpr u32 MAX_PAGE_REFS_PER_WRITE = 32;
+
+enum class PaletteRecordFlags : u32
+{
+  None = 0,
+  HasSemiTransparentDraws = (1 << 0),
+};
+IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(PaletteRecordFlags);
+
+using HashType = u64;
+
+struct Source;
+struct HashCacheEntry;
+
+template
+struct TList;
+template
+struct TListNode;
+
+template
+struct TList
+{
+  TListNode* head;
+  TListNode* tail;
+};
+
+template
+struct TListNode
+{
+  // why inside itself?
because we have 3 lists
+  T* ref;
+  TList* list;
+  TListNode* prev;
+  TListNode* next;
+};
+
+/// Key made of a page number, a texture mode and a palette register.
+/// Equality is a byte-wise compare of the whole struct; the static_assert after the struct pins its size to 4 bytes.
+struct SourceKey
+{
+  u8 page;
+  GPUTextureMode mode;
+  GPUTexturePaletteReg palette;
+
+  SourceKey() = default;
+  // Note the argument order (page, palette, mode) differs from the member order (page, mode, palette).
+  ALWAYS_INLINE constexpr SourceKey(u8 page_, GPUTexturePaletteReg palette_, GPUTextureMode mode_)
+    : page(page_), mode(mode_), palette(palette_)
+  {
+  }
+  ALWAYS_INLINE constexpr SourceKey(const SourceKey& k) : page(k.page), mode(k.mode), palette(k.palette) {}
+
+  /// True for texture modes ordered before Direct16Bit.
+  ALWAYS_INLINE bool HasPalette() const { return (mode < GPUTextureMode::Direct16Bit); }
+
+  // The palette is copied through its raw `bits` value.
+  ALWAYS_INLINE SourceKey& operator=(const SourceKey& k)
+  {
+    page = k.page;
+    mode = k.mode;
+    palette.bits = k.palette.bits;
+    return *this;
+  }
+
+  // Byte-wise comparison of the whole object.
+  // NOTE(review): std::memcmp is used, but this header directly includes only gpu_types.h and
+  // texture_replacements.h, so <cstring> presumably arrives transitively — confirm.
+  ALWAYS_INLINE bool operator==(const SourceKey& k) const { return (std::memcmp(&k, this, sizeof(SourceKey)) == 0); }
+  ALWAYS_INLINE bool operator!=(const SourceKey& k) const { return (std::memcmp(&k, this, sizeof(SourceKey)) != 0); }
+};
+static_assert(sizeof(SourceKey) == 4);
+
+// TODO: Pool objects
+// NOTE(review): the per-field notes below are read off names and types only — confirm against the .cpp.
+struct Source
+{
+  SourceKey key;
+  u32 num_page_refs; // presumably the number of page_refs entries in use
+  GPUTexture* texture; // raw pointer; ownership is not expressed by this struct
+  HashCacheEntry* from_hash_cache; // presumably the hash cache entry this source was created from, if any
+  GSVector4i texture_rect;
+  GSVector4i palette_rect;
+  HashType texture_hash;
+  HashType palette_hash;
+  GSVector4i active_uv_rect;
+  PaletteRecordFlags palette_record_flags;
+
+  // Intrusive list nodes: up to MAX_PAGE_REFS_PER_SOURCE page lists plus one hash cache list.
+  std::array, MAX_PAGE_REFS_PER_SOURCE> page_refs;
+  TListNode hash_cache_ref;
+};
+
+bool Initialize();
+void UpdateSettings(const Settings& old_settings);
+// NOTE(review): GPU_SW::DoState() calls this with skip=true "to toss any data in the state", so skip presumably
+// consumes the texture cache section without applying it — confirm in the implementation.
+bool DoState(StateWrapper& sw, bool skip);
+void Shutdown();
+
+void Invalidate();
+
+void AddWrittenRectangle(const GSVector4i rect);
+void AddCopiedRectanglePart1(const GSVector4i rect); // TODO: Rename this shit
+void AddCopiedRectanglePart2(const GSVector4i rect);
+void AddDrawnRectangle(const GSVector4i rect);
+
+void TrackVRAMWrite(const GSVector4i rect);
+void UpdateVRAMTrackingState();
+
+const Source* LookupSource(SourceKey key, const GSVector4i uv_rect, PaletteRecordFlags flags);
+
+bool IsPageDrawn(u32 page_index);
+bool IsPageDrawn(u32 page_index, const GSVector4i rect);
+bool IsRectDrawn(const GSVector4i rect);
+bool AreSourcePagesDrawn(SourceKey key, const GSVector4i rect);
+
+void InvalidatePageSources(u32 pn);
+void InvalidatePageSources(u32 pn, const GSVector4i rc);
+void DestroySource(Source* src);
+
+void Compact();
+
+void DecodeTexture(GPUTextureMode mode, const u16* page_ptr, const u16* palette, u32* dest, u32 dest_stride, u32 width,
+                   u32 height);
+HashType HashPartialPalette(GPUTexturePaletteReg palette, GPUTextureMode mode, u32 min, u32 max);
+HashType HashRect(const GSVector4i rc);
+
+} // namespace GPUTextureCache
diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index f9ba57035..4bbc9368f 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: CC-BY-NC-ND-4.0

 #include "gpu_sw.h"
+#include "gpu_hw_texture_cache.h"
 #include "system.h"

 #include "util/gpu_device.h"
@@ -69,7 +70,11 @@ bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
   m_backend.Sync(true);

   // ignore the host texture for software mode, since we want to save vram here
-  return GPU::DoState(sw, nullptr, update_display);
+  if (!GPU::DoState(sw, nullptr, update_display))
+    return false;
+
+  // need to still call the TC, to toss any data in the state
+  return GPUTextureCache::DoState(sw, true);
 }

 void GPU_SW::Reset(bool clear_vram)
diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h
index a6483a079..180c0e7ae 100644
--- a/src/core/gpu_types.h
+++ b/src/core/gpu_types.h
@@ -26,7 +26,15 @@ enum : u32
   GPU_MAX_DISPLAY_WIDTH = 720,
   GPU_MAX_DISPLAY_HEIGHT = 576,

-  DITHER_MATRIX_SIZE = 4
+  DITHER_MATRIX_SIZE = 4,
+
+  VRAM_PAGE_WIDTH = 64,
+  VRAM_PAGE_HEIGHT = 256,
+  VRAM_PAGES_WIDE = VRAM_WIDTH / VRAM_PAGE_WIDTH,
+  VRAM_PAGES_HIGH = VRAM_HEIGHT / VRAM_PAGE_HEIGHT,
+  VRAM_PAGE_X_MASK = 0xf,  // 16 pages wide
+  VRAM_PAGE_Y_MASK = 0x10, // 2 pages high
+  NUM_VRAM_PAGES = VRAM_PAGES_WIDE * VRAM_PAGES_HIGH,
 };

 enum : s32
@@ -61,6
+69,11 @@ enum class GPUTextureMode : u8

 IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);

+/// Returns true for texture modes ordered before Direct16Bit, i.e. the modes that read a palette.
+ALWAYS_INLINE static constexpr bool TextureModeHasPalette(GPUTextureMode mode)
+{
+  return (mode < GPUTextureMode::Direct16Bit);
+}
+
 enum class GPUTransparencyMode : u8
 {
   HalfBackgroundPlusHalfForeground = 0,
@@ -169,7 +182,7 @@ static constexpr s32 TruncateGPUVertexPosition(s32 x)
 union GPUDrawModeReg
 {
   static constexpr u16 MASK = 0b1111111111111;
-  static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111);
+  static constexpr u16 TEXTURE_MODE_AND_PAGE_MASK = UINT16_C(0b0000000110011111);

   // Polygon texpage commands only affect bits 0-8, 11
   static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111;
@@ -177,11 +190,9 @@ union GPUDrawModeReg
   // Bits 0..5 are returned in the GPU status register, latched at E1h/polygon draw time.
   static constexpr u32 GPUSTAT_MASK = 0b11111111111;

-  static constexpr std::array texture_page_widths = {
-    {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
-
   u16 bits;

+  BitField texture_page;
   BitField texture_page_x_base;
   BitField texture_page_y_base;
   BitField transparency_mode;
@@ -197,15 +208,6 @@ union GPUDrawModeReg

   /// Returns true if the texture mode requires a palette.
   ALWAYS_INLINE bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; }
-
-  /// Returns a rectangle comprising the texture page area.
-  ALWAYS_INLINE_RELEASE GSVector4i GetTexturePageRectangle() const
-  {
-    const u32 base_x = GetTexturePageBaseX();
-    const u32 base_y = GetTexturePageBaseY();
-    return GSVector4i(base_x, base_y, base_x + texture_page_widths[static_cast(texture_mode.GetValue())],
-                      base_y + TEXTURE_PAGE_HEIGHT);
-  }
 };

 union GPUTexturePaletteReg
@@ -217,17 +219,8 @@ union GPUTexturePaletteReg
   BitField x;
   BitField y;

-  ALWAYS_INLINE u32 GetXBase() const { return static_cast(x) * 16u; }
-  ALWAYS_INLINE u32 GetYBase() const { return static_cast(y); }
-
-  /// Returns a rectangle comprising the texture palette area.
-  ALWAYS_INLINE_RELEASE GSVector4i GetRectangle(GPUTextureMode mode) const
-  {
-    static constexpr std::array palette_widths = {{16, 256, 0, 0}};
-    const u32 base_x = GetXBase();
-    const u32 base_y = GetYBase();
-    return GSVector4i(base_x, base_y, base_x + palette_widths[static_cast(mode)], base_y + 1);
-  }
+  ALWAYS_INLINE constexpr u32 GetXBase() const { return static_cast(x) * 16u; }
+  ALWAYS_INLINE constexpr u32 GetYBase() const { return static_cast(y); }
 };

 struct GPUTextureWindow
@@ -238,6 +231,119 @@ struct GPUTextureWindow
   u8 or_y;
 };

+/// Linear page number for page coordinates (px, py): row-major, VRAM_PAGES_WIDE pages per row.
+ALWAYS_INLINE static constexpr u32 VRAMPageIndex(u32 px, u32 py)
+{
+  return ((py * VRAM_PAGES_WIDE) + px);
+}
+/// VRAM rectangle (left, top, right, bottom) of the page at (px, py); right/bottom are the next page's origin.
+ALWAYS_INLINE static constexpr GSVector4i VRAMPageRect(u32 px, u32 py)
+{
+  return GSVector4i::cxpr(px * VRAM_PAGE_WIDTH, py * VRAM_PAGE_HEIGHT, (px + 1) * VRAM_PAGE_WIDTH,
+                          (py + 1) * VRAM_PAGE_HEIGHT);
+}
+/// Same as above, taking a linear page number as produced by VRAMPageIndex().
+ALWAYS_INLINE static constexpr GSVector4i VRAMPageRect(u32 pn)
+{
+  // TODO: Put page rects in a LUT instead?
+  return VRAMPageRect(pn % VRAM_PAGES_WIDE, pn / VRAM_PAGES_WIDE);
+}
+
+/// Linear page number of the page containing VRAM coordinate (x, y).
+ALWAYS_INLINE static constexpr u32 VRAMCoordinateToPage(u32 x, u32 y)
+{
+  return VRAMPageIndex(x / VRAM_PAGE_WIDTH, y / VRAM_PAGE_HEIGHT);
+}
+
+/// VRAM X coordinate of the left edge of page pn.
+ALWAYS_INLINE static constexpr u32 VRAMPageStartX(u32 pn)
+{
+  return (pn % VRAM_PAGES_WIDE) * VRAM_PAGE_WIDTH;
+}
+
+/// VRAM Y coordinate of the top edge of page pn.
+ALWAYS_INLINE static constexpr u32 VRAMPageStartY(u32 pn)
+{
+  return (pn / VRAM_PAGES_WIDE) * VRAM_PAGE_HEIGHT;
+}
+
+/// Shift relating a width in VRAM units to a width in texels: (2 - mode) for paletted modes, 0 otherwise.
+/// NOTE(review): presumably Palette4Bit == 0 and Palette8Bit == 1, giving shifts of 2 and 1 — the enumerator
+/// values are not visible in this hunk, confirm.
+ALWAYS_INLINE static constexpr u8 GetTextureModeShift(GPUTextureMode mode)
+{
+  return ((mode < GPUTextureMode::Direct16Bit) ? (2 - static_cast(mode)) : 0);
+}
+
+/// Scales a VRAM-space width up by the mode shift.
+ALWAYS_INLINE static constexpr u32 ApplyTextureModeShift(GPUTextureMode mode, u32 vram_width)
+{
+  return vram_width << GetTextureModeShift(mode);
+}
+
+/// Rectangle overload: sll32() applies the shift to all four components, i.e. to Y as well as X.
+ALWAYS_INLINE static GSVector4i ApplyTextureModeShift(GPUTextureMode mode, const GSVector4i rect)
+{
+  return rect.sll32(GetTextureModeShift(mode));
+}
+
+/// Number of VRAM pages a texture page spans: (1 + mode) for paletted modes, 4 otherwise.
+ALWAYS_INLINE static constexpr u32 TexturePageCountForMode(GPUTextureMode mode)
+{
+  return ((mode < GPUTextureMode::Direct16Bit) ? (1 + static_cast(mode)) : 4);
+}
+
+/// Width of a texture page in VRAM units for the given mode (TEXTURE_PAGE_WIDTH shifted down by the mode shift).
+ALWAYS_INLINE static constexpr u32 TexturePageWidthForMode(GPUTextureMode mode)
+{
+  return TEXTURE_PAGE_WIDTH >> GetTextureModeShift(mode);
+}
+
+/// True when a texture page starting at page pn would extend past the right edge of VRAM.
+ALWAYS_INLINE static constexpr bool TexturePageIsWrapping(GPUTextureMode mode, u32 pn)
+{
+  return ((VRAMPageStartX(pn) + TexturePageWidthForMode(mode)) > VRAM_WIDTH);
+}
+
+/// Number of VRAM pages a palette may span: 1 for Palette4Bit, 4 for every other mode (non-paletted ones included).
+ALWAYS_INLINE static constexpr u32 PalettePageCountForMode(GPUTextureMode mode)
+{
+  return (mode == GPUTextureMode::Palette4Bit) ? 1 : 4;
+}
+
+/// Linear page number of the page containing the palette's base coordinate.
+ALWAYS_INLINE static constexpr u32 PalettePageNumber(GPUTexturePaletteReg reg)
+{
+  return VRAMCoordinateToPage(reg.GetXBase(), reg.GetYBase());
+}
+
+/// VRAM rectangle covered by a texture page starting at page pn in the given mode.
+/// If the page would run past the right (or bottom) edge of VRAM, the rectangle is widened to span the full
+/// width (or height) of VRAM instead of being split in two.
+ALWAYS_INLINE static constexpr GSVector4i GetTextureRect(u32 pn, GPUTextureMode mode)
+{
+  u32 left = VRAMPageStartX(pn);
+  u32 top = VRAMPageStartY(pn);
+  u32 right = left + TexturePageWidthForMode(mode);
+  u32 bottom = top + VRAM_PAGE_HEIGHT;
+  if (right > VRAM_WIDTH) [[unlikely]]
+  {
+    left = 0;
+    right = VRAM_WIDTH;
+  }
+  if (bottom > VRAM_HEIGHT) [[unlikely]]
+  {
+    top = 0;
+    bottom = VRAM_HEIGHT;
+  }
+
+  return GSVector4i::cxpr(left, top, right, bottom);
+}
+
+/// Returns the number of palette entries for a texture mode: 16 for 4-bit, 256 for 8-bit, 0 otherwise.
+ALWAYS_INLINE static constexpr u32 GetPaletteWidth(GPUTextureMode mode)
+{
+  return (mode == GPUTextureMode::Palette4Bit ? 16 : ((mode == GPUTextureMode::Palette8Bit) ? 256 : 0));
+}
+
+/// Returns a rectangle comprising the texture palette area.
+/// The rectangle is one row high. When it would run past the right edge of VRAM, the right edge is clamped to
+/// VRAM_WIDTH; the left edge is then reset to 0 (full-width row) unless clamp_instead_of_wrapping is set.
+ALWAYS_INLINE static constexpr GSVector4i GetPaletteRect(GPUTexturePaletteReg palette, GPUTextureMode mode,
+                                                         bool clamp_instead_of_wrapping = false)
+{
+  const u32 width = GetPaletteWidth(mode);
+  u32 left = palette.GetXBase();
+  u32 top = palette.GetYBase();
+  u32 right = left + width;
+  u32 bottom = top + 1;
+  if (right > VRAM_WIDTH) [[unlikely]]
+  {
+    right = VRAM_WIDTH;
+    left = clamp_instead_of_wrapping ? left : 0;
+  }
+  return GSVector4i::cxpr(left, top, right, bottom);
+}
+
 // 4x4 dither matrix.
static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1}, // row 0 {+2, -2, +3, -1}, // row 1 diff --git a/src/core/save_state_version.h b/src/core/save_state_version.h index 1001d0345..c6e6aceae 100644 --- a/src/core/save_state_version.h +++ b/src/core/save_state_version.h @@ -6,7 +6,7 @@ #include "common/types.h" static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; -static constexpr u32 SAVE_STATE_VERSION = 71; +static constexpr u32 SAVE_STATE_VERSION = 72; static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42; static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 1e9151a35..3a63a790b 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -234,6 +234,7 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si) si.GetStringValue("GPU", "ForceVideoTiming", GetForceVideoTimingName(DEFAULT_FORCE_VIDEO_TIMING_MODE)).c_str()) .value_or(DEFAULT_FORCE_VIDEO_TIMING_MODE); gpu_widescreen_hack = si.GetBoolValue("GPU", "WidescreenHack", false); + gpu_texture_cache = si.GetBoolValue("GPU", "EnableTextureCache", false); display_24bit_chroma_smoothing = si.GetBoolValue("GPU", "ChromaSmoothing24Bit", false); gpu_pgxp_enable = si.GetBoolValue("GPU", "PGXPEnable", false); gpu_pgxp_culling = si.GetBoolValue("GPU", "PGXPCulling", true); @@ -438,16 +439,38 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si) debugging.show_mdec_state = si.GetBoolValue("Debug", "ShowMDECState"); debugging.show_dma_state = si.GetBoolValue("Debug", "ShowDMAState"); + texture_replacements.enable_texture_replacements = + si.GetBoolValue("TextureReplacements", "EnableTextureReplacements", false); texture_replacements.enable_vram_write_replacements = si.GetBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false); texture_replacements.preload_textures = si.GetBoolValue("TextureReplacements", "PreloadTextures", false); + 
texture_replacements.dump_textures = si.GetBoolValue("TextureReplacements", "DumpTextures", false); texture_replacements.dump_vram_writes = si.GetBoolValue("TextureReplacements", "DumpVRAMWrites", false); - texture_replacements.dump_vram_write_force_alpha_channel = + + texture_replacements.config.dump_texture_pages = si.GetBoolValue("TextureReplacements", "DumpTexturePages", false); + texture_replacements.config.dump_full_texture_pages = + si.GetBoolValue("TextureReplacements", "DumpFullTexturePages", false); + texture_replacements.config.dump_texture_force_alpha_channel = + si.GetBoolValue("TextureReplacements", "DumpTextureForceAlphaChannel", false); + texture_replacements.config.dump_vram_write_force_alpha_channel = si.GetBoolValue("TextureReplacements", "DumpVRAMWriteForceAlphaChannel", true); - texture_replacements.dump_vram_write_width_threshold = - si.GetIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", 128); - texture_replacements.dump_vram_write_height_threshold = - si.GetIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", 128); + texture_replacements.config.dump_c16_textures = si.GetBoolValue("TextureReplacements", "DumpC16Textures", false); + texture_replacements.config.reduce_palette_range = si.GetBoolValue("TextureReplacements", "ReducePaletteRange", true); + texture_replacements.config.convert_copies_to_writes = + si.GetBoolValue("TextureReplacements", "ConvertCopiesToWrites", false); + texture_replacements.config.replacement_scale_linear_filter = + si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", true); + + texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); + + texture_replacements.config.texture_dump_width_threshold = + si.GetUIntValue("TextureReplacements", "DumpTextureWidthThreshold", 16); + texture_replacements.config.texture_dump_height_threshold = + si.GetUIntValue("TextureReplacements", "DumpTextureHeightThreshold", 16); + 
texture_replacements.config.vram_write_dump_width_threshold = + si.GetUIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold", 128); + texture_replacements.config.vram_write_dump_height_threshold = + si.GetUIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold", 128); #ifdef __ANDROID__ // Android users are incredibly silly and don't understand that stretch is in the aspect ratio list... @@ -536,6 +559,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetStringValue("GPU", "WireframeMode", GetGPUWireframeModeName(gpu_wireframe_mode)); si.SetStringValue("GPU", "ForceVideoTiming", GetForceVideoTimingName(gpu_force_video_timing)); si.SetBoolValue("GPU", "WidescreenHack", gpu_widescreen_hack); + si.SetBoolValue("GPU", "EnableTextureCache", gpu_texture_cache); si.SetBoolValue("GPU", "ChromaSmoothing24Bit", display_24bit_chroma_smoothing); si.SetBoolValue("GPU", "PGXPEnable", gpu_pgxp_enable); si.SetBoolValue("GPU", "PGXPCulling", gpu_pgxp_culling); @@ -686,16 +710,36 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("Debug", "ShowDMAState", debugging.show_dma_state); } + si.SetBoolValue("TextureReplacements", "EnableTextureReplacements", texture_replacements.enable_texture_replacements); si.SetBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", texture_replacements.enable_vram_write_replacements); si.SetBoolValue("TextureReplacements", "PreloadTextures", texture_replacements.preload_textures); si.SetBoolValue("TextureReplacements", "DumpVRAMWrites", texture_replacements.dump_vram_writes); + si.SetBoolValue("TextureReplacements", "DumpTextures", texture_replacements.dump_textures); + + si.SetBoolValue("TextureReplacements", "DumpTexturePages", texture_replacements.config.dump_texture_pages); + si.SetBoolValue("TextureReplacements", "DumpFullTexturePages", texture_replacements.config.dump_full_texture_pages); + si.SetBoolValue("TextureReplacements", "DumpTextureForceAlphaChannel", + 
texture_replacements.config.dump_texture_force_alpha_channel);
+
   si.SetBoolValue("TextureReplacements", "DumpVRAMWriteForceAlphaChannel",
-                  texture_replacements.dump_vram_write_force_alpha_channel);
-  si.SetIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold",
-                 texture_replacements.dump_vram_write_width_threshold);
-  si.SetIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold",
-                 texture_replacements.dump_vram_write_height_threshold);
+                  texture_replacements.config.dump_vram_write_force_alpha_channel);
+  si.SetBoolValue("TextureReplacements", "DumpC16Textures", texture_replacements.config.dump_c16_textures);
+  si.SetBoolValue("TextureReplacements", "ReducePaletteRange", texture_replacements.config.reduce_palette_range);
+  si.SetBoolValue("TextureReplacements", "ConvertCopiesToWrites", texture_replacements.config.convert_copies_to_writes);
+  si.SetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter",
+                  texture_replacements.config.replacement_scale_linear_filter);
+
+  si.SetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", texture_replacements.config.max_vram_write_splits);
+
+  si.SetUIntValue("TextureReplacements", "DumpTextureWidthThreshold",
+                  texture_replacements.config.texture_dump_width_threshold);
+  si.SetUIntValue("TextureReplacements", "DumpTextureHeightThreshold",
+                  texture_replacements.config.texture_dump_height_threshold);
+  si.SetUIntValue("TextureReplacements", "DumpVRAMWriteWidthThreshold",
+                  texture_replacements.config.vram_write_dump_width_threshold);
+  si.SetUIntValue("TextureReplacements", "DumpVRAMWriteHeightThreshold",
+                  texture_replacements.config.vram_write_dump_height_threshold);
 }

 void Settings::Clear(SettingsInterface& si)
@@ -725,6 +769,39 @@ void Settings::Clear(SettingsInterface& si)
   si.ClearSection("TextureReplacements");
 }

+/// Member-wise equality over the Configuration fields. System::CheckForSettingsChanges() uses the != form to decide
+/// whether to call TextureReplacements::UpdateConfiguration(), so a field added to Configuration but not listed
+/// here would not trigger that update.
+bool Settings::TextureReplacementSettings::Configuration::operator==(const Configuration& rhs) const
+{
+  return (dump_texture_pages == rhs.dump_texture_pages && dump_full_texture_pages == rhs.dump_full_texture_pages &&
+          dump_texture_force_alpha_channel == rhs.dump_texture_force_alpha_channel &&
+          dump_vram_write_force_alpha_channel == rhs.dump_vram_write_force_alpha_channel &&
+          dump_c16_textures == rhs.dump_c16_textures && reduce_palette_range == rhs.reduce_palette_range &&
+          convert_copies_to_writes == rhs.convert_copies_to_writes &&
+          replacement_scale_linear_filter == rhs.replacement_scale_linear_filter &&
+          max_vram_write_splits == rhs.max_vram_write_splits &&
+          texture_dump_width_threshold == rhs.texture_dump_width_threshold &&
+          texture_dump_height_threshold == rhs.texture_dump_height_threshold &&
+          vram_write_dump_width_threshold == rhs.vram_write_dump_width_threshold &&
+          vram_write_dump_height_threshold == rhs.vram_write_dump_height_threshold);
+}
+
+/// Negation of operator==.
+bool Settings::TextureReplacementSettings::Configuration::operator!=(const Configuration& rhs) const
+{
+  return !operator==(rhs);
+}
+
+/// Member-wise equality over the five enable/dump flags plus the nested Configuration.
+bool Settings::TextureReplacementSettings::operator==(const TextureReplacementSettings& rhs) const
+{
+  return (enable_texture_replacements == rhs.enable_texture_replacements &&
+          enable_vram_write_replacements == rhs.enable_vram_write_replacements &&
+          preload_textures == rhs.preload_textures && dump_textures == rhs.dump_textures &&
+          dump_vram_writes == rhs.dump_vram_writes && config == rhs.config);
+}
+
+/// Negation of operator==.
+bool Settings::TextureReplacementSettings::operator!=(const TextureReplacementSettings& rhs) const
+{
+  return !operator==(rhs);
+}
+
 void Settings::FixIncompatibleSettings(bool display_osd_messages)
 {
   if (g_settings.disable_all_enhancements)
@@ -2049,7 +2126,6 @@ bool EmuFolders::EnsureFoldersExist()
   result = FileSystem::EnsureDirectoryExists(Covers.c_str(), false) && result;
   result = FileSystem::EnsureDirectoryExists(Dumps.c_str(), false) && result;
   result = FileSystem::EnsureDirectoryExists(Path::Combine(Dumps, "audio").c_str(), false) && result;
-  result = FileSystem::EnsureDirectoryExists(Path::Combine(Dumps,
"textures").c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(GameIcons.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(GameSettings.c_str(), false) && result; result = FileSystem::EnsureDirectoryExists(InputProfiles.c_str(), false) && result; diff --git a/src/core/settings.h b/src/core/settings.h index e4c2358c9..155a55bcf 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -121,6 +121,7 @@ struct Settings bool gpu_force_round_texcoords : 1 = false; bool gpu_accurate_blending : 1 = false; bool gpu_widescreen_hack : 1 = false; + bool gpu_texture_cache : 1 = false; bool gpu_pgxp_enable : 1 = false; bool gpu_pgxp_culling : 1 = true; bool gpu_pgxp_texture_correction : 1 = true; @@ -243,20 +244,41 @@ struct Settings // texture replacements struct TextureReplacementSettings { + struct Configuration + { + constexpr Configuration() = default; + + bool dump_texture_pages : 1 = false; + bool dump_full_texture_pages : 1 = false; + bool dump_texture_force_alpha_channel : 1 = false; + bool dump_vram_write_force_alpha_channel : 1 = true; + bool dump_c16_textures : 1 = false; + bool reduce_palette_range : 1 = true; + bool convert_copies_to_writes : 1 = false; + bool replacement_scale_linear_filter = true; + + u32 max_vram_write_splits = 0; + u32 texture_dump_width_threshold = 16; + u32 texture_dump_height_threshold = 16; + + u32 vram_write_dump_width_threshold = 128; + u32 vram_write_dump_height_threshold = 128; + + bool operator==(const Configuration& rhs) const; + bool operator!=(const Configuration& rhs) const; + }; + + bool enable_texture_replacements : 1 = false; bool enable_vram_write_replacements : 1 = false; bool preload_textures : 1 = false; + bool dump_textures : 1 = false; bool dump_vram_writes : 1 = false; - bool dump_vram_write_force_alpha_channel : 1 = true; - u32 dump_vram_write_width_threshold = 128; - u32 dump_vram_write_height_threshold = 128; - ALWAYS_INLINE bool AnyReplacementsEnabled() const { return 
enable_vram_write_replacements; } + Configuration config; - ALWAYS_INLINE bool ShouldDumpVRAMWrite(u32 width, u32 height) - { - return dump_vram_writes && width >= dump_vram_write_width_threshold && height >= dump_vram_write_height_threshold; - } + bool operator==(const TextureReplacementSettings& rhs) const; + bool operator!=(const TextureReplacementSettings& rhs) const; } texture_replacements; bool bios_tty_logging : 1 = false; @@ -348,8 +370,6 @@ struct Settings DEFAULT_DMA_HALT_TICKS = 100, DEFAULT_GPU_FIFO_SIZE = 16, DEFAULT_GPU_MAX_RUN_AHEAD = 128, - DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD = 128, - DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD = 128, }; void Load(SettingsInterface& si, SettingsInterface& controller_si); diff --git a/src/core/system.cpp b/src/core/system.cpp index 8dbf557b1..afa79c805 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -1959,8 +1959,6 @@ void System::DestroySystem() ClearMemorySaveStates(); - TextureReplacements::Shutdown(); - PCDrv::Shutdown(); SIO::Shutdown(); MDEC::Shutdown(); @@ -1974,6 +1972,7 @@ void System::DestroySystem() CPU::Shutdown(); Bus::Shutdown(); TimingEvents::Shutdown(); + TextureReplacements::Shutdown(); ClearRunningGame(); // Restore present-all-frames behavior. 
@@ -4368,6 +4367,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale || g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode || + g_settings.gpu_texture_cache != old_settings.gpu_texture_cache || g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing || g_settings.display_crop_mode != old_settings.display_crop_mode || @@ -4383,7 +4383,10 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_line_start_offset != old_settings.display_line_start_offset || g_settings.display_line_end_offset != old_settings.display_line_end_offset || g_settings.rewind_enable != old_settings.rewind_enable || - g_settings.runahead_frames != old_settings.runahead_frames) + g_settings.runahead_frames != old_settings.runahead_frames || + g_settings.texture_replacements.dump_textures != old_settings.texture_replacements.dump_textures || + g_settings.texture_replacements.enable_texture_replacements != + old_settings.texture_replacements.enable_texture_replacements) { g_gpu->UpdateSettings(old_settings); if (IsPaused()) @@ -4436,10 +4439,14 @@ void System::CheckForSettingsChanges(const Settings& old_settings) if (g_settings.texture_replacements.enable_vram_write_replacements != old_settings.texture_replacements.enable_vram_write_replacements || + g_settings.texture_replacements.enable_texture_replacements != + old_settings.texture_replacements.enable_texture_replacements || g_settings.texture_replacements.preload_textures != old_settings.texture_replacements.preload_textures) { TextureReplacements::Reload(); } + if (g_settings.texture_replacements.config != old_settings.texture_replacements.config) + TextureReplacements::UpdateConfiguration(); if (g_settings.audio_backend != 
old_settings.audio_backend || g_settings.increase_timer_resolution != old_settings.increase_timer_resolution || @@ -4564,53 +4571,66 @@ void System::WarnAboutUnsafeSettings() LargeString messages; auto append = [&messages](const char* icon, std::string_view msg) { messages.append_format("{} {}\n", icon, msg); }; - if (!g_settings.disable_all_enhancements && ImGuiManager::IsShowingOSDMessages()) + if (!g_settings.disable_all_enhancements) { - if (g_settings.cpu_overclock_active) + if (ImGuiManager::IsShowingOSDMessages()) { - append(ICON_EMOJI_WARNING, - SmallString::from_format( - TRANSLATE_FS("System", "CPU clock speed is set to {}% ({} / {}). This may crash games."), - g_settings.GetCPUOverclockPercent(), g_settings.cpu_overclock_numerator, - g_settings.cpu_overclock_denominator)); - } - if (g_settings.cdrom_read_speedup > 1) - { - append(ICON_EMOJI_WARNING, - SmallString::from_format( - TRANSLATE_FS("System", "CD-ROM read speedup set to {}x (effective speed {}x). This may crash games."), - g_settings.cdrom_read_speedup, g_settings.cdrom_read_speedup * 2)); - } - if (g_settings.cdrom_seek_speedup != 1) - { - append(ICON_EMOJI_WARNING, - SmallString::from_format(TRANSLATE_FS("System", "CD-ROM seek speedup set to {}. This may crash games."), - (g_settings.cdrom_seek_speedup == 0) ? - TinyString(TRANSLATE_SV("System", "Instant")) : - TinyString::from_format("{}x", g_settings.cdrom_seek_speedup))); - } - if (g_settings.gpu_force_video_timing != ForceVideoTimingMode::Disabled) - { - append(ICON_FA_TV, TRANSLATE_SV("System", "Force frame timings is enabled. Games may run at incorrect speeds.")); - } - if (!g_settings.IsUsingSoftwareRenderer()) - { - if (g_settings.gpu_multisamples != 1) + if (g_settings.cpu_overclock_active) { append(ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "Multisample anti-aliasing is enabled, some games may not render correctly.")); + SmallString::from_format( + TRANSLATE_FS("System", "CPU clock speed is set to {}% ({} / {}). 
This may crash games."), + g_settings.GetCPUOverclockPercent(), g_settings.cpu_overclock_numerator, + g_settings.cpu_overclock_denominator)); } - if (g_settings.gpu_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) + if (g_settings.cdrom_read_speedup > 1) { - append( - ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "Round upscaled texture coordinates is enabled. This may cause rendering errors.")); + append(ICON_EMOJI_WARNING, + SmallString::from_format( + TRANSLATE_FS("System", "CD-ROM read speedup set to {}x (effective speed {}x). This may crash games."), + g_settings.cdrom_read_speedup, g_settings.cdrom_read_speedup * 2)); + } + if (g_settings.cdrom_seek_speedup != 1) + { + append(ICON_EMOJI_WARNING, + SmallString::from_format(TRANSLATE_FS("System", "CD-ROM seek speedup set to {}. This may crash games."), + (g_settings.cdrom_seek_speedup == 0) ? + TinyString(TRANSLATE_SV("System", "Instant")) : + TinyString::from_format("{}x", g_settings.cdrom_seek_speedup))); + } + if (g_settings.gpu_force_video_timing != ForceVideoTimingMode::Disabled) + { + append(ICON_FA_TV, + TRANSLATE_SV("System", "Force frame timings is enabled. Games may run at incorrect speeds.")); + } + if (!g_settings.IsUsingSoftwareRenderer()) + { + if (g_settings.gpu_multisamples != 1) + { + append(ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "Multisample anti-aliasing is enabled, some games may not render correctly.")); + } + if (g_settings.gpu_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) + { + append( + ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "Round upscaled texture coordinates is enabled. This may cause rendering errors.")); + } + } + if (g_settings.enable_8mb_ram) + { + append(ICON_EMOJI_WARNING, + TRANSLATE_SV("System", "8MB RAM is enabled, this may be incompatible with some games.")); } } - if (g_settings.enable_8mb_ram) + + // Always display TC warning. 
+ if (g_settings.gpu_texture_cache) { - append(ICON_EMOJI_WARNING, - TRANSLATE_SV("System", "8MB RAM is enabled, this may be incompatible with some games.")); + append( + ICON_FA_PAINT_ROLLER, + TRANSLATE_SV("System", + "Texture cache is enabled. This feature is experimental, some games may not render correctly.")); } } diff --git a/src/core/texture_replacements.cpp b/src/core/texture_replacements.cpp index 03f6dc566..f9dc3ca9a 100644 --- a/src/core/texture_replacements.cpp +++ b/src/core/texture_replacements.cpp @@ -2,12 +2,19 @@ // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "texture_replacements.h" +#include "gpu.h" +#include "gpu_hw_texture_cache.h" #include "gpu_types.h" #include "host.h" #include "settings.h" +#include "util/image.h" +#include "util/ini_settings_interface.h" + #include "common/bitutils.h" +#include "common/error.h" #include "common/file_system.h" +#include "common/gsvector_formatter.h" #include "common/hash_combine.h" #include "common/log.h" #include "common/path.h" @@ -15,84 +22,186 @@ #include "common/timer.h" #include "fmt/format.h" +#include "ryml.hpp" + +#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" -#if defined(CPU_ARCH_X86) || defined(CPU_ARCH_X64) +#ifdef CPU_ARCH_SSE #include "xxh_x86dispatch.h" #endif #include +#include +#include #include +#include #include +#include #include Log_SetChannel(TextureReplacements); +#include "common/ryml_helpers.h" + namespace TextureReplacements { namespace { -struct VRAMReplacementHash +struct VRAMReplacementName { u64 low; u64 high; TinyString ToString() const; - bool ParseString(std::string_view sv); + bool Parse(const std::string_view file_title); - bool operator<(const VRAMReplacementHash& rhs) const { return std::tie(low, high) < std::tie(rhs.low, rhs.high); } - bool operator==(const VRAMReplacementHash& rhs) const { return low == rhs.low && high == rhs.high; } - bool operator!=(const VRAMReplacementHash& rhs) const { return low != rhs.low || high != rhs.high; } + bool 
operator<(const VRAMReplacementName& rhs) const { return std::tie(low, high) < std::tie(rhs.low, rhs.high); } + bool operator==(const VRAMReplacementName& rhs) const { return low == rhs.low && high == rhs.high; } + bool operator!=(const VRAMReplacementName& rhs) const { return low != rhs.low || high != rhs.high; } }; -struct VRAMReplacementHashMapHash +struct VRAMReplacementNameHash { - size_t operator()(const VRAMReplacementHash& hash) const; + size_t operator()(const VRAMReplacementName& hash) const; +}; + +struct TextureReplacementIndex +{ + u64 src_hash; + GPUTextureMode mode; + + bool operator<(const TextureReplacementIndex& rhs) const + { + return std::tie(src_hash, mode) < std::tie(rhs.src_hash, rhs.mode); + } + bool operator==(const TextureReplacementIndex& rhs) const { return src_hash == rhs.src_hash && mode == rhs.mode; } + bool operator!=(const TextureReplacementIndex& rhs) const { return src_hash != rhs.src_hash || mode != rhs.mode; } +}; + +struct TextureReplacementIndexHash +{ + size_t operator()(const TextureReplacementIndex& hash) const; +}; + +struct TextureReplacementName +{ + u64 src_hash; + u64 pal_hash; + u16 src_width; + u16 src_height; + ReplacementType type; + u8 texture_mode; + u16 offset_x; + u16 offset_y; + u16 width; + u16 height; + u8 pal_min; + u8 pal_max; + + TinyString ToString() const; + bool Parse(const std::string_view file_title); + TextureReplacementIndex GetIndex() const; + GPUTextureMode GetTextureMode() const; + bool IsSemitransparent() const; + + bool operator<(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) < 0); } + bool operator==(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) == 0); } + bool operator!=(const TextureReplacementName& rhs) const { return (std::memcmp(this, &rhs, sizeof(*this)) != 0); } + + ALWAYS_INLINE GSVector2i GetSizeVec() const { return GSVector2i(width, height); } + ALWAYS_INLINE GSVector2i GetOffsetVec() const { return
GSVector2i(offset_x, offset_y); } + ALWAYS_INLINE GSVector4i GetDestRect() const + { + return GSVector4i(GSVector4i(GetOffsetVec()).xyxy().add32(GSVector4i(GetSizeVec()).zwxy())); + } +}; + +struct DumpedTextureKey +{ + TextureSourceHash tex_hash; + TexturePaletteHash pal_hash; + u16 offset_x, offset_y; + u16 width, height; + ReplacementType type; + u8 texture_mode; + u8 pad[6]; + + ALWAYS_INLINE bool operator==(const DumpedTextureKey& k) const + { + return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) == 0); + } + ALWAYS_INLINE bool operator!=(const DumpedTextureKey& k) const + { + return (std::memcmp(&k, this, sizeof(DumpedTextureKey)) != 0); + } +}; +struct DumpedTextureKeyHash +{ + size_t operator()(const DumpedTextureKey& k) const; }; } // namespace -using VRAMWriteReplacementMap = std::unordered_map; using TextureCache = std::unordered_map; -static bool ParseReplacementFilename(const std::string& filename, VRAMReplacementHash* replacement_hash, - ReplacmentType* replacement_type); +using VRAMReplacementMap = std::unordered_map; +using TextureReplacementMap = + std::unordered_multimap, + TextureReplacementIndexHash>; +static std::optional GetReplacementTypeFromFileTitle(const std::string_view file_title); +static bool HasValidReplacementExtension(const std::string_view path); + +static bool EnsureGameDirectoryExists(); static std::string GetSourceDirectory(); static std::string GetDumpDirectory(); -static VRAMReplacementHash GetVRAMWriteHash(u32 width, u32 height, const void* pixels); -static std::string GetVRAMWriteDumpFilename(u32 width, u32 height, const void* pixels); +static VRAMReplacementName GetVRAMWriteHash(u32 width, u32 height, const void* pixels); +static std::string GetVRAMWriteDumpFilename(const VRAMReplacementName& name); -static void FindTextures(const std::string& dir); +static bool IsMatchingReplacementPalette(TexturePaletteHash full_palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette, const TextureReplacementName& name); 
+static bool LoadLocalConfiguration(bool load_vram_write_replacement_aliases, bool load_texture_replacement_aliases); +static std::string GetDefaultConfigurationString(); + +static void FindTextures(bool load_vram_write_replacements, bool load_texture_replacements); +static void LoadTextureAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases, + bool load_texture_replacement_aliases); static const ReplacementImage* LoadTexture(const std::string& filename); static void PreloadTextures(); static void PurgeUnreferencedTexturesFromCache(); static std::string s_game_id; +static Settings::TextureReplacementSettings::Configuration s_config; // TODO: Check the size, purge some when it gets too large. static TextureCache s_texture_cache; -static VRAMWriteReplacementMap s_vram_write_replacements; +static VRAMReplacementMap s_vram_replacements; + +// TODO: Combine these into one map? +static TextureReplacementMap s_vram_write_texture_replacements; +static TextureReplacementMap s_texture_page_texture_replacements; + +static std::unordered_set s_dumped_vram_writes; +static std::unordered_set s_dumped_textures; } // namespace TextureReplacements -size_t TextureReplacements::VRAMReplacementHashMapHash::operator()(const VRAMReplacementHash& hash) const -{ - size_t hash_hash = std::hash{}(hash.low); - hash_combine(hash_hash, hash.high); - return hash_hash; -} +static_assert(std::is_same_v); +static_assert(std::is_same_v); -TinyString TextureReplacements::VRAMReplacementHash::ToString() const +static constexpr const char LOCAL_CONFIG_FILENAME[] = "config.yaml"; + +TinyString TextureReplacements::VRAMReplacementName::ToString() const { return TinyString::from_format("{:08X}{:08X}", high, low); } -bool TextureReplacements::VRAMReplacementHash::ParseString(std::string_view sv) +bool TextureReplacements::VRAMReplacementName::Parse(const std::string_view file_title) { - if (sv.length() != 32) + if (file_title.length() != 43) return false; - std::optional 
high_value = StringUtil::FromChars(sv.substr(0, 16), 16); - std::optional low_value = StringUtil::FromChars(sv.substr(16), 16); + const std::optional high_value = StringUtil::FromChars(file_title.substr(11, 16), 16); + const std::optional low_value = StringUtil::FromChars(file_title.substr(11 + 16), 16); if (!high_value.has_value() || !low_value.has_value()) return false; @@ -101,6 +210,286 @@ bool TextureReplacements::VRAMReplacementHash::ParseString(std::string_view sv) return true; } +size_t TextureReplacements::VRAMReplacementNameHash::operator()(const VRAMReplacementName& name) const +{ + size_t seed = std::hash{}(name.low); + hash_combine(seed, name.high); + return seed; +} + +static constexpr const char* s_texture_replacement_mode_names[] = {"P4", "P8", "C16", "C16", + "STP4", "STP8", "STC16", "STC16"}; + +TinyString TextureReplacements::TextureReplacementName::ToString() const +{ + const char* type_str = (type == ReplacementType::TextureFromVRAMWrite) ? "texupload" : "texpage"; + const char* mode_str = s_texture_replacement_mode_names[texture_mode]; + if (GetTextureMode() < GPUTextureMode::Direct16Bit) + { + return TinyString::from_format("{}-{}-{:016X}-{:016X}-{}x{}-{}-{}-{}x{}-P{}-{}", type_str, mode_str, src_hash, + pal_hash, src_width, src_height, offset_x, offset_y, width, height, pal_min, + pal_max); + } + else + { + return TinyString::from_format("{}-{}-{:016X}-{}x{}-{}-{}-{}x{}", type_str, mode_str, src_hash, src_width, + src_height, offset_x, offset_y, width, height); + } +} + +bool TextureReplacements::TextureReplacementName::Parse(const std::string_view file_title) +{ + // TODO: Swap to https://github.com/eliaskosunen/scnlib + + std::string_view::size_type start_pos = 0; + std::string_view::size_type end_pos = file_title.find("-", start_pos); + if (end_pos == std::string_view::npos) + return false; + + // type + std::string_view token = file_title.substr(start_pos, end_pos); + if (token == "texupload") + type = 
ReplacementType::TextureFromVRAMWrite; + else if (token == "texpage") + type = ReplacementType::TextureFromPage; + else + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // mode + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional mode_opt; + for (size_t i = 0; i < std::size(s_texture_replacement_mode_names); i++) + { + if (token == s_texture_replacement_mode_names[i]) + { + mode_opt = static_cast(i); + break; + } + } + if (!mode_opt.has_value()) + return false; + texture_mode = mode_opt.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_hash + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val64; + if (token.size() != 16 || !(val64 = StringUtil::FromChars(token, 16)).has_value()) + return false; + src_hash = val64.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + if (GetTextureMode() < GPUTextureMode::Direct16Bit) + { + // pal_hash + token = file_title.substr(start_pos, end_pos - start_pos); + if (token.size() != 16 || !(val64 = StringUtil::FromChars(token, 16)).has_value()) + return false; + pal_hash = val64.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_width + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val16; + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_width = val16.value(); + if (src_width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = 
StringUtil::FromChars(token)).has_value()) + return false; + src_height = val16.value(); + if (src_height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_x + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_x = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_y + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_y = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // width + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + width = val16.value(); + if (width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + height = val16.value(); + if (height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos || file_title[start_pos] != 'P') + return false; + + // pal_min + token = file_title.substr(start_pos + 1, end_pos - start_pos - 1); + std::optional val8; + if (!(val8 = StringUtil::FromChars(token)).has_value()) + return false; + pal_min = val8.value(); + start_pos = end_pos + 1; + + // pal_max + token = file_title.substr(start_pos); + if (!(val8 = StringUtil::FromChars(token)).has_value()) + return false; + pal_max = val8.value(); + if 
(pal_min > pal_max) + return false; + } + else + { + // The search above stopped at the next '-', but direct-colour names continue with + // "{src_width}x{src_height}", so re-scan for the 'x' that terminates src_width. + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_width + token = file_title.substr(start_pos, end_pos - start_pos); + std::optional val16; + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_width = val16.value(); + if (src_width == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // src_height + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + src_height = val16.value(); + if (src_height == 0) + return false; + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_x + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_x = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("-", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // offset_y + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + offset_y = val16.value(); + start_pos = end_pos + 1; + end_pos = file_title.find("x", start_pos + 1); + if (end_pos == std::string_view::npos) + return false; + + // width + token = file_title.substr(start_pos, end_pos - start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + width = val16.value(); + if (width == 0) + return false; + start_pos = end_pos + 1; + + // height + token = file_title.substr(start_pos); + if (!(val16 = StringUtil::FromChars(token)).has_value()) + return false; + height = val16.value(); + if (height == 0) + return false; + } + + return true; +} + +TextureReplacements::TextureReplacementIndex TextureReplacements::TextureReplacementName::GetIndex() const +{ + return {src_hash,
GetTextureMode()}; +} + +GPUTextureMode TextureReplacements::TextureReplacementName::GetTextureMode() const +{ + return static_cast(texture_mode & 3u); +} + +bool TextureReplacements::TextureReplacementName::IsSemitransparent() const +{ + return (texture_mode >= 4); +} + +size_t TextureReplacements::TextureReplacementIndexHash::operator()(const TextureReplacementIndex& name) const +{ + // TODO: This sucks ass, do better. + size_t seed = std::hash{}(name.src_hash); + hash_combine(seed, static_cast(name.mode)); + return seed; +} + +size_t TextureReplacements::DumpedTextureKeyHash::operator()(const DumpedTextureKey& k) const +{ + // TODO: This is slow + std::size_t hash = 0; + hash_combine(hash, k.tex_hash, k.pal_hash, k.width, k.height, k.texture_mode); + return hash; +} + +const Settings::TextureReplacementSettings::Configuration& TextureReplacements::GetConfig() +{ + return s_config; +} + void TextureReplacements::SetGameID(std::string game_id) { if (s_game_id == game_id) @@ -110,22 +499,46 @@ void TextureReplacements::SetGameID(std::string game_id) Reload(); } +void TextureReplacements::Shutdown() +{ + s_texture_cache.clear(); + s_vram_replacements.clear(); + s_vram_write_texture_replacements.clear(); + s_texture_page_texture_replacements.clear(); + // Reset both dump de-duplication sets together with the rest of the per-game state. + s_dumped_vram_writes.clear(); + s_dumped_textures.clear(); + s_game_id = {}; +} + const TextureReplacements::ReplacementImage* TextureReplacements::GetVRAMReplacement(u32 width, u32 height, const void* pixels) { - const VRAMReplacementHash hash = GetVRAMWriteHash(width, height, pixels); + const VRAMReplacementName hash = GetVRAMWriteHash(width, height, pixels); - const auto it = s_vram_write_replacements.find(hash); - if (it == s_vram_write_replacements.end()) + const auto it = s_vram_replacements.find(hash); + if (it == s_vram_replacements.end()) return nullptr; return LoadTexture(it->second); } +bool TextureReplacements::ShouldDumpVRAMWrite(u32 width, u32 height) +{ + return (g_settings.texture_replacements.dump_vram_writes && width >=
s_config.vram_write_dump_width_threshold && + height >= s_config.vram_write_dump_height_threshold); +} + void TextureReplacements::DumpVRAMWrite(u32 width, u32 height, const void* pixels) { - const std::string filename = GetVRAMWriteDumpFilename(width, height, pixels); - if (filename.empty()) + const VRAMReplacementName name = GetVRAMWriteHash(width, height, pixels); + if (s_dumped_vram_writes.find(name) != s_dumped_vram_writes.end()) + return; + + s_dumped_vram_writes.insert(name); + + const std::string filename = GetVRAMWriteDumpFilename(name); + if (filename.empty() || FileSystem::FileExists(filename.c_str())) return; RGBA8Image image; @@ -142,7 +553,7 @@ void TextureReplacements::DumpVRAMWrite(u32 width, u32 height, const void* pixel } } - if (g_settings.texture_replacements.dump_vram_write_force_alpha_channel) + if (s_config.dump_vram_write_force_alpha_channel) { for (u32 y = 0; y < height; y++) { @@ -156,137 +567,433 @@ void TextureReplacements::DumpVRAMWrite(u32 width, u32 height, const void* pixel ERROR_LOG("Failed to dump {}x{} VRAM write to '{}'", width, height, filename); } -void TextureReplacements::Shutdown() +void TextureReplacements::DumpTexture(ReplacementType type, u32 offset_x, u32 offset_y, u32 src_width, u32 src_height, + GPUTextureMode mode, TextureSourceHash src_hash, TexturePaletteHash pal_hash, + u32 pal_min, u32 pal_max, const u16* palette_data, const GSVector4i rect, + GPUTextureCache::PaletteRecordFlags flags) { - s_texture_cache.clear(); - s_vram_write_replacements.clear(); - s_game_id.clear(); -} + const u32 width = ApplyTextureModeShift(mode, rect.width()); + const u32 height = rect.height(); -// TODO: Organize into PCSX2-style. 
-std::string TextureReplacements::GetSourceDirectory() -{ - return Path::Combine(EmuFolders::Textures, s_game_id); -} + if (width < s_config.texture_dump_width_threshold || height < s_config.texture_dump_height_threshold) + return; -std::string TextureReplacements::GetDumpDirectory() -{ - return Path::Combine(EmuFolders::Dumps, Path::Combine("textures", s_game_id)); -} + const bool semitransparent = ((flags & GPUTextureCache::PaletteRecordFlags::HasSemiTransparentDraws) != + GPUTextureCache::PaletteRecordFlags::None && + !s_config.dump_texture_force_alpha_channel); + const u8 dumped_texture_mode = static_cast(mode) | (semitransparent ? 4 : 0); -TextureReplacements::VRAMReplacementHash TextureReplacements::GetVRAMWriteHash(u32 width, u32 height, - const void* pixels) -{ - XXH128_hash_t hash = XXH3_128bits(pixels, width * height * sizeof(u16)); - return {hash.low64, hash.high64}; -} + const DumpedTextureKey key = {src_hash, + pal_hash, + Truncate16(offset_x), + Truncate16(offset_y), + Truncate16(width), + Truncate16(height), + type, + dumped_texture_mode, + {}}; + if (s_dumped_textures.find(key) != s_dumped_textures.end()) + return; -std::string TextureReplacements::GetVRAMWriteDumpFilename(u32 width, u32 height, const void* pixels) -{ - if (s_game_id.empty()) - return {}; - - const VRAMReplacementHash hash = GetVRAMWriteHash(width, height, pixels); - const std::string dump_directory(GetDumpDirectory()); - std::string filename(Path::Combine(dump_directory, fmt::format("vram-write-{}.png", hash.ToString()))); - - if (FileSystem::FileExists(filename.c_str())) - return {}; + if (!EnsureGameDirectoryExists()) + return; + const std::string dump_directory = GetDumpDirectory(); if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false)) - return {}; + return; - return filename; -} + s_dumped_textures.insert(key); -void TextureReplacements::Reload() -{ - s_vram_write_replacements.clear(); + const TextureReplacementName name = { + .src_hash = src_hash, + .pal_hash 
= pal_hash, + .src_width = Truncate16(src_width), + .src_height = Truncate16(src_height), + .type = type, + .texture_mode = dumped_texture_mode, + .offset_x = Truncate16(offset_x), + .offset_y = Truncate16(offset_y), + .width = Truncate16(width), + .height = Truncate16(height), + .pal_min = Truncate8(pal_min), + .pal_max = Truncate8(pal_max), + }; - if (g_settings.texture_replacements.AnyReplacementsEnabled()) - FindTextures(GetSourceDirectory()); + SmallString filename = name.ToString(); + filename.append(".png"); - if (g_settings.texture_replacements.preload_textures) - PreloadTextures(); + const std::string path = Path::Combine(dump_directory, filename); + if (FileSystem::FileExists(path.c_str())) + return; - PurgeUnreferencedTexturesFromCache(); -} + DEV_LOG("Dumping texture {:016X} [{}x{}] at {}", src_hash, width, height, rect); -void TextureReplacements::PurgeUnreferencedTexturesFromCache() -{ - TextureCache old_map = std::move(s_texture_cache); - s_texture_cache = {}; + RGBA8Image image(width, height); + GPUTextureCache::DecodeTexture(mode, &g_vram[rect.top * VRAM_WIDTH + rect.left], palette_data, image.GetPixels(), + image.GetPitch(), width, height); - for (const auto& it : s_vram_write_replacements) + u32* image_pixels = image.GetPixels(); + const u32* image_pixels_end = image.GetPixels() + (width * height); + if (s_config.dump_texture_force_alpha_channel) { - auto it2 = old_map.find(it.second); - if (it2 != old_map.end()) + for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++) + *pixel |= 0xFF000000u; + } + else + { + if (semitransparent) { - s_texture_cache[it.second] = std::move(it2->second); - old_map.erase(it2); + // Alpha channel should be inverted, because 0 means opaque, 1 is semitransparent. + // Pixel value of 0000 is still completely transparent. + for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++) + { + const u32 val = *pixel; + *pixel = (val == 0u) ? 0u : ((val & 0x00FFFFFFu) | ((val & 0x80000000u) ?
0x80000000u : 0xFF000000u)); + } } + else + { + // Only cut out 0000 pixels. + for (u32* pixel = image_pixels; pixel != image_pixels_end; pixel++) + { + const u32 val = *pixel; + *pixel = (val == 0u) ? 0u : (val | 0xFF000000u); + } + } + } + + if (!image.SaveToFile(path.c_str())) + ERROR_LOG("Failed to write texture dump to {}.", Path::GetFileName(path)); +} + +bool TextureReplacements::IsMatchingReplacementPalette(TexturePaletteHash full_palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette, const TextureReplacementName& name) +{ + if (!TextureModeHasPalette(mode)) + return true; + + const u32 full_pal_max = GetPaletteWidth(mode) - 1; + if (name.pal_min == 0 && name.pal_max == full_pal_max) + return (name.pal_hash == full_palette_hash); + + // If the range goes off the edge of VRAM, it's not a match. + if ((palette.GetXBase() + name.pal_max) >= VRAM_WIDTH) + return false; + + // This needs to re-hash every lookup, which is a bit of a bummer. + // But at least there's the hash cache, so it shouldn't be too painful... 
+ const TexturePaletteHash partial_hash = + GPUTextureCache::HashPartialPalette(palette, mode, name.pal_min, name.pal_max); + return (partial_hash == name.pal_hash); +} + +bool TextureReplacements::HasVRAMWriteTextureReplacements() +{ + return !s_vram_write_texture_replacements.empty(); +} + +void TextureReplacements::GetVRAMWriteTextureReplacements(std::vector& replacements, + TextureSourceHash vram_write_hash, + TextureSourceHash palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette, + const GSVector2i& offset_to_page) +{ + const TextureReplacementIndex index = {vram_write_hash, mode}; + const auto& [begin, end] = s_vram_write_texture_replacements.equal_range(index); + if (begin == end) + return; + + const GSVector4i offset_to_page_v = GSVector4i(offset_to_page).xyxy(); + + for (auto it = begin; it != end; ++it) + { + if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first)) + continue; + + const ReplacementImage* image = LoadTexture(it->second.second); + if (!image) + continue; + + const TextureReplacementName& name = it->second.first; + const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); + const GSVector4i rect_in_write_space = name.GetDestRect(); + const GSVector4i rect_in_page_space = rect_in_write_space.sub32(offset_to_page_v); + + // zw <= 0 or zw >= TEXTURE_PAGE_SIZE + if (!(rect_in_page_space.le32( + GSVector4i::cxpr(std::numeric_limits::min(), std::numeric_limits::min(), 0, 0)) | + rect_in_page_space.ge32(GSVector4i::cxpr(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, + std::numeric_limits::max(), std::numeric_limits::max()))) + .allfalse()) + { + // Rect is out of bounds. + continue; + } + + // TODO: This fails in Wild Arms 2, writes that are wider than a page. 
+ DebugAssert(rect_in_page_space.width() == name.width && rect_in_page_space.height() == name.height); + DebugAssert(rect_in_page_space.width() <= static_cast(TEXTURE_PAGE_WIDTH)); + DebugAssert(rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT)); + + replacements.push_back( + ReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, name.IsSemitransparent()}); } } -bool TextureReplacements::ParseReplacementFilename(const std::string& filename, VRAMReplacementHash* replacement_hash, - ReplacmentType* replacement_type) +bool TextureReplacements::HasTexturePageTextureReplacements() { - const std::string_view file_title = Path::GetFileTitle(filename); - if (!file_title.starts_with("vram-write-")) - return false; + return !s_texture_page_texture_replacements.empty(); +} - const std::string_view hashpart = file_title.substr(11); - if (!replacement_hash->ParseString(hashpart)) - return false; +void TextureReplacements::GetTexturePageTextureReplacements(std::vector& replacements, + u32 start_page_number, TextureSourceHash page_hash, + TextureSourceHash palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette) +{ + // This is truely awful. Because we can dump a sub-page worth of texture, we need to examine the entire replacement + // list, because any of them could match up... - const std::string_view file_extension = Path::GetExtension(filename); - bool valid_extension = false; + const u8 shift = GetTextureModeShift(mode); + const GSVector4i page_start_in_vram = + GSVector4i(VRAMPageStartX(start_page_number), VRAMPageStartY(start_page_number)).xyxy(); + + for (TextureReplacementMap::const_iterator it = s_texture_page_texture_replacements.begin(); + it != s_texture_page_texture_replacements.end(); ++it) + { + if (it->first.mode != mode) + continue; + + // Early-out if the palette mismatches, at least that'll save some cycles... 
+ if (!IsMatchingReplacementPalette(palette_hash, mode, palette, it->second.first)) + continue; + + const TextureReplacementName& name = it->second.first; + GSVector4i rect_in_page_space; + if (name.width == TEXTURE_PAGE_WIDTH && name.height == TEXTURE_PAGE_HEIGHT) + { + // This replacement is an entire page, so we can simply check the already-computed page hash. + DebugAssert(name.offset_x == 0 && name.offset_y == 0); + if (it->first.src_hash != page_hash) + continue; + + rect_in_page_space = GSVector4i::cxpr(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); + } + else + { + // Unlike write replacements, the + // Replacement is part of a page, need to re-hash. + rect_in_page_space = name.GetDestRect(); + const GSVector4i hash_rect = + rect_in_page_space.blend32<0x5>(rect_in_page_space.srl32(shift)).add32(page_start_in_vram); + const GPUTextureCache::HashType hash = GPUTextureCache::HashRect(hash_rect); + if (it->first.src_hash != hash) + continue; + } + + const ReplacementImage* image = LoadTexture(it->second.second); + if (!image) + continue; + + const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); + replacements.push_back( + ReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, name.IsSemitransparent()}); + } +} + +std::optional +TextureReplacements::GetReplacementTypeFromFileTitle(const std::string_view path) +{ + if (path.starts_with("vram-write-")) + return ReplacementType::VRAMReplacement; + + if (path.starts_with("texupload-")) + return ReplacementType::TextureFromVRAMWrite; + + if (path.starts_with("texpage-")) + return ReplacementType::TextureFromPage; + + return std::nullopt; +} + +bool TextureReplacements::HasValidReplacementExtension(const std::string_view path) +{ + const std::string_view extension = Path::GetExtension(path); for (const char* test_extension : {"png", "jpg", "webp"}) { - if (StringUtil::EqualNoCase(file_extension, test_extension)) - { - 
valid_extension = true; - break; - } + if (StringUtil::EqualNoCase(extension, test_extension)) + return true; } - *replacement_type = ReplacmentType::VRAMWrite; - return valid_extension; + return false; } -void TextureReplacements::FindTextures(const std::string& dir) +void TextureReplacements::FindTextures(bool load_vram_write_replacements, bool load_texture_replacements) { FileSystem::FindResultsArray files; - FileSystem::FindFiles(dir.c_str(), "*", FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE, &files); + FileSystem::FindFiles(GetSourceDirectory().c_str(), "*", FILESYSTEM_FIND_FILES | FILESYSTEM_FIND_RECURSIVE, &files); for (FILESYSTEM_FIND_DATA& fd : files) { - if (fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) + if ((fd.Attributes & FILESYSTEM_FILE_ATTRIBUTE_DIRECTORY) || !HasValidReplacementExtension(fd.FileName)) continue; - VRAMReplacementHash hash; - ReplacmentType type; - if (!ParseReplacementFilename(fd.FileName, &hash, &type)) + const std::string_view file_title = Path::GetFileTitle(fd.FileName); + const std::optional type = GetReplacementTypeFromFileTitle(file_title); + if (!type.has_value()) continue; - switch (type) + switch (type.value()) { - case ReplacmentType::VRAMWrite: + case ReplacementType::VRAMReplacement: { - auto it = s_vram_write_replacements.find(hash); - if (it != s_vram_write_replacements.end()) + VRAMReplacementName name; + if (!load_vram_write_replacements || !name.Parse(file_title)) + continue; + + if (const auto it = s_vram_replacements.find(name); it != s_vram_replacements.end()) { - WARNING_LOG("Duplicate VRAM write replacement: '{}' and '{}'", it->second, fd.FileName); + WARNING_LOG("Duplicate VRAM replacement: '{}' and '{}'", Path::GetFileName(it->second), + Path::GetFileName(fd.FileName)); continue; } - s_vram_write_replacements.emplace(hash, std::move(fd.FileName)); + s_vram_replacements.emplace(name, std::move(fd.FileName)); } break; + + case ReplacementType::TextureFromVRAMWrite: + case 
ReplacementType::TextureFromPage: + { + TextureReplacementName name; + if (!load_texture_replacements || !name.Parse(file_title)) + continue; + + DebugAssert(name.type == type.value()); + + const TextureReplacementIndex index = name.GetIndex(); + TextureReplacementMap& dest_map = (type.value() == ReplacementType::TextureFromVRAMWrite) ? + s_vram_write_texture_replacements : + s_texture_page_texture_replacements; + + // Multiple replacements in the same write are fine. But they should have different rects. + const auto range = dest_map.equal_range(index); + bool duplicate = false; + for (auto it = range.first; it != range.second; ++it) + { + if (it->second.first == name) [[unlikely]] + { + WARNING_LOG("Duplicate texture replacement: '{}' and '{}'", Path::GetFileName(it->second.second), + Path::GetFileName(fd.FileName)); + duplicate = true; + } + } + if (duplicate) [[unlikely]] + continue; + + dest_map.emplace(index, std::make_pair(name, std::move(fd.FileName))); + } + break; + + DefaultCaseIsUnreachable() } } - INFO_LOG("Found {} replacement VRAM writes for '{}'", s_vram_write_replacements.size(), s_game_id); + if (g_settings.texture_replacements.enable_texture_replacements) + { + INFO_LOG("Found {} replacement upload textures for '{}'", s_vram_write_texture_replacements.size(), s_game_id); + INFO_LOG("Found {} replacement page textures for '{}'", s_texture_page_texture_replacements.size(), s_game_id); + } + + if (g_settings.texture_replacements.enable_vram_write_replacements) + INFO_LOG("Found {} replacement VRAM for '{}'", s_vram_replacements.size(), s_game_id); +} + +void TextureReplacements::LoadTextureAliases(const ryml::ConstNodeRef& root, bool load_vram_write_replacement_aliases, + bool load_texture_replacement_aliases) +{ + const std::string source_dir = GetSourceDirectory(); + + for (const ryml::ConstNodeRef& current : root.cchildren()) + { + const std::string_view key = to_stringview(current.key()); + const std::optional type = 
GetReplacementTypeFromFileTitle(key); + if (!type.has_value()) + continue; + + const std::string_view replacement_filename = to_stringview(current.val()); + std::string replacement_path = Path::Combine(source_dir, replacement_filename); + if (!FileSystem::FileExists(replacement_path.c_str())) + { + ERROR_LOG("File '{}' for alias '{}' does not exist.", replacement_filename, key); + continue; + } + + switch (type.value()) + { + case ReplacementType::VRAMReplacement: + { + VRAMReplacementName name; + if (!load_vram_write_replacement_aliases || !name.Parse(key)) + continue; + + if (const auto it = s_vram_replacements.find(name); it != s_vram_replacements.end()) + { + WARNING_LOG("Duplicate VRAM replacement alias: '{}' and '{}'", Path::GetFileName(it->second), + replacement_filename); + continue; + } + + s_vram_replacements.emplace(name, std::move(replacement_path)); + } + break; + + case ReplacementType::TextureFromVRAMWrite: + case ReplacementType::TextureFromPage: + { + TextureReplacementName name; + if (!load_texture_replacement_aliases || !name.Parse(key)) + continue; + + DebugAssert(name.type == type.value()); + + const TextureReplacementIndex index = name.GetIndex(); + TextureReplacementMap& dest_map = (type.value() == ReplacementType::TextureFromVRAMWrite) ? + s_vram_write_texture_replacements : + s_texture_page_texture_replacements; + + // Multiple replacements in the same write are fine. But they should have different rects. 
+ const auto range = dest_map.equal_range(index); + bool duplicate = false; + for (auto it = range.first; it != range.second; ++it) + { + if (it->second.first == name) [[unlikely]] + { + WARNING_LOG("Duplicate texture replacement alias: '{}' and '{}'", Path::GetFileName(it->second.second), + replacement_filename); + duplicate = true; + } + } + if (duplicate) [[unlikely]] + continue; + + dest_map.emplace(index, std::make_pair(name, std::move(replacement_path))); + } + break; + + DefaultCaseIsUnreachable() + } + } + + if (g_settings.texture_replacements.enable_texture_replacements) + { + INFO_LOG("Found {} replacement upload textures after applying aliases for '{}'", + s_vram_write_texture_replacements.size(), s_game_id); + INFO_LOG("Found {} replacement page textures after applying aliases for '{}'", + s_texture_page_texture_replacements.size(), s_game_id); + } + + if (g_settings.texture_replacements.enable_vram_write_replacements) + INFO_LOG("Found {} replacement VRAM after applying aliases for '{}'", s_vram_replacements.size(), s_game_id); } const TextureReplacements::ReplacementImage* TextureReplacements::LoadTexture(const std::string& filename) @@ -313,7 +1020,8 @@ void TextureReplacements::PreloadTextures() Common::Timer last_update_time; u32 num_textures_loaded = 0; - const u32 total_textures = static_cast(s_vram_write_replacements.size()); + const size_t total_textures = + s_vram_replacements.size() + s_vram_write_texture_replacements.size() + s_texture_page_texture_replacements.size(); #define UPDATE_PROGRESS() \ if (last_update_time.GetTimeSeconds() >= UPDATE_INTERVAL) \ @@ -323,13 +1031,322 @@ void TextureReplacements::PreloadTextures() last_update_time.Reset(); \ } - for (const auto& it : s_vram_write_replacements) + for (const auto& it : s_vram_replacements) { UPDATE_PROGRESS(); - LoadTexture(it.second); num_textures_loaded++; } +#define PROCESS_MAP(map) \ + for (const auto& it : map) \ + { \ + UPDATE_PROGRESS(); \ + LoadTexture(it.second.second); \ + 
num_textures_loaded++; \ + } + + PROCESS_MAP(s_vram_write_texture_replacements); + PROCESS_MAP(s_texture_page_texture_replacements); +#undef PROCESS_MAP #undef UPDATE_PROGRESS } + +bool TextureReplacements::EnsureGameDirectoryExists() +{ + if (s_game_id.empty()) + return false; + + const std::string game_directory = Path::Combine(EmuFolders::Textures, s_game_id); + if (FileSystem::DirectoryExists(game_directory.c_str())) + return true; + + Error error; + if (!FileSystem::CreateDirectory(game_directory.c_str(), false, &error)) + { + ERROR_LOG("Failed to create game directory: {}", error.GetDescription()); + return false; + } + + if (const std::string config_path = Path::Combine(game_directory, LOCAL_CONFIG_FILENAME); + !FileSystem::FileExists(config_path.c_str()) && + !FileSystem::WriteStringToFile(config_path.c_str(), GetDefaultConfigurationString(), &error)) + { + ERROR_LOG("Failed to write configuration template: {}", error.GetDescription()); + return false; + } + + if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "dumps").c_str(), false, &error)) + { + ERROR_LOG("Failed to create dumps directory: {}", error.GetDescription()); + return false; + } + + if (!FileSystem::CreateDirectory(Path::Combine(game_directory, "replacements").c_str(), false, &error)) + { + ERROR_LOG("Failed to create replacements directory: {}", error.GetDescription()); + return false; + } + + return true; +} + +std::string TextureReplacements::GetSourceDirectory() +{ + return Path::Combine(EmuFolders::Textures, + SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "replacements", s_game_id)); +} + +std::string TextureReplacements::GetDumpDirectory() +{ + return Path::Combine(EmuFolders::Textures, SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "dumps", s_game_id)); +} + +TextureReplacements::VRAMReplacementName TextureReplacements::GetVRAMWriteHash(u32 width, u32 height, + const void* pixels) +{ + const XXH128_hash_t hash = XXH3_128bits(pixels, width * height * 
sizeof(u16)); + return {hash.low64, hash.high64}; +} + +std::string TextureReplacements::GetVRAMWriteDumpFilename(const VRAMReplacementName& name) +{ + std::string ret; + if (!EnsureGameDirectoryExists()) + return ret; + + const std::string dump_directory = GetDumpDirectory(); + if (!FileSystem::EnsureDirectoryExists(dump_directory.c_str(), false)) + return ret; + + return Path::Combine(dump_directory, SmallString::from_format("vram-write-{}.png", name.ToString())); +} + +bool TextureReplacements::LoadLocalConfiguration(bool load_vram_write_replacement_aliases, + bool load_texture_replacement_aliases) +{ + const Settings::TextureReplacementSettings::Configuration old_config = s_config; + + // load settings from ini + s_config = g_settings.texture_replacements.config; + + if (s_game_id.empty()) + return (s_config != old_config); + + const std::optional ini_data = FileSystem::ReadFileToString( + Path::Combine(EmuFolders::Textures, + SmallString::from_format("{}" FS_OSPATH_SEPARATOR_STR "{}", s_game_id, LOCAL_CONFIG_FILENAME)) + .c_str()); + if (!ini_data.has_value() || ini_data->empty()) + return (s_config != old_config); + + const ryml::Tree tree = ryml::parse_in_arena(LOCAL_CONFIG_FILENAME, to_csubstr(ini_data.value())); + const ryml::ConstNodeRef root = tree.rootref(); + + // This is false if all we have are comments + if (!root.is_map()) + return (s_config != old_config); + + s_config.dump_texture_pages = + GetOptionalTFromObject(root, "DumpTexturePages").value_or(static_cast(s_config.dump_texture_pages)); + s_config.dump_full_texture_pages = GetOptionalTFromObject(root, "DumpFullTexturePages") + .value_or(static_cast(s_config.dump_full_texture_pages)); + s_config.dump_texture_force_alpha_channel = GetOptionalTFromObject(root, "DumpTextureForceAlphaChannel") + .value_or(static_cast(s_config.dump_texture_force_alpha_channel)); + s_config.dump_vram_write_force_alpha_channel = + GetOptionalTFromObject(root, "DumpVRAMWriteForceAlphaChannel") + 
.value_or(static_cast(s_config.dump_vram_write_force_alpha_channel)); + s_config.dump_c16_textures = + GetOptionalTFromObject(root, "DumpC16Textures").value_or(static_cast(s_config.dump_c16_textures)); + s_config.reduce_palette_range = + GetOptionalTFromObject(root, "ReducePaletteRange").value_or(static_cast(s_config.reduce_palette_range)); + s_config.convert_copies_to_writes = GetOptionalTFromObject(root, "ConvertCopiesToWrites") + .value_or(static_cast(s_config.convert_copies_to_writes)); + s_config.replacement_scale_linear_filter = GetOptionalTFromObject(root, "ReplacementScaleLinearFilter") + .value_or(static_cast(s_config.replacement_scale_linear_filter)); + s_config.max_vram_write_splits = GetOptionalTFromObject(root, "MaxVRAMWriteSplits") + .value_or(static_cast(s_config.max_vram_write_splits)); + s_config.texture_dump_width_threshold = GetOptionalTFromObject(root, "DumpTextureWidthThreshold") + .value_or(static_cast(s_config.texture_dump_width_threshold)); + s_config.texture_dump_height_threshold = GetOptionalTFromObject(root, "DumpTextureHeightThreshold") + .value_or(static_cast(s_config.texture_dump_height_threshold)); + s_config.vram_write_dump_width_threshold = GetOptionalTFromObject(root, "DumpVRAMWriteWidthThreshold") + .value_or(static_cast(s_config.vram_write_dump_width_threshold)); + s_config.vram_write_dump_height_threshold = GetOptionalTFromObject(root, "DumpVRAMWriteHeightThreshold") + .value_or(static_cast(s_config.vram_write_dump_height_threshold)); + + if (load_vram_write_replacement_aliases || load_texture_replacement_aliases) + { + const ryml::ConstNodeRef aliases = root.find_child("Aliases"); + if (aliases.valid() && aliases.has_children()) + LoadTextureAliases(aliases, load_vram_write_replacement_aliases, load_texture_replacement_aliases); + } + + // Any change? 
+ return (s_config != old_config); +} + +std::string TextureReplacements::ExportConfiguration(const Settings::TextureReplacementSettings::Configuration& config, + bool comment) +{ + static constexpr const char LOCAL_CONFIG_TEMPLATE[] = R"(# DuckStation Texture Replacement Configuration +# This file allows you to set a per-game configuration for the dumping and +# replacement system, avoiding the need to use the normal per-game settings +# when moving files to a different computer. It also allows for the definition +# of texture aliases, for reducing duplicate files. +# +# All options are commented out by default. If an option is commented, the user's +# current setting will be used instead. If an option is defined in this file, it +# will always take precedence over the user's choice. + +# Enables texture page dumping mode. +# Instead of tracking VRAM writes and attempting to identify the "real" size of +# textures, create sub-rectangles from pages based on how they are drawn. In +# most games, this will lead to significant duplication in dumps, and reduce +# replacement reliability. However, some games are incompatible with write +# tracking, and must use page mode. +{}DumpTexturePages: {} + +# Dumps full texture pages instead of sub-rectangles. +# 256x256 pages will be dumped/replaced instead. +{}DumpFullTexturePages: {} + +# Enables the dumping of direct textures (i.e. C16 format). +# Most games do not use direct textures, and when they do, it is usually for +# post-processing or FMVs. Ignoring C16 textures typically reduces garbage/false +# positive texture dumps, however, some games may require it. +{}DumpC16Textures: {} + +# Reduces the size of palettes (i.e. CLUTs) to only those indices that are used. +# This can help reduce duplication and improve replacement reliability in games +# that use 8-bit textures, but do not reserve or use the full 1x256 region in +# video memory for storage of the palette. 
When replacing textures dumped with +# this option enabled, CPU usage on the GPU thread does increase trivially, +# however, generally it is worthwhile for the reliability improvement. Games +# that require this option include Metal Gear Solid. +{}ReducePaletteRange: {} + +# Converts VRAM copies to VRAM writes, when a copy is performed into a previously +# tracked VRAM write. This is required for some games that construct animated +# textures by copying and replacing small portions of the texture with the parts +# that are animated. Generally this option will cause duplication when dumping, +# but it is required in some games, such as Final Fantasy VIII. +{}ConvertCopiesToWrites: {} + +# Determines the maximum number of times a VRAM write/upload can be split, before +# it is discarded and no longer tracked. This is required for games that partially +# overwrite texture data, such as Gran Turismo. +{}MaxVRAMWriteSplits: {} + +# Determines the minimum size of a texture that will be dumped. Textures with a +# width smaller than this value will be ignored. +{}DumpTextureWidthThreshold: {} + +# Determines the minimum size of a texture that will be dumped. Textures with a +# height smaller than this value will be ignored. +{}DumpTextureHeightThreshold: {} + +# Determines the minimum size of a VRAM write that will be dumped, in background +# dumping mode. Uploads smaller than this size will be ignored. +{}DumpVRAMWriteWidthThreshold: {} + +# Determines the minimum size of a VRAM write that will be dumped, in background +# dumping mode. Uploads smaller than this size will be ignored. +{}DumpVRAMWriteHeightThreshold: {} + +# Enables the use of a bilinear filter when scaling replacement textures. +# If more than one replacement texture in a 256x256 texture page has a different +# scaling over the native resolution, or the texture page is not covered, a +# bilinear filter will be used to resize/stretch the replacement texture, and/or +# the original native data. 
+{}ReplacementScaleLinearFilter: {} + +# Use this section to define replacement aliases. One line per replacement +# texture, with the key set to the source ID, and the value set to the filename +# which should be loaded as a replacement. For example, without the newline, +# or keep the multi-line separator. +#Aliases: + # Alias-Texture-Name: Path-To-Texture + # texupload-P4-AAAAAAAAAAAAAAAA-BBBBBBBBBBBBBBBB-64x256-0-192-64x64-P0-14: | + # texupload-P4-BBBBBBBBBBBBBBBB-BBBBBBBBBBBBBBBB-64x256-0-64-64x64-P0-13.png + # texupload-P4-AAAAAAAAAAAAAAAA-BBBBBBBBBBBBBBBB-64x256-0-192-64x64-P0-14: mytexture.png +)"; + + const std::string_view comment_str = comment ? "#" : ""; + return fmt::format(LOCAL_CONFIG_TEMPLATE, comment_str, config.dump_texture_pages, // DumpTexturePages + comment_str, config.dump_full_texture_pages, // DumpFullTexturePages + comment_str, config.dump_c16_textures, // DumpC16Textures + comment_str, config.reduce_palette_range, // ReducePaletteRange + comment_str, config.convert_copies_to_writes, // ConvertCopiesToWrites + comment_str, config.max_vram_write_splits, // MaxVRAMWriteSplits + comment_str, config.texture_dump_width_threshold, // DumpTextureWidthThreshold + comment_str, config.texture_dump_height_threshold, // DumpTextureHeightThreshold + comment_str, config.vram_write_dump_width_threshold, // DumpVRAMWriteWidthThreshold + comment_str, config.vram_write_dump_height_threshold, // DumpVRAMWriteHeightThreshold + comment_str, config.replacement_scale_linear_filter); // ReplacementScaleLinearFilter +} + +std::string TextureReplacements::GetDefaultConfigurationString() +{ + Settings::TextureReplacementSettings::Configuration config; + return ExportConfiguration(config, true); +} + +void TextureReplacements::UpdateConfiguration() +{ + // Reload textures if configuration changes. 
+ if (LoadLocalConfiguration(false, false)) + Reload(); +} + +void TextureReplacements::Reload() +{ + s_vram_replacements.clear(); + s_vram_write_texture_replacements.clear(); + s_texture_page_texture_replacements.clear(); + + const bool load_vram_write_replacements = (g_settings.texture_replacements.enable_vram_write_replacements); + const bool load_texture_replacements = + (g_settings.gpu_texture_cache && g_settings.texture_replacements.enable_texture_replacements); + if (load_vram_write_replacements || load_texture_replacements) + FindTextures(load_vram_write_replacements, load_texture_replacements); + + LoadLocalConfiguration(load_vram_write_replacements, load_texture_replacements); + + if (g_settings.texture_replacements.preload_textures) + PreloadTextures(); + + PurgeUnreferencedTexturesFromCache(); + + DebugAssert(g_gpu); + GPUTextureCache::UpdateVRAMTrackingState(); +} + +void TextureReplacements::PurgeUnreferencedTexturesFromCache() +{ + TextureCache old_map = std::move(s_texture_cache); + s_texture_cache = {}; + + for (const auto& it : s_vram_replacements) + { + const auto it2 = old_map.find(it.second); + if (it2 != old_map.end()) + { + s_texture_cache[it.second] = std::move(it2->second); + old_map.erase(it2); + } + } + + for (const TextureReplacementMap* map : {&s_vram_write_texture_replacements, &s_texture_page_texture_replacements}) + { + for (const auto& it : *map) + { + const auto it2 = old_map.find(it.second.second); + if (it2 != old_map.end()) + { + s_texture_cache[it.second.second] = std::move(it2->second); + old_map.erase(it2); + } + } + } +} \ No newline at end of file diff --git a/src/core/texture_replacements.h b/src/core/texture_replacements.h index acac05a36..18d4ac5f2 100644 --- a/src/core/texture_replacements.h +++ b/src/core/texture_replacements.h @@ -3,27 +3,67 @@ #pragma once -#include "types.h" +#include "gpu_types.h" +#include "settings.h" -#include "util/image.h" +#include "common/gsvector.h" #include +#include + +class RGBA8Image; + +namespace 
GPUTextureCache { +enum class PaletteRecordFlags : u32; +} namespace TextureReplacements { -using ReplacementImage = RGBA8Image; - -enum class ReplacmentType +enum class ReplacementType : u8 { - VRAMWrite, + VRAMReplacement, + TextureFromVRAMWrite, + TextureFromPage, }; +using ReplacementImage = RGBA8Image; +using TextureSourceHash = u64; +using TexturePaletteHash = u64; + +struct ReplacementSubImage +{ + GSVector4i dst_rect; + GSVector4i src_rect; + const ReplacementImage& image; + float scale_x; + float scale_y; + bool invert_alpha; +}; + +const Settings::TextureReplacementSettings::Configuration& GetConfig(); +std::string ExportConfiguration(const Settings::TextureReplacementSettings::Configuration& config, bool comment = false); void SetGameID(std::string game_id); +void UpdateConfiguration(); void Reload(); const ReplacementImage* GetVRAMReplacement(u32 width, u32 height, const void* pixels); void DumpVRAMWrite(u32 width, u32 height, const void* pixels); +bool ShouldDumpVRAMWrite(u32 width, u32 height); + +void DumpTexture(ReplacementType type, u32 offset_x, u32 offset_y, u32 src_width, u32 src_height, GPUTextureMode mode, + TextureSourceHash src_hash, TexturePaletteHash pal_hash, u32 pal_min, u32 pal_max, const u16* palette, + const GSVector4i rect, GPUTextureCache::PaletteRecordFlags flags); + +bool HasVRAMWriteTextureReplacements(); +void GetVRAMWriteTextureReplacements(std::vector& replacements, TextureSourceHash vram_write_hash, + TextureSourceHash palette_hash, GPUTextureMode mode, GPUTexturePaletteReg palette, + const GSVector2i& offset_to_page); + +bool HasTexturePageTextureReplacements(); +void GetTexturePageTextureReplacements(std::vector& replacements, u32 start_page_number, + TextureSourceHash page_hash, TextureSourceHash palette_hash, GPUTextureMode mode, + GPUTexturePaletteReg palette); void Shutdown(); diff --git a/src/duckstation-qt/CMakeLists.txt b/src/duckstation-qt/CMakeLists.txt index 2ef7ef404..80007c60f 100644 --- 
a/src/duckstation-qt/CMakeLists.txt +++ b/src/duckstation-qt/CMakeLists.txt @@ -143,6 +143,7 @@ set(SRCS setupwizarddialog.cpp setupwizarddialog.h setupwizarddialog.ui + texturereplacementsettingsdialog.ui ) set(TS_FILES diff --git a/src/duckstation-qt/duckstation-qt.vcxproj b/src/duckstation-qt/duckstation-qt.vcxproj index c4c424dfa..9a5d77201 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj +++ b/src/duckstation-qt/duckstation-qt.vcxproj @@ -339,6 +339,9 @@ Document + + Document + diff --git a/src/duckstation-qt/duckstation-qt.vcxproj.filters b/src/duckstation-qt/duckstation-qt.vcxproj.filters index 7417689d1..c46c6434f 100644 --- a/src/duckstation-qt/duckstation-qt.vcxproj.filters +++ b/src/duckstation-qt/duckstation-qt.vcxproj.filters @@ -284,6 +284,7 @@ + diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index 7e7797e8a..8b01dd916 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -5,13 +5,22 @@ #include "qtutils.h" #include "settingswindow.h" #include "settingwidgetbinder.h" +#include "ui_texturereplacementsettingsdialog.h" #include "core/game_database.h" #include "core/gpu.h" #include "core/settings.h" +#include "core/texture_replacements.h" +#include "util/ini_settings_interface.h" #include "util/media_capture.h" +#include "common/error.h" + +#include +#include +#include +#include #include static QVariant GetMSAAModeValue(uint multisamples, bool ssaa) @@ -234,26 +243,29 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* // Texture Replacements Tab - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteReplacement, "TextureReplacements", - "EnableVRAMWriteReplacements", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureCache, "GPU", "EnableTextureCache", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", 
"UseOldMDECRoutines", false); + + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureReplacements, "TextureReplacements", + "EnableTextureReplacements", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.preloadTextureReplacements, "TextureReplacements", "PreloadTextures", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", "UseOldMDECRoutines", false); + + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureDumping, "TextureReplacements", "DumpTextures", + false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteReplacement, "TextureReplacements", + "EnableVRAMWriteReplacements", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vramWriteDumping, "TextureReplacements", "DumpVRAMWrites", false); - SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.setVRAMWriteAlphaChannel, "TextureReplacements", - "DumpVRAMWriteForceAlphaChannel", true); - SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.minDumpedVRAMWriteWidth, "TextureReplacements", - "DumpVRAMWriteWidthThreshold", - Settings::DEFAULT_VRAM_WRITE_DUMP_WIDTH_THRESHOLD); - SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.minDumpedVRAMWriteHeight, "TextureReplacements", - "DumpVRAMWriteHeightThreshold", - Settings::DEFAULT_VRAM_WRITE_DUMP_HEIGHT_THRESHOLD); + connect(m_ui.enableTextureCache, &QCheckBox::checkStateChanged, this, + &GraphicsSettingsWidget::onEnableTextureCacheChanged); + connect(m_ui.enableTextureReplacements, &QCheckBox::checkStateChanged, this, + &GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged); connect(m_ui.vramWriteReplacement, &QCheckBox::checkStateChanged, this, &GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged); - connect(m_ui.vramWriteDumping, &QCheckBox::checkStateChanged, this, - &GraphicsSettingsWidget::onEnableVRAMWriteDumpingChanged); + connect(m_ui.textureReplacementOptions, &QPushButton::clicked, this, + 
&GraphicsSettingsWidget::onTextureReplacementOptionsClicked); // Debugging Tab @@ -274,8 +286,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* onMediaCaptureBackendChanged(); onMediaCaptureAudioEnabledChanged(); onMediaCaptureVideoEnabledChanged(); + onEnableTextureCacheChanged(); onEnableAnyTextureReplacementsChanged(); - onEnableVRAMWriteDumpingChanged(); onShowDebugSettingsChanged(QtHost::ShouldShowDebugOptions()); // Rendering Tab @@ -555,14 +567,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp(m_ui.useOldMDECRoutines, tr("Use Old MDEC Routines"), tr("Unchecked"), tr("Enables the older, less accurate MDEC decoding routines. May be required for old " "replacement backgrounds to match/load.")); - dialog->registerWidgetHelp(m_ui.setVRAMWriteAlphaChannel, tr("Set Alpha Channel"), tr("Checked"), - tr("Clears the mask/transparency bit in VRAM write dumps.")); dialog->registerWidgetHelp(m_ui.vramWriteDumping, tr("Enable VRAM Write Dumping"), tr("Unchecked"), tr("Writes backgrounds that can be replaced to the dump directory.")); - dialog->registerWidgetHelp(m_ui.minDumpedVRAMWriteWidth, tr("Dump Size Threshold"), tr("128px"), - tr("Determines the threshold that triggers a VRAM write to be dumped.")); - dialog->registerWidgetHelp(m_ui.minDumpedVRAMWriteHeight, tr("Dump Size Threshold"), tr("128px"), - tr("Determines the threshold that triggers a VRAM write to be dumped.")); // Debugging Tab @@ -1108,19 +1114,115 @@ void GraphicsSettingsWidget::onMediaCaptureAudioEnabledChanged() m_ui.audioCaptureArguments->setEnabled(enabled); } +void GraphicsSettingsWidget::onEnableTextureCacheChanged() +{ + const bool tc_enabled = m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false); + m_ui.enableTextureReplacements->setEnabled(tc_enabled); + m_ui.enableTextureDumping->setEnabled(tc_enabled); +} + void GraphicsSettingsWidget::onEnableAnyTextureReplacementsChanged() { const 
bool any_replacements_enabled = - m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false); + (m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableVRAMWriteReplacements", false) || + (m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false) && + m_dialog->getEffectiveBoolValue("TextureReplacements", "EnableTextureReplacements", false))); m_ui.preloadTextureReplacements->setEnabled(any_replacements_enabled); } -void GraphicsSettingsWidget::onEnableVRAMWriteDumpingChanged() +void GraphicsSettingsWidget::onTextureReplacementOptionsClicked() // Runs the modal options dialog: binds its widgets to the "TextureReplacements" settings and lets the user export the current widget values as a config.yaml. { - const bool enabled = m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpVRAMWrites", false); - m_ui.setVRAMWriteAlphaChannel->setEnabled(enabled); - m_ui.minDumpedVRAMWriteWidth->setEnabled(enabled); - m_ui.minDumpedVRAMWriteHeight->setEnabled(enabled); - m_ui.vramWriteDumpThresholdLabel->setEnabled(enabled); - m_ui.vramWriteDumpThresholdSeparator->setEnabled(enabled); + QDialog dlg(QtUtils::GetRootWidget(this)); + + Ui::TextureReplacementSettingsDialog dlgui; + dlgui.setupUi(&dlg); + dlgui.icon->setPixmap(QIcon::fromTheme(QStringLiteral("image-fill")).pixmap(32, 32)); + + constexpr Settings::TextureReplacementSettings::Configuration default_replacement_config; + SettingsInterface* const sif = m_dialog->getSettingsInterface(); + + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpTexturePages, "TextureReplacements", "DumpTexturePages", + default_replacement_config.dump_texture_pages); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpFullTexturePages, "TextureReplacements", + "DumpFullTexturePages", + default_replacement_config.dump_full_texture_pages); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.dumpC16Textures, "TextureReplacements", "DumpC16Textures", + default_replacement_config.dump_c16_textures); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.reducePaletteRange, "TextureReplacements", + "ReducePaletteRange", 
default_replacement_config.reduce_palette_range); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.convertCopiesToWrites, "TextureReplacements", + "ConvertCopiesToWrites", + default_replacement_config.convert_copies_to_writes); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteSplits, "TextureReplacements", + "MaxVRAMWriteSplits", default_replacement_config.max_vram_write_splits); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedTextureWidth, "TextureReplacements", + "DumpTextureWidthThreshold", + default_replacement_config.texture_dump_width_threshold); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedTextureHeight, "TextureReplacements", + "DumpTextureHeightThreshold", + default_replacement_config.texture_dump_height_threshold); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.setTextureDumpAlphaChannel, "TextureReplacements", + "DumpTextureForceAlphaChannel", + default_replacement_config.dump_texture_force_alpha_channel); + + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedVRAMWriteWidth, "TextureReplacements", + "DumpVRAMWriteWidthThreshold", + default_replacement_config.vram_write_dump_width_threshold); + SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.minDumpedVRAMWriteHeight, "TextureReplacements", + "DumpVRAMWriteHeightThreshold", + default_replacement_config.vram_write_dump_height_threshold); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.setVRAMWriteAlphaChannel, "TextureReplacements", + "DumpVRAMWriteForceAlphaChannel", + default_replacement_config.dump_vram_write_force_alpha_channel); + + dlgui.dumpFullTexturePages->setEnabled( + m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpTexturePages", false)); + dlgui.dumpFullTexturePagesLabel->setEnabled(dlgui.dumpFullTexturePages->isEnabled()); + connect(dlgui.dumpTexturePages, &QCheckBox::checkStateChanged, this, + [this, full_cb = dlgui.dumpFullTexturePages, full_label = 
dlgui.dumpFullTexturePagesLabel]() { + full_cb->setEnabled(m_dialog->getEffectiveBoolValue("TextureReplacements", "DumpTexturePages", false)); + full_label->setEnabled(full_cb->isEnabled()); + }); + connect(dlgui.closeButton, &QPushButton::clicked, &dlg, &QDialog::accept); + connect(dlgui.exportButton, &QPushButton::clicked, &dlg, [&dlg, &dlgui]() { + Settings::TextureReplacementSettings::Configuration config; + + config.dump_texture_pages = dlgui.dumpTexturePages->isChecked(); + config.dump_full_texture_pages = dlgui.dumpFullTexturePages->isChecked(); + config.dump_c16_textures = dlgui.dumpC16Textures->isChecked(); + config.reduce_palette_range = dlgui.reducePaletteRange->isChecked(); + config.convert_copies_to_writes = dlgui.convertCopiesToWrites->isChecked(); + config.max_vram_write_splits = dlgui.maxVRAMWriteSplits->value(); + config.texture_dump_width_threshold = dlgui.minDumpedTextureWidth->value(); + config.texture_dump_height_threshold = dlgui.minDumpedTextureHeight->value(); + config.dump_texture_force_alpha_channel = dlgui.setTextureDumpAlphaChannel->isChecked(); + config.vram_write_dump_width_threshold = dlgui.minDumpedVRAMWriteWidth->value(); + config.vram_write_dump_height_threshold = dlgui.minDumpedVRAMWriteHeight->value(); + config.dump_vram_write_force_alpha_channel = dlgui.setVRAMWriteAlphaChannel->isChecked(); // VRAM write dumping has its own alpha checkbox (bound to "DumpVRAMWriteForceAlphaChannel"); do not reuse the texture dump one. + + QInputDialog idlg(&dlg); + idlg.resize(600, 400); + idlg.setWindowTitle(tr("Texture Replacement Configuration")); + idlg.setInputMode(QInputDialog::TextInput); + idlg.setOption(QInputDialog::UsePlainTextEditForTextInput); + idlg.setLabelText(tr("Texture Replacement Configuration (config.yaml)")); + idlg.setTextValue(QString::fromStdString(TextureReplacements::ExportConfiguration(config, false))); + idlg.setOkButtonText(tr("Save")); + if (idlg.exec()) + { + const QString path = QFileDialog::getSaveFileName(&dlg, tr("Save Configuration"), QString(), + tr("Configuration Files (config.yaml)")); + if (path.isEmpty()) + return; + + Error 
error; + if (!FileSystem::WriteStringToFile(QDir::toNativeSeparators(path).toUtf8().constData(), + idlg.textValue().toStdString(), &error)) + { + QMessageBox::critical(&dlg, tr("Write Failed"), QString::fromStdString(error.GetDescription())); + } + } + }); + + dlg.exec(); } diff --git a/src/duckstation-qt/graphicssettingswidget.h b/src/duckstation-qt/graphicssettingswidget.h index 6752ac2f1..d698742a2 100644 --- a/src/duckstation-qt/graphicssettingswidget.h +++ b/src/duckstation-qt/graphicssettingswidget.h @@ -39,8 +39,9 @@ private Q_SLOTS: void onMediaCaptureVideoAutoResolutionChanged(); void onMediaCaptureAudioEnabledChanged(); + void onEnableTextureCacheChanged(); void onEnableAnyTextureReplacementsChanged(); - void onEnableVRAMWriteDumpingChanged(); + void onTextureReplacementOptionsClicked(); private: static constexpr int TAB_INDEX_RENDERING = 0; diff --git a/src/duckstation-qt/graphicssettingswidget.ui b/src/duckstation-qt/graphicssettingswidget.ui index 6e7ebfcd4..a361c2ce4 100644 --- a/src/duckstation-qt/graphicssettingswidget.ui +++ b/src/duckstation-qt/graphicssettingswidget.ui @@ -1081,34 +1081,94 @@ 0 - + General Settings + + + + + Enable Texture Cache + + + + + + + The texture cache is currently experimental, and may cause rendering errors in some games. + + + true + + + + + + + Use Old MDEC Routines + + + + + + + + + + Texture Replacement + - + - + - Enable VRAM Write Replacement + Enable Texture Replacements - + Preload Texture Replacements - - + + - Use Old MDEC Routines + Enable Texture Dumping + + + + + + + + + Qt::Orientation::Horizontal + + + + 40 + 20 + + + + + + + + Options... 
+ + + + + @@ -1117,66 +1177,22 @@ - VRAM Write Dumping + VRAM Write (Background) Replacement - + - Enable VRAM Write Dumping + Enable VRAM Write Replacement - + - Set Alpha Channel - - - - - - - - - Dump Size Threshold: - - - - - - - - - px - - - 1 - - - 1024 - - - - - - - x - - - - - - - px - - - 1 - - - 512 + Enable VRAM Write Dumping diff --git a/src/duckstation-qt/texturereplacementsettingsdialog.ui b/src/duckstation-qt/texturereplacementsettingsdialog.ui new file mode 100644 index 000000000..c6d2e11bd --- /dev/null +++ b/src/duckstation-qt/texturereplacementsettingsdialog.ui @@ -0,0 +1,359 @@ + + + TextureReplacementSettingsDialog + + + + 0 + 0 + 861 + 726 + + + + Texture Replacement Settings + + + + + + 10 + + + + + + 32 + 32 + + + + + 32 + 32 + + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + + + + + <html><head/><body><p><span style=" font-weight:700;">Texture Replacement Settings</span><br/>These settings fine-tune the behavior of the texture replacement system. You can also export a game-specific configuration file.</p></body></html> + + + Qt::TextFormat::RichText + + + Qt::AlignmentFlag::AlignLeading|Qt::AlignmentFlag::AlignLeft|Qt::AlignmentFlag::AlignTop + + + true + + + + + + + + + Texture Dumping + + + + + + Dump Texture Pages + + + + + + + Instead of tracking VRAM writes and attempting to identify the "real" size of textures, create sub-rectangles from pages based on how they are drawn. In most games, this will lead to significant duplication in dumps, and reduce replacement reliability. However, some games are incompatible with write tracking, and must use page mode. + + + true + + + + + + + Dump C16 Textures + + + + + + + Most games do not use direct textures, and when they do, it is usually for post-processing or FMVs. Ignoring C16 textures typically reduces garbage/false positive texture dumps, however, some games may require it. 
+ + + true + + + + + + + Reduce Palette Range + + + + + + + Reduces the size of palettes (i.e. CLUTs) to only those indices that are used. This can help reduce duplication and improve replacement reliability in games that use 8-bit textures, but do not reserve or use the full 1x256 region in video memory for storage of the palette. When replacing textures dumped with this option enabled, CPU usage on the GPU thread does increase trivially, however, generally it is worthwhile for the reliability improvement. Games that require this option include Metal Gear Solid. + + + true + + + + + + + Convert Copies To Writes + + + + + + + Converts VRAM copies to VRAM writes, when a copy is performed into a previously tracked VRAM write. This is required for some games that construct animated textures by copying and replacing small portions of the texture with the parts that are animated. Generally this option will cause duplication when dumping, but it is required in some games, such as Final Fantasy VIII. + + + true + + + + + + + Max Write Splits: + + + + + + + 0 + + + 32 + + + + + + + Determines the maximum number of times a VRAM write/upload can be split, before it is discarded and no longer tracked. This is required for games that partially overwrite texture data, such as Gran Turismo. + + + true + + + + + + + Dump Size Threshold: + + + + + + + + + px + + + 1 + + + 1024 + + + + + + + x + + + + + + + px + + + 1 + + + 512 + + + + + + + Set Alpha Channel + + + + + + + + + Determines the minimum size of a texture that will be dumped. Textures with a size smaller than this value will be ignored. + + + true + + + + + + + Dumps full texture pages instead of sub-rectangles. 256x256 pages will be dumped/replaced instead. 
+ + + + + + + Dump Full Texture Pages + + + + + + + + + + Background Dumping + + + + + + Dump Size Threshold: + + + + + + + + + px + + + 1 + + + 1024 + + + + + + + x + + + + + + + px + + + 1 + + + 512 + + + + + + + Set Alpha Channel + + + + + + + + + Determines the minimum size of a VRAM write that will be dumped, in background dumping mode. Uploads smaller than this size will be ignored. + + + true + + + + + + + + + + Qt::Orientation::Vertical + + + + 20 + 40 + + + + + + + + + + Qt::Orientation::Horizontal + + + + 198 + 20 + + + + + + + + Export... + + + + + + + Close + + + true + + + + + + + + + +