From 8446856ecf763728f1fa7defb0fda61684a4558f Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 26 Nov 2023 15:15:58 +1000 Subject: [PATCH] GPU/HW: Trace min/max UVs for hazards --- src/core/gpu_commands.cpp | 1 + src/core/gpu_hw.cpp | 114 +++++++++++++++++++++++++++++--------- src/core/gpu_hw.h | 14 +++-- src/core/gpu_types.h | 5 +- 4 files changed, 100 insertions(+), 34 deletions(-) diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 1f46049e0..080e2322e 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -198,6 +198,7 @@ bool GPU::HandleNOPCommand() bool GPU::HandleClearCacheCommand() { Log_DebugPrintf("GP0 clear cache"); + m_draw_mode.SetTexturePageChanged(); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index fc2576a7e..a8d56a7f0 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -59,7 +59,7 @@ ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) return scale; } -ALWAYS_INLINE static bool ShouldUseUVLimits() +ALWAYS_INLINE static bool ShouldClampUVs() { // We only need UV limits if PGXP is enabled, or texture filtering is enabled. return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filter != GPUTextureFilter::Nearest; @@ -214,7 +214,8 @@ bool GPU_HW::Initialize() m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; - m_using_uv_limits = ShouldUseUVLimits(); + m_clamp_uvs = ShouldClampUVs(); + m_compute_uv_range = m_clamp_uvs; m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing; m_downsample_mode = GetDownsampleMode(m_resolution_scale); m_wireframe_mode = g_settings.gpu_wireframe_mode; @@ -334,7 +335,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) const GPUDownsampleMode downsample_mode = GetDownsampleMode(resolution_scale); const GPUWireframeMode wireframe_mode = features.geometry_shaders ? 
g_settings.gpu_wireframe_mode : GPUWireframeMode::Disabled; - const bool use_uv_limits = ShouldUseUVLimits(); + const bool clamp_uvs = ShouldClampUVs(); const bool disable_color_perspective = features.noperspective_interpolation && ShouldDisableColorPerspective(); // TODO: Use old_settings @@ -346,7 +347,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || - m_using_uv_limits != use_uv_limits || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || + m_clamp_uvs != clamp_uvs || m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || m_downsample_mode != downsample_mode || (m_downsample_mode == GPUDownsampleMode::Box && g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale) || @@ -396,7 +397,8 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; - m_using_uv_limits = use_uv_limits; + m_clamp_uvs = clamp_uvs; + m_compute_uv_range = m_clamp_uvs; m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing; m_downsample_mode = downsample_mode; m_wireframe_mode = wireframe_mode; @@ -604,7 +606,7 @@ void GPU_HW::PrintSettingsToLog() (!m_true_color && m_scaled_dithering) ? " (Scaled)" : ""); Log_InfoFmt("Texture Filtering: {}", Settings::GetTextureFilterDisplayName(m_texture_filtering)); Log_InfoFmt("Dual-source blending: {}", m_supports_dual_source_blend ? "Supported" : "Not supported"); - Log_InfoFmt("Using UV limits: {}", m_using_uv_limits ? "YES" : "NO"); + Log_InfoFmt("Clamping UVs: {}", m_clamp_uvs ? "YES" : "NO"); Log_InfoFmt("Depth buffer: {}", m_pgxp_depth_buffer ? 
"YES" : "NO"); Log_InfoFmt("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); Log_InfoFmt("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode)); @@ -738,9 +740,8 @@ bool GPU_HW::CompilePipelines() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend, - m_supports_framebuffer_fetch); + m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, m_pgxp_depth_buffer, + m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch); ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); @@ -872,10 +873,10 @@ bool GPU_HW::CompilePipelines() plconfig.input_layout.vertex_attributes = textured ? - (m_using_uv_limits ? std::span( - vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : - std::span( - vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : + (m_clamp_uvs ? 
std::span( + vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : + std::span(vertex_attributes, + NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); @@ -1273,6 +1274,10 @@ void GPU_HW::UpdateVRAMReadTexture() { GL_SCOPE("UpdateVRAMReadTexture()"); + if (m_texpage_dirty) + GL_INS("Texpage is no longer dirty"); + m_texpage_dirty = false; + const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; if (m_vram_texture->IsMultisampled()) { @@ -1464,15 +1469,15 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) } } -void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) +void GPU_HW::ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices) { - u16 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v; + u32 min_u = vertices[0].u, max_u = vertices[0].u, min_v = vertices[0].v, max_v = vertices[0].v; for (u32 i = 1; i < num_vertices; i++) { - min_u = std::min(min_u, vertices[i].u); - max_u = std::max(max_u, vertices[i].u); - min_v = std::min(min_v, vertices[i].v); - max_v = std::max(max_v, vertices[i].v); + min_u = std::min(min_u, vertices[i].u); + max_u = std::max(max_u, vertices[i].u); + min_v = std::min(min_v, vertices[i].v); + max_v = std::max(max_v, vertices[i].v); } if (min_u != max_u) @@ -1480,6 +1485,8 @@ void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices) if (min_v != max_v) max_v--; + CheckForTexPageOverlap(texpage, min_u, min_v, max_u, max_v); + for (u32 i = 0; i < num_vertices; i++) vertices[i].SetUVLimits(min_u, max_u, min_v, max_v); } @@ -1689,8 +1696,8 @@ void GPU_HW::LoadVertices() if (rc.quad_polygon && m_resolution_scale > 1) HandleFlippedQuadTextureCoordinates(vertices.data()); - if (m_using_uv_limits && textured) - ComputePolygonUVLimits(vertices.data(), num_vertices); + if 
(m_compute_uv_range && textured) + ComputePolygonUVLimits(texpage, vertices.data(), num_vertices); if (!IsDrawingAreaIsValid()) return; @@ -1848,6 +1855,8 @@ void GPU_HW::LoadVertices() const u16 tex_right = tex_left + static_cast(quad_width); const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); + CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1); + AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits); AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); @@ -2077,6 +2086,47 @@ void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle& rect) } } +/// While the texture page is dirty, converts a UV range to VRAM coordinates (texture window, then texpage base). +/// If that extends m_current_uv_range and the result intersects m_vram_dirty_rect, UpdateVRAMReadTexture() is called. +ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v) +{ + if (!m_texpage_dirty) + return; + + static constexpr std::array<std::array<u32, 2>, 4> uv_shifts_adds = {{{2, 3}, {1, 1}, {0, 0}, {0, 0}}}; + + const u32 xoffs = (texpage & 0xFu) * 64u; + const u32 yoffs = ((texpage >> 4) & 1u) * 256u; + // Texture mode is the two-bit field at texpage bits 7-8; mask with 3 so 8-bit pages pick the {1, 1} entry. + const u32 xshift = uv_shifts_adds[(texpage >> 7) & 3][0]; + const u32 xadd = uv_shifts_adds[(texpage >> 7) & 3][1]; + + const u32 vram_min_u = + (((min_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) >> xshift) + xoffs; + const u32 vram_max_u = + ((((max_u & m_draw_mode.texture_window.and_x) | m_draw_mode.texture_window.or_x) + xadd) >> xshift) + xoffs; + const u32 vram_min_v = ((min_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs; + const u32 vram_max_v = ((max_v & m_draw_mode.texture_window.and_y) | m_draw_mode.texture_window.or_y) + yoffs; + + // Log_InfoFmt("{}: {},{} => {},{}", s_draw_number, vram_min_u, vram_min_v, vram_max_u, vram_max_v); + + if (vram_min_u < m_current_uv_range.left || vram_min_v < m_current_uv_range.top || + vram_max_u >= 
m_current_uv_range.right || vram_max_v >= m_current_uv_range.bottom) + { + m_current_uv_range.Include(vram_min_u, vram_max_u, vram_min_v, vram_max_v); + + DebugAssert(m_vram_dirty_rect.Valid()); + if (m_current_uv_range.Intersects(m_vram_dirty_rect)) + { + GL_INS_FMT("Updating VRAM cache due to UV {{{},{} => {},{}}} intersection with dirty {{{},{} => {},{}}}", + m_current_uv_range.left, m_current_uv_range.top, m_current_uv_range.right, m_current_uv_range.bottom, + m_vram_dirty_rect.left, m_vram_dirty_rect.top, m_vram_dirty_rect.right, m_vram_dirty_rect.bottom); + + UpdateVRAMReadTexture(); + } + } +} + ALWAYS_INLINE bool GPU_HW::IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; @@ -2515,18 +2565,31 @@ void GPU_HW::DispatchRenderCommand() } #endif - if (m_vram_dirty_rect.Valid() && (m_draw_mode.mode_reg.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) || - (m_draw_mode.mode_reg.IsUsingPalette() && - m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)))) + if (m_vram_dirty_rect.Valid() && m_draw_mode.mode_reg.IsUsingPalette() && + m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)) { - GL_INS("Invalidating VRAM read cache due to drawing area overlap"); - - // Log_DevPrint("Invalidating VRAM read cache due to drawing area overlap"); + GL_INS("Palette in VRAM dirty area, flushing cache"); if (!IsFlushed()) FlushRender(); UpdateVRAMReadTexture(); } + + if (m_vram_dirty_rect.Valid() && m_draw_mode.mode_reg.GetTexturePageRectangle().Intersects(m_vram_dirty_rect)) + { + GL_INS("Texpage is in dirty area, checking UV ranges"); + m_compute_uv_range = true; + m_texpage_dirty = true; + m_current_uv_range.SetInvalid(); + } + else + { + m_compute_uv_range = m_clamp_uvs; + if (m_texpage_dirty) + GL_INS("Texpage is no longer dirty"); + + m_texpage_dirty = false; + } } texture_mode = m_draw_mode.mode_reg.texture_mode; diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index dac266249..73a595cce 100644 --- 
a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -151,6 +151,7 @@ private: void SetFullVRAMDirtyRectangle(); void ClearVRAMDirtyRectangle(); void IncludeVRAMDirtyRectangle(const Common::Rectangle& rect); + void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v); bool IsFlushed() const; u32 GetBatchVertexSpace() const; @@ -192,7 +193,7 @@ private: static void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices); /// Computes polygon U/V boundaries. - static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); + void ComputePolygonUVLimits(u32 texpage, BatchVertex* vertices, u32 num_vertices); /// Sets the depth test flag for PGXP depth buffering. void SetBatchDepthBuffer(bool enabled); @@ -251,17 +252,20 @@ private: GPUDownsampleMode m_downsample_mode = GPUDownsampleMode::Disabled; GPUWireframeMode m_wireframe_mode = GPUWireframeMode::Disabled; bool m_true_color = true; - bool m_using_uv_limits = false; + bool m_clamp_uvs = false; + bool m_compute_uv_range = false; bool m_pgxp_depth_buffer = false; + bool m_texpage_dirty = false; BatchConfig m_batch; + + // Changed state + bool m_batch_ubo_dirty = true; BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. Common::Rectangle m_vram_dirty_rect; - - // Changed state - bool m_batch_ubo_dirty = true; + Common::Rectangle m_current_uv_range; // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] DimensionalArray, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h index e0cb11e31..d9b8fc6a3 100644 --- a/src/core/gpu_types.h +++ b/src/core/gpu_types.h @@ -167,6 +167,9 @@ union GPUDrawModeReg // Bits 0..5 are returned in the GPU status register, latched at E1h/polygon draw time. 
static constexpr u32 GPUSTAT_MASK = 0b11111111111; + static constexpr std::array texture_page_widths = { + {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}}; + u16 bits; BitField texture_page_x_base; @@ -188,8 +191,6 @@ union GPUDrawModeReg /// Returns a rectangle comprising the texture page area. ALWAYS_INLINE_RELEASE Common::Rectangle GetTexturePageRectangle() const { - static constexpr std::array texture_page_widths = { - {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}}; return Common::Rectangle::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(), texture_page_widths[static_cast(texture_mode.GetValue())], TEXTURE_PAGE_HEIGHT);