diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index cd80ab8e7..e3b4efa37 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -976,6 +976,10 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
     color = VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color));
 
   VRAMFillUBOData uniforms;
+  uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale;
+  uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale;
+  uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale;
+  uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale;
   std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
     RGBA8ToFloat(color);
 
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 56bcd7c4d..689176537 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -125,6 +125,10 @@ protected:
   struct VRAMFillUBOData
   {
+    u32 u_dst_x;
+    u32 u_dst_y;
+    u32 u_end_x;
+    u32 u_end_y;
     float u_fill_color[4];
     u32 u_interlaced_displayed_field;
   };
 
@@ -268,13 +272,19 @@ protected:
   /// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
   /// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
-  bool NeedsTwoPassRendering() const
+  ALWAYS_INLINE bool NeedsTwoPassRendering() const
   {
     return (m_batch.texture_mode != GPUTextureMode::Disabled &&
             (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
              (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
   }
 
+  /// Returns true if the specified VRAM fill is oversized.
+  ALWAYS_INLINE static bool IsVRAMFillOversized(u32 x, u32 y, u32 width, u32 height)
+  {
+    return ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT);
+  }
+
   ALWAYS_INLINE bool IsUsingSoftwareRendererForReadbacks() { return static_cast<bool>(m_sw_renderer); }
 
   void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp
index cd0cb4f49..716f611f3 100644
--- a/src/core/gpu_hw_d3d11.cpp
+++ b/src/core/gpu_hw_d3d11.cpp
@@ -508,7 +508,8 @@ bool GPU_HW_D3D11::CompileShaders()
                              m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
                              m_pgxp_depth_buffer, m_supports_dual_source_blend);
 
-  ShaderCompileProgressTracker progress("Compiling Shaders", 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3) + 1);
+  ShaderCompileProgressTracker progress("Compiling Shaders",
+                                        1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1);
 
   // input layout
   {
@@ -585,18 +586,19 @@
   progress.Increment();
 
-  m_vram_fill_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateFillFragmentShader());
-  if (!m_vram_fill_pixel_shader)
-    return false;
+  for (u8 wrapped = 0; wrapped < 2; wrapped++)
+  {
+    for (u8 interlaced = 0; interlaced < 2; interlaced++)
+    {
+      const std::string ps =
+        shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced));
+      m_vram_fill_pixel_shaders[wrapped][interlaced] = shader_cache.GetPixelShader(m_device.Get(), ps);
+      if (!m_vram_fill_pixel_shaders[wrapped][interlaced])
+        return false;
 
-  progress.Increment();
-
-  m_vram_interlaced_fill_pixel_shader =
-    shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateInterlacedFillFragmentShader());
-  if (!m_vram_interlaced_fill_pixel_shader)
-    return false;
-
-  progress.Increment();
+      progress.Increment();
+    }
+  }
 
   m_vram_read_pixel_shader =
     shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader());
   if (!m_vram_read_pixel_shader)
@@ -682,8 +684,7 @@ void GPU_HW_D3D11::DestroyShaders()
   m_vram_copy_pixel_shader.Reset();
   m_vram_write_pixel_shader.Reset();
   m_vram_read_pixel_shader.Reset();
-  m_vram_interlaced_fill_pixel_shader.Reset();
-  m_vram_fill_pixel_shader.Reset();
+  m_vram_fill_pixel_shaders = {};
   m_copy_pixel_shader.Reset();
   m_uv_quad_vertex_shader.Reset();
   m_screen_quad_vertex_shader.Reset();
@@ -976,26 +977,18 @@ void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
   if (IsUsingSoftwareRendererForReadbacks())
     FillSoftwareRendererVRAM(x, y, width, height, color);
 
-  if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
-  {
-    // CPU round trip if oversized for now.
-    Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
-    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
-    GPU::FillVRAM(x, y, width, height, color);
-    UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
-    return;
-  }
-
   GPU_HW::FillVRAM(x, y, width, height, color);
 
-  const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
-
   m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
 
-  SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
-                        height * m_resolution_scale);
-  DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() :
-                                                     m_vram_fill_pixel_shader.Get(),
+  const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
+  SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale,
+                        bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale);
+
+  const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
+  DrawUtilityShader(m_vram_fill_pixel_shaders[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))]
+                                             [BoolToUInt8(IsInterlacedRenderingEnabled())]
+                      .Get(),
                     &uniforms, sizeof(uniforms));
 
   RestoreGraphicsAPIState();
diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h
index eec2867b2..4f0a2aed3 100644
--- a/src/core/gpu_hw_d3d11.h
+++ b/src/core/gpu_hw_d3d11.h
@@ -123,8 +123,7 @@ private:
   ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
   ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader;
   ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
-  ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
-  ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
+  std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_vram_fill_pixel_shaders; // [wrapped][interlaced]
   ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
   ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
   ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
diff --git a/src/core/gpu_hw_d3d12.cpp b/src/core/gpu_hw_d3d12.cpp
index 9a3988a0d..cad7bac93 100644
--- a/src/core/gpu_hw_d3d12.cpp
+++ b/src/core/gpu_hw_d3d12.cpp
@@ -420,8 +420,8 @@ bool GPU_HW_D3D12::CompilePipelines()
                              m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
                              m_pgxp_depth_buffer, m_supports_dual_source_blend);
 
-  ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
-                                                                 2 + 2 + 1 + 1 + (2 * 3) + 1);
+  ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 +
+                                                                 (2 * 2) + 2 + 2 + 1 + 1 + (2 * 3) + 1);
 
   // vertex shaders - [textured]
   // fragment shaders - [render_mode][texture_mode][dithering][interlacing]
@@ -561,23 +561,24 @@
   gpbuilder.SetDepthStencilFormat(m_vram_depth_texture.GetFormat());
 
   // VRAM fill
0; wrapped < 2; wrapped++) { for (u8 interlaced = 0; interlaced < 2; interlaced++) { ComPtr fs = shader_cache.GetPixelShader( - (interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader()); + shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced))); if (!fs) return false; gpbuilder.SetPixelShader(fs.Get()); gpbuilder.SetDepthState(true, true, D3D12_COMPARISON_FUNC_ALWAYS); - m_vram_fill_pipelines[interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); - if (!m_vram_fill_pipelines[interlaced]) + m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(g_d3d12_context->GetDevice(), shader_cache, false); + if (!m_vram_fill_pipelines[wrapped][interlaced]) return false; - D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[interlaced].Get(), "VRAM Fill Pipeline Interlacing=%u", - interlaced); + D3D12::SetObjectNameFormatted(m_vram_fill_pipelines[wrapped][interlaced].Get(), + "VRAM Fill Pipeline Wrapped=%u,Interlacing=%u", wrapped, interlaced); progress.Increment(); } @@ -994,31 +995,22 @@ void GPU_HW_D3D12::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) FillSoftwareRendererVRAM(x, y, width, height, color); // TODO: Use fast clear - if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) - { - // CPU round trip if oversized for now. - Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), false, false); - return; - } - GPU_HW::FillVRAM(x, y, width, height, color); - x *= m_resolution_scale; - y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); cmdlist->SetGraphicsRootSignature(m_single_sampler_root_signature.Get()); cmdlist->SetGraphicsRoot32BitConstants(0, sizeof(uniforms) / sizeof(u32), &uniforms, 0); cmdlist->SetGraphicsRootDescriptorTable(1, g_d3d12_context->GetNullSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())].Get()); - D3D12::SetViewportAndScissor(cmdlist, x, y, width, height); + cmdlist->SetPipelineState(m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] + [BoolToUInt8(IsInterlacedRenderingEnabled())] + .Get()); + + const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); + D3D12::SetViewportAndScissor(cmdlist, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, + bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); + cmdlist->DrawInstanced(3, 1, 0, 0); RestoreGraphicsAPIState(); diff --git a/src/core/gpu_hw_d3d12.h b/src/core/gpu_hw_d3d12.h index 4e50ce521..b01992839 100644 --- a/src/core/gpu_hw_d3d12.h +++ b/src/core/gpu_hw_d3d12.h @@ -92,8 +92,8 @@ private: // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] DimensionalArray, 2, 2, 5, 9, 4, 2> m_batch_pipelines; - // [interlaced] - std::array, 2> m_vram_fill_pipelines; + // [wrapped][interlaced] + DimensionalArray, 2, 2> m_vram_fill_pipelines; // [depth_test] std::array, 2> m_vram_write_pipelines; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index e1914645b..88c6f7699 100644 --- 
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -517,7 +517,7 @@
                              m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
                              m_pgxp_depth_buffer, m_supports_dual_source_blend);
 
-  ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + 1 + 1 + 1 + 1 + 1 + 1);
+  ShaderCompileProgressTracker progress("Compiling Programs", (4 * 9 * 2 * 2) + (2 * 3) + (2 * 2) + 1 + 1 + 1 + 1 + 1);
 
   for (u32 render_mode = 0; render_mode < 4; render_mode++)
   {
@@ -609,22 +609,29 @@
     }
   }
 
-  std::optional<GL::Program> prog = shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
-                                                            shadergen.GenerateInterlacedFillFragmentShader(),
-                                                            [this, use_binding_layout](GL::Program& prog) {
-                                                              if (!IsGLES() && !use_binding_layout)
-                                                                prog.BindFragData(0, "o_col0");
-                                                            });
-  if (!prog)
-    return false;
+  for (u8 wrapped = 0; wrapped < 2; wrapped++)
+  {
+    for (u8 interlaced = 0; interlaced < 2; interlaced++)
+    {
+      std::optional<GL::Program> prog = shader_cache.GetProgram(
+        shadergen.GenerateScreenQuadVertexShader(), {},
+        shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)),
+        [this, use_binding_layout](GL::Program& prog) {
+          if (!IsGLES() && !use_binding_layout)
+            prog.BindFragData(0, "o_col0");
+        });
+      if (!prog)
+        return false;
 
-  if (!use_binding_layout)
-    prog->BindUniformBlock("UBOBlock", 1);
+      if (!use_binding_layout)
+        prog->BindUniformBlock("UBOBlock", 1);
 
-  m_vram_interlaced_fill_program = std::move(*prog);
-  progress.Increment();
+      m_vram_fill_programs[wrapped][interlaced] = std::move(*prog);
+      progress.Increment();
+    }
+  }
 
-  prog =
+  std::optional<GL::Program> prog =
     shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {}, shadergen.GenerateVRAMReadFragmentShader(),
                             [this, use_binding_layout](GL::Program& prog) {
                               if (!IsGLES() && !use_binding_layout)
@@ -1014,28 +1021,17 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
   if (IsUsingSoftwareRendererForReadbacks())
     FillSoftwareRendererVRAM(x, y, width, height, color);
 
-  if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
-  {
-    // CPU round trip if oversized for now.
-    Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
-    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
-    GPU::FillVRAM(x, y, width, height, color);
-    UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
-    return;
-  }
-
   GPU_HW::FillVRAM(x, y, width, height, color);
 
-  // scale coordinates
-  x *= m_resolution_scale;
-  y *= m_resolution_scale;
-  width *= m_resolution_scale;
-  height *= m_resolution_scale;
-
-  glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
+  const Common::Rectangle<u32> bounds(GetVRAMTransferBounds(x, y, width, height));
+  glScissor(bounds.left * m_resolution_scale,
+            m_vram_texture.GetHeight() - (bounds.top * m_resolution_scale) - (height * m_resolution_scale),
+            width * m_resolution_scale, height * m_resolution_scale);
 
   // fast path when not using interlaced rendering
-  if (!IsInterlacedRenderingEnabled())
+  const bool wrapped = IsVRAMFillOversized(x, y, width, height);
+  const bool interlaced = IsInterlacedRenderingEnabled();
+  if (!wrapped && !interlaced)
   {
     const auto [r, g, b, a] =
       RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
@@ -1048,7 +1044,7 @@
   {
     const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
 
-    m_vram_interlaced_fill_program.Bind();
+    m_vram_fill_programs[BoolToUInt8(wrapped)][BoolToUInt8(interlaced)].Bind();
     UploadUniformBuffer(&uniforms, sizeof(uniforms));
     glDisable(GL_BLEND);
     SetDepthFunc(GL_ALWAYS);
diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h
index 1d6751f7f..3f407b11f 100644
--- a/src/core/gpu_hw_opengl.h
+++ b/src/core/gpu_hw_opengl.h
@@ -99,7 +99,7 @@ private:
   std::array<std::array<std::array<std::array<GL::Program, 2>, 2>, 9>, 4>
     m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
   std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
-  GL::Program m_vram_interlaced_fill_program;
+  std::array<std::array<GL::Program, 2>, 2> m_vram_fill_programs;
   GL::Program m_vram_read_program;
   GL::Program m_vram_write_program;
   GL::Program m_vram_copy_program;
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
index d35e53368..aaebb6288 100644
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -997,27 +997,6 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
   return ss.str();
 }
 
-std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
-{
-  std::stringstream ss;
-  WriteHeader(ss);
-  WriteCommonFunctions(ss);
-  DeclareUniformBuffer(ss, {"float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
-  DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
-
-  ss << R"(
-{
-  if ((fixYCoord(uint(v_pos.y)) & 1u) == u_interlaced_displayed_field)
-    discard;
-
-  o_col0 = u_fill_color;
-  o_depth = u_fill_color.a;
-}
-)";
-
-  return ss.str();
-}
-
 std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit,
                                                             GPU_HW::InterlacedRenderMode interlace_mode,
                                                             bool smooth_chroma)
@@ -1324,6 +1303,50 @@ std::string GPU_HW_ShaderGen::GenerateVRAMCopyFragmentShader()
   return ss.str();
 }
 
+std::string GPU_HW_ShaderGen::GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced)
+{
+  std::stringstream ss;
+  WriteHeader(ss);
+  WriteCommonFunctions(ss);
+  DefineMacro(ss, "PGXP_DEPTH", m_pgxp_depth);
+  DefineMacro(ss, "WRAPPED", wrapped);
+  DefineMacro(ss, "INTERLACED", interlaced);
+
+  DeclareUniformBuffer(
+    ss, {"uint2 u_dst_coords", "uint2 u_end_coords", "float4 u_fill_color", "uint u_interlaced_displayed_field"}, true);
+
+  DeclareFragmentEntryPoint(ss, 0, 1, {}, interlaced || wrapped, 1, true, false, false, false);
+  ss << R"(
+{
+#if INTERLACED || WRAPPED
+  uint2 dst_coords = uint2(uint(v_pos.x), fixYCoord(uint(v_pos.y)));
+#endif
+
+#if INTERLACED
+  if ((dst_coords.y & 1u) == u_interlaced_displayed_field)
+    discard;
+#endif
+
+#if WRAPPED
+  // make sure it's not oversized and out of range
+  if ((dst_coords.x < u_dst_coords.x && dst_coords.x >= u_end_coords.x) ||
+      (dst_coords.y < u_dst_coords.y && dst_coords.y >= u_end_coords.y))
+  {
+    discard;
+  }
+#endif
+
+  o_col0 = u_fill_color;
+#if !PGXP_DEPTH
+  o_depth = u_fill_color.a;
+#else
+  o_depth = 1.0f;
+#endif
+})";
+
+  return ss.str();
+}
+
 std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
 {
   std::stringstream ss;
diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h
index dff617e97..92b863829 100644
--- a/src/core/gpu_hw_shadergen.h
+++ b/src/core/gpu_hw_shadergen.h
@@ -13,12 +13,12 @@ public:
   std::string GenerateBatchVertexShader(bool textured);
   std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
                                           bool dithering, bool interlacing);
-  std::string GenerateInterlacedFillFragmentShader();
   std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
                                             bool smooth_chroma);
   std::string GenerateVRAMReadFragmentShader();
   std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
   std::string GenerateVRAMCopyFragmentShader();
+  std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced);
   std::string GenerateVRAMUpdateDepthFragmentShader();
 
   std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp
index 6d9dd38c0..258aeba3c 100644
--- a/src/core/gpu_hw_vulkan.cpp
+++ b/src/core/gpu_hw_vulkan.cpp
@@ -840,7 +840,7 @@
                              m_pgxp_depth_buffer, m_supports_dual_source_blend);
 
   ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
-                                                                 2 + 2 + 1 + 1 + (2 * 3) + 1);
+                                                                 (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1);
 
   // vertex shaders - [textured]
   // fragment shaders - [render_mode][texture_mode][dithering][interlacing]
@@ -1010,11 +1010,12 @@ bool GPU_HW_Vulkan::CompilePipelines()
   gpbuilder.SetMultisamples(m_multisamples, false);
 
   // VRAM fill
+  for (u8 wrapped = 0; wrapped < 2; wrapped++)
   {
     for (u8 interlaced = 0; interlaced < 2; interlaced++)
     {
       VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(
-        (interlaced == 0) ? shadergen.GenerateFillFragmentShader() : shadergen.GenerateInterlacedFillFragmentShader());
+        shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
       if (fs == VK_NULL_HANDLE)
         return false;
 
@@ -1022,9 +1023,9 @@ bool GPU_HW_Vulkan::CompilePipelines()
       gpbuilder.SetFragmentShader(fs);
       gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS);
 
-      m_vram_fill_pipelines[interlaced] = gpbuilder.Create(device, pipeline_cache, false);
+      m_vram_fill_pipelines[wrapped][interlaced] = gpbuilder.Create(device, pipeline_cache, false);
       vkDestroyShaderModule(device, fs, nullptr);
-      if (m_vram_fill_pipelines[interlaced] == VK_NULL_HANDLE)
+      if (m_vram_fill_pipelines[wrapped][interlaced] == VK_NULL_HANDLE)
         return false;
 
       progress.Increment();
@@ -1249,8 +1250,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
 {
   m_batch_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
 
-  for (VkPipeline& p : m_vram_fill_pipelines)
-    Vulkan::Util::SafeDestroyPipeline(p);
+  m_vram_fill_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline);
 
   for (VkPipeline& p : m_vram_write_pipelines)
     Vulkan::Util::SafeDestroyPipeline(p);
@@ -1482,23 +1482,8 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
   if (IsUsingSoftwareRendererForReadbacks())
     FillSoftwareRendererVRAM(x, y, width, height, color);
 
-  if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
-  {
-    // CPU round trip if oversized for now.
- Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - return; - } - GPU_HW::FillVRAM(x, y, width, height, color); - x *= m_resolution_scale; - y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - BeginVRAMRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); @@ -1506,8 +1491,12 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), &uniforms); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]); - Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height); + m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] + [BoolToUInt8(IsInterlacedRenderingEnabled())]); + + const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); + Vulkan::Util::SetViewportAndScissor(cmdbuf, bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, + bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); vkCmdDraw(cmdbuf, 3, 1, 0, 0); RestoreGraphicsAPIState(); diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h index f48b9b102..4ab4fb058 100644 --- a/src/core/gpu_hw_vulkan.h +++ b/src/core/gpu_hw_vulkan.h @@ -129,8 +129,8 @@ private: // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] DimensionalArray m_batch_pipelines{}; - // [interlaced] - std::array m_vram_fill_pipelines{}; + // [wrapped][interlaced] + DimensionalArray m_vram_fill_pipelines{}; // [depth_test] std::array m_vram_write_pipelines{};