From 20dbcfd2eb9f42d2256787dacf74ec7294e80658 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 11 May 2024 01:59:55 +1000 Subject: [PATCH] GS/HW: Avoid barriers on second alpha pass when only writing to Z Completely redundant. We also don't need to use the drawlist. --- pcsx2/GS/Renderers/Common/GSDevice.h | 6 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 12 ++- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 102 +++++++++++++---------- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 3 +- 4 files changed, 75 insertions(+), 48 deletions(-) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 38878156ee..c990f1683c 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig __fi bool IsFeedbackLoop() const { const u32 sw_blend_bits = blend_a | blend_b | blend_d; - const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u); + const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0); return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt; } @@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig struct AlphaPass { alignas(8) PSSelector ps; - bool enable; + bool enable : 1; + bool require_one_barrier : 1; + bool require_full_barrier : 1; ColorMaskSelector colormask; DepthStencilSelector depth; float ps_aref; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 83bcc02226..d5ecc893a6 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5917,11 +5917,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta SetupIA(rtscale, sx, sy); - m_conf.alpha_second_pass.enable = ate_second_pass; - if (ate_second_pass) { pxAssert(!env.PABE.PABE); + std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps)); 
std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask)); std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth)); @@ -5965,6 +5964,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta r = g = b = a = false; } + m_conf.alpha_second_pass.enable = true; + if (z || r || g || b || a) { m_conf.alpha_second_pass.depth.zwe = z; @@ -5973,7 +5974,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.alpha_second_pass.colormask.wb = b; m_conf.alpha_second_pass.colormask.wa = a; if (m_conf.alpha_second_pass.colormask.wrgba == 0) + { m_conf.alpha_second_pass.ps.DisableColorOutput(); + } + if (m_conf.alpha_second_pass.ps.IsFeedbackLoop()) + { + m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier; + m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier; + } } else { diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 884258a8d4..72ec313468 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -5839,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // now we can do the actual draw if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, skip_first_barrier); + SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier); // blend second pass if (config.blend_second_pass.enable) @@ -5851,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.ps.blend_hw = config.blend_second_pass.blend_hw; pipe.ps.dither = config.blend_second_pass.dither; if (BindDrawPipeline(pipe)) + { + // TODO: This probably should have barriers, in case we want to use it conditionally. 
DrawIndexedPrimitive(); + } } // and the alpha pass @@ -5869,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.dss = config.alpha_second_pass.depth; pipe.bs = config.blend; if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, false); + { + SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier, + config.alpha_second_pass.require_full_barrier, false); + } } if (draw_rt_clone) @@ -5972,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; } -void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier) +void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier) { - if (config.drawlist) + if (!m_features.texture_barrier) [[unlikely]] { - GL_PUSH("Split the draw (SPRITE)"); - g_perfmon.Put( - GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier)); - - const u32 indices_per_prim = config.indices_per_prim; - const u32 draw_list_size = static_cast<u32>(config.drawlist->size()); - const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); - u32 p = 0; - u32 n = 0; - - if (skip_first_barrier) - { - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - ++n; - } - - for (; n < draw_list_size; n++) - { - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - } - + DrawIndexedPrimitive(); return; } - if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop()) +#ifdef PCSX2_DEVBUILD + if 
((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]] + Console.Warning("GS: Possible unnecessary barrier detected."); +#endif + + if (full_barrier) { const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + const u32 indices_per_prim = config.indices_per_prim; - if (config.require_full_barrier) + if (config.drawlist) { - const u32 indices_per_prim = config.indices_per_prim; + GL_PUSH("Split the draw (SPRITE)"); + g_perfmon.Put( + GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier)); + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast<u32>(config.drawlist->size()); + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + u32 p = 0; + u32 n = 0; + + if (skip_first_barrier) + { + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + ++n; + } + + for (; n < draw_list_size; n++) + { + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); + + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + } + } + else + { GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); g_perfmon.Put( GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier)); @@ -6033,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, DrawIndexedPrimitive(p, indices_per_prim); } - - return; } - if (config.require_one_barrier && !skip_first_barrier) - { - g_perfmon.Put(GSPerfMon::Barriers, 1); - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - } + return; + 
} + + if (one_barrier && !skip_first_barrier) + { + g_perfmon.Put(GSPerfMon::Barriers, 1); + + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); } DrawIndexedPrimitive(); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 0f7871a360..6fff081482 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -588,7 +588,8 @@ public: void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe); void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const; - void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier); + void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier); ////////////////////////////////////////////////////////////////////////// // Vulkan State