GS/HW: Avoid barriers on second alpha pass when only writing to Z

The barriers are completely redundant in that case. We also don't need to use the drawlist when no full barrier is required.
This commit is contained in:
Stenzek 2024-05-11 01:59:55 +10:00 committed by Connor McLaughlin
parent ba7096c9fa
commit 20dbcfd2eb
4 changed files with 75 additions and 48 deletions

View File

@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig
__fi bool IsFeedbackLoop() const __fi bool IsFeedbackLoop() const
{ {
const u32 sw_blend_bits = blend_a | blend_b | blend_d; const u32 sw_blend_bits = blend_a | blend_b | blend_d;
const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u); const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0);
return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt; return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt;
} }
@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig
struct AlphaPass struct AlphaPass
{ {
alignas(8) PSSelector ps; alignas(8) PSSelector ps;
bool enable; bool enable : 1;
bool require_one_barrier : 1;
bool require_full_barrier : 1;
ColorMaskSelector colormask; ColorMaskSelector colormask;
DepthStencilSelector depth; DepthStencilSelector depth;
float ps_aref; float ps_aref;

View File

@ -5917,11 +5917,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
SetupIA(rtscale, sx, sy); SetupIA(rtscale, sx, sy);
m_conf.alpha_second_pass.enable = ate_second_pass;
if (ate_second_pass) if (ate_second_pass)
{ {
pxAssert(!env.PABE.PABE); pxAssert(!env.PABE.PABE);
std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps)); std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps));
std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask)); std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask));
std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth)); std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth));
@ -5965,6 +5964,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
r = g = b = a = false; r = g = b = a = false;
} }
m_conf.alpha_second_pass.enable = true;
if (z || r || g || b || a) if (z || r || g || b || a)
{ {
m_conf.alpha_second_pass.depth.zwe = z; m_conf.alpha_second_pass.depth.zwe = z;
@ -5973,7 +5974,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.alpha_second_pass.colormask.wb = b; m_conf.alpha_second_pass.colormask.wb = b;
m_conf.alpha_second_pass.colormask.wa = a; m_conf.alpha_second_pass.colormask.wa = a;
if (m_conf.alpha_second_pass.colormask.wrgba == 0) if (m_conf.alpha_second_pass.colormask.wrgba == 0)
{
m_conf.alpha_second_pass.ps.DisableColorOutput(); m_conf.alpha_second_pass.ps.DisableColorOutput();
}
if (m_conf.alpha_second_pass.ps.IsFeedbackLoop())
{
m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier;
m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier;
}
} }
else else
{ {

View File

@ -5839,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
// now we can do the actual draw // now we can do the actual draw
if (BindDrawPipeline(pipe)) if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, skip_first_barrier); SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
// blend second pass // blend second pass
if (config.blend_second_pass.enable) if (config.blend_second_pass.enable)
@ -5851,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.ps.blend_hw = config.blend_second_pass.blend_hw; pipe.ps.blend_hw = config.blend_second_pass.blend_hw;
pipe.ps.dither = config.blend_second_pass.dither; pipe.ps.dither = config.blend_second_pass.dither;
if (BindDrawPipeline(pipe)) if (BindDrawPipeline(pipe))
{
// TODO: This probably should have barriers, in case we want to use it conditionally.
DrawIndexedPrimitive(); DrawIndexedPrimitive();
}
} }
// and the alpha pass // and the alpha pass
@ -5869,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.dss = config.alpha_second_pass.depth; pipe.dss = config.alpha_second_pass.depth;
pipe.bs = config.blend; pipe.bs = config.blend;
if (BindDrawPipeline(pipe)) if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, false); {
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
config.alpha_second_pass.require_full_barrier, false);
}
} }
if (draw_rt_clone) if (draw_rt_clone)
@ -5972,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
} }
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier) void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier)
{ {
if (config.drawlist) if (!m_features.texture_barrier) [[unlikely]]
{ {
GL_PUSH("Split the draw (SPRITE)"); DrawIndexedPrimitive();
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;
if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}
for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}
return; return;
} }
if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop()) #ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
Console.Warning("GS: Possible unnecessary barrier detected.");
#endif
if (full_barrier)
{ {
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
const u32 indices_per_prim = config.indices_per_prim;
if (config.require_full_barrier) if (config.drawlist)
{ {
const u32 indices_per_prim = config.indices_per_prim; GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;
if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}
for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}
}
else
{
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
g_perfmon.Put( g_perfmon.Put(
GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier)); GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier));
@ -6033,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
DrawIndexedPrimitive(p, indices_per_prim); DrawIndexedPrimitive(p, indices_per_prim);
} }
return;
} }
if (config.require_one_barrier && !skip_first_barrier) return;
{ }
g_perfmon.Put(GSPerfMon::Barriers, 1);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, if (one_barrier && !skip_first_barrier)
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); {
} g_perfmon.Put(GSPerfMon::Barriers, 1);
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
} }
DrawIndexedPrimitive(); DrawIndexedPrimitive();

View File

@ -588,7 +588,8 @@ public:
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe); void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const; VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier); void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier);
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Vulkan State // Vulkan State