GS/HW: Avoid barriers on second alpha pass when only writing to Z

The barriers are completely redundant in that case. We also don't need to use the drawlist.
This commit is contained in:
Stenzek 2024-05-11 01:59:55 +10:00 committed by Connor McLaughlin
parent ba7096c9fa
commit 20dbcfd2eb
4 changed files with 75 additions and 48 deletions

View File

@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig
__fi bool IsFeedbackLoop() const
{
const u32 sw_blend_bits = blend_a | blend_b | blend_d;
const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u);
const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0);
return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt;
}
@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig
struct AlphaPass
{
alignas(8) PSSelector ps;
bool enable;
bool enable : 1;
bool require_one_barrier : 1;
bool require_full_barrier : 1;
ColorMaskSelector colormask;
DepthStencilSelector depth;
float ps_aref;

View File

@ -5917,11 +5917,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
SetupIA(rtscale, sx, sy);
m_conf.alpha_second_pass.enable = ate_second_pass;
if (ate_second_pass)
{
pxAssert(!env.PABE.PABE);
std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps));
std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask));
std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth));
@ -5965,6 +5964,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
r = g = b = a = false;
}
m_conf.alpha_second_pass.enable = true;
if (z || r || g || b || a)
{
m_conf.alpha_second_pass.depth.zwe = z;
@ -5973,7 +5974,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.alpha_second_pass.colormask.wb = b;
m_conf.alpha_second_pass.colormask.wa = a;
if (m_conf.alpha_second_pass.colormask.wrgba == 0)
{
m_conf.alpha_second_pass.ps.DisableColorOutput();
}
if (m_conf.alpha_second_pass.ps.IsFeedbackLoop())
{
m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier;
m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier;
}
}
else
{

View File

@ -5839,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
// now we can do the actual draw
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, skip_first_barrier);
SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);
// blend second pass
if (config.blend_second_pass.enable)
@ -5851,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.ps.blend_hw = config.blend_second_pass.blend_hw;
pipe.ps.dither = config.blend_second_pass.dither;
if (BindDrawPipeline(pipe))
{
// TODO: This probably should have barriers, in case we want to use it conditionally.
DrawIndexedPrimitive();
}
}
// and the alpha pass
@ -5869,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.dss = config.alpha_second_pass.depth;
pipe.bs = config.blend;
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, false);
{
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
config.alpha_second_pass.require_full_barrier, false);
}
}
if (draw_rt_clone)
@ -5972,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
}
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier)
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier)
{
if (config.drawlist)
if (!m_features.texture_barrier) [[unlikely]]
{
GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;
if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}
for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}
DrawIndexedPrimitive();
return;
}
if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop())
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
Console.Warning("GS: Possible unnecessary barrier detected.");
#endif
if (full_barrier)
{
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
const u32 indices_per_prim = config.indices_per_prim;
if (config.require_full_barrier)
if (config.drawlist)
{
const u32 indices_per_prim = config.indices_per_prim;
GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));
const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;
if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}
for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}
}
else
{
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
g_perfmon.Put(
GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier));
@ -6033,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
DrawIndexedPrimitive(p, indices_per_prim);
}
return;
}
if (config.require_one_barrier && !skip_first_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
}
return;
}
if (one_barrier && !skip_first_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
}
DrawIndexedPrimitive();

View File

@ -588,7 +588,8 @@ public:
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier);
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier);
//////////////////////////////////////////////////////////////////////////
// Vulkan State