GPU/HW: Add in-pass clear depth pipeline
Instead of clearing the entire buffer, we only need to wipe out the current drawing area. Saves a decent chunk of memory bandwidth in games that end up spamming clears.
This commit is contained in:
parent
ee6887b68a
commit
58dc7562a3
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
|
||||
|
||||
#include "gpu_hw.h"
|
||||
|
@ -8,6 +8,7 @@
|
|||
#include "gpu_hw_shadergen.h"
|
||||
#include "gpu_presenter.h"
|
||||
#include "gpu_sw_rasterizer.h"
|
||||
#include "gte_types.h"
|
||||
#include "host.h"
|
||||
#include "imgui_overlays.h"
|
||||
#include "settings.h"
|
||||
|
@ -580,7 +581,7 @@ bool GPU_HW::UpdateSettings(const GPUSettings& old_settings, Error* error)
|
|||
else if (m_vram_depth_texture && depth_buffer_changed)
|
||||
{
|
||||
if (m_pgxp_depth_buffer)
|
||||
ClearDepthBuffer();
|
||||
ClearDepthBuffer(false);
|
||||
else if (m_write_mask_as_depth)
|
||||
UpdateDepthBufferFromMaskBit();
|
||||
}
|
||||
|
@ -1121,6 +1122,7 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
p.reset();
|
||||
m_vram_update_depth_pipeline.reset();
|
||||
m_vram_write_replacement_pipeline.reset();
|
||||
m_clear_depth_pipeline.reset();
|
||||
m_copy_depth_pipeline.reset();
|
||||
|
||||
ShaderCompileProgressTracker progress("Compiling Pipelines", total_items);
|
||||
|
@ -1667,6 +1669,30 @@ bool GPU_HW::CompilePipelines(Error* error)
|
|||
plconfig.SetTargetFormats(VRAM_DS_COLOR_FORMAT);
|
||||
if (!(m_copy_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
|
||||
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
|
||||
shadergen.GenerateVRAMClearDepthFragmentShader(m_use_rov_for_shader_blend), error);
|
||||
if (!fs)
|
||||
return false;
|
||||
|
||||
SetScreenQuadInputLayout(plconfig);
|
||||
plconfig.vertex_shader = m_screen_quad_vertex_shader.get();
|
||||
plconfig.fragment_shader = fs.get();
|
||||
if (!m_use_rov_for_shader_blend)
|
||||
{
|
||||
plconfig.SetTargetFormats(VRAM_RT_FORMAT, depth_buffer_format);
|
||||
plconfig.render_pass_flags =
|
||||
needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;
|
||||
plconfig.blend.write_mask = 0;
|
||||
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
|
||||
}
|
||||
else
|
||||
{
|
||||
plconfig.SetTargetFormats(depth_buffer_format);
|
||||
}
|
||||
|
||||
if (!(m_clear_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CompileResolutionDependentPipelines(error) || !CompileDownsamplePipelines(error))
|
||||
|
@ -1949,7 +1975,7 @@ void GPU_HW::UpdateDepthBufferFromMaskBit()
|
|||
SetScissor();
|
||||
}
|
||||
|
||||
void GPU_HW::CopyAndClearDepthBuffer()
|
||||
void GPU_HW::CopyAndClearDepthBuffer(bool only_drawing_area)
|
||||
{
|
||||
if (!m_depth_was_copied)
|
||||
{
|
||||
|
@ -1976,17 +2002,39 @@ void GPU_HW::CopyAndClearDepthBuffer()
|
|||
m_depth_was_copied = true;
|
||||
}
|
||||
|
||||
ClearDepthBuffer();
|
||||
ClearDepthBuffer(only_drawing_area);
|
||||
}
|
||||
|
||||
void GPU_HW::ClearDepthBuffer()
|
||||
// Clears the depth buffer texture (m_vram_depth_texture) and resets the tracked last depth value to 1.0.
// only_drawing_area: when true, only the clamped drawing area (multiplied by the resolution scale) is cleared by
// drawing a quad with m_clear_depth_pipeline; when false, the whole texture is cleared through the device.
// m_pgxp_depth_buffer is expected to be set (checked with DebugAssert).
void GPU_HW::ClearDepthBuffer(bool only_drawing_area)
|
||||
{
|
||||
GL_SCOPE("GPU_HW::ClearDepthBuffer()");
|
||||
GL_SCOPE_FMT("GPU_HW::ClearDepthBuffer({})", only_drawing_area ? "Only Drawing Area" : "Full Buffer");
|
||||
DebugAssert(m_pgxp_depth_buffer);
|
||||
if (only_drawing_area)
|
||||
{
|
||||
// Partial clear: bind the dedicated clear pipeline and draw a quad instead of issuing a full-buffer clear.
g_gpu_device->SetPipeline(m_clear_depth_pipeline.get());
|
||||

||||
// Quad bounds are the clamped drawing area multiplied by m_resolution_scale.
const GSVector4i clear_bounds = m_clamped_drawing_area.mul32l(GSVector4i(m_resolution_scale));
|
||||

||||
// need to re-bind for rov, because we can't turn colour writes off for only the first target
|
||||
if (!m_use_rov_for_shader_blend)
|
||||
{
|
||||
DrawScreenQuad(clear_bounds, m_vram_depth_texture->GetSizeVec());
|
||||
}
|
||||
else
|
||||
{
|
||||
// ROV path: the depth texture is bound as the render target for the quad draw, then SetVRAMRenderTarget()
// is called afterwards (presumably to restore the normal VRAM targets; confirm against its definition).
g_gpu_device->SetRenderTarget(m_vram_depth_texture.get());
|
||||
DrawScreenQuad(clear_bounds, m_vram_depth_texture->GetSizeVec());
|
||||
SetVRAMRenderTarget();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Full clear: with ROV the texture is cleared as a render target with 0xFF, otherwise as a depth target to 1.0.
if (m_use_rov_for_shader_blend)
|
||||
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
|
||||
else
|
||||
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f);
|
||||
}
|
||||

||||
// Both paths record 1.0 as the last depth value and bump the clear counter.
m_last_depth_z = 1.0f;
|
||||
s_counters.num_depth_buffer_clears++;
|
||||
}
|
||||
|
@ -2405,8 +2453,12 @@ void GPU_HW::CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVer
|
|||
|
||||
if ((average_z - m_last_depth_z) >= g_gpu_settings.gpu_pgxp_depth_clear_threshold)
|
||||
{
|
||||
GL_INS_FMT("Clear depth buffer avg={} last={} threshold={}", average_z * static_cast<float>(GTE::MAX_Z),
|
||||
m_last_depth_z * static_cast<float>(GTE::MAX_Z),
|
||||
g_gpu_settings.gpu_pgxp_depth_clear_threshold * static_cast<float>(GTE::MAX_Z));
|
||||
|
||||
FlushRender();
|
||||
CopyAndClearDepthBuffer();
|
||||
CopyAndClearDepthBuffer(true);
|
||||
EnsureVertexBufferSpaceForCommand(cmd);
|
||||
}
|
||||
|
||||
|
@ -3723,7 +3775,7 @@ void GPU_HW::PrepareDraw(const GPUBackendDrawCommand* cmd)
|
|||
if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f)
|
||||
{
|
||||
FlushRender();
|
||||
CopyAndClearDepthBuffer();
|
||||
CopyAndClearDepthBuffer(false);
|
||||
EnsureVertexBufferSpaceForCommand(cmd);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,8 +186,8 @@ private:
|
|||
|
||||
void UpdateVRAMReadTexture(bool drawn, bool written);
|
||||
void UpdateDepthBufferFromMaskBit();
|
||||
void CopyAndClearDepthBuffer();
|
||||
void ClearDepthBuffer();
|
||||
void CopyAndClearDepthBuffer(bool only_drawing_area);
|
||||
void ClearDepthBuffer(bool only_drawing_area);
|
||||
void SetScissor();
|
||||
void SetVRAMRenderTarget();
|
||||
void DeactivateROV();
|
||||
|
@ -365,6 +365,7 @@ private:
|
|||
std::unique_ptr<GPUTexture> m_vram_extract_texture;
|
||||
std::unique_ptr<GPUTexture> m_vram_extract_depth_texture;
|
||||
std::unique_ptr<GPUPipeline> m_copy_depth_pipeline;
|
||||
std::unique_ptr<GPUPipeline> m_clear_depth_pipeline;
|
||||
std::unique_ptr<PostProcessing::Chain> m_internal_postfx;
|
||||
|
||||
std::unique_ptr<GPUTexture> m_downsample_texture;
|
||||
|
|
|
@ -1694,6 +1694,24 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader(bool msaa) c
|
|||
return std::move(ss).str();
|
||||
}
|
||||
|
||||
// Builds and returns the source of the fragment shader used for the depth-clear pipeline.
// write_depth_as_rt: value given to the WRITE_DEPTH_AS_RT macro. When it is set, the shader body writes
// float4(1.0f, 0.0f, 0.0f, 0.0f) to o_col0; otherwise the shader body is empty.
std::string GPU_HW_ShaderGen::GenerateVRAMClearDepthFragmentShader(bool write_depth_as_rt) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
DefineMacro(ss, "WRITE_DEPTH_AS_RT", write_depth_as_rt);
|
||||
// NOTE(review): the BoolToUInt32(write_depth_as_rt) argument looks like the colour output count (0 or 1), which
// would match o_col0 only being written under WRITE_DEPTH_AS_RT; confirm against DeclareFragmentEntryPoint.
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, BoolToUInt32(write_depth_as_rt), false, false, false, false, false);
|
||||

||||
ss << R"(
|
||||
{
|
||||
#if WRITE_DEPTH_AS_RT
|
||||
o_col0 = float4(1.0f, 0.0f, 0.0f, 0.0f);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||

||||
return std::move(ss).str();
|
||||
}
|
||||
|
||||
void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss) const
|
||||
{
|
||||
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_pixel_size", "float u_lod"}, true);
|
||||
|
|
|
@ -34,6 +34,7 @@ public:
|
|||
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced, bool write_mask_as_depth,
|
||||
bool write_depth_as_rt) const;
|
||||
std::string GenerateVRAMUpdateDepthFragmentShader(bool msaa) const;
|
||||
std::string GenerateVRAMClearDepthFragmentShader(bool write_depth_as_rt) const;
|
||||
std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit,
|
||||
bool depth_buffer) const;
|
||||
std::string GenerateVRAMReplacementBlitFragmentShader() const;
|
||||
|
|
Loading…
Reference in New Issue