GPU/HW: Fix adaptive downsampling

Also rewrite shaders to improve blurring around edges of 3D objects
(e.g. FF7).

As a trade-off, the background is blurred slightly less than before, but
(in my opinion) it looks better overall, since blurring of the foreground
is much more noticeable than blurring of the background.
This commit is contained in:
Stenzek 2024-09-26 12:37:49 +10:00
parent 5ed96fcfe4
commit fd8f97f4d3
No known key found for this signature in database
4 changed files with 74 additions and 108 deletions

View File

@ -1563,49 +1563,39 @@ bool GPU_HW::CompilePipelines(Error* error)
GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleVertexShader(), error);
std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true), error);
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(), error);
if (!vs || !fs)
return false;
GL_OBJECT_NAME(fs, "Downsample Vertex Shader");
GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader");
GL_OBJECT_NAME(fs, "Downsample Fragment Shader");
plconfig.vertex_shader = vs.get();
plconfig.fragment_shader = fs.get();
if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline");
GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleBlurFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader");
GL_OBJECT_NAME(fs, "Downsample Blur Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(GPUTexture::Format::R8);
if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline");
GL_OBJECT_NAME(m_downsample_blur_pipeline, "Downsample Blur Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader");
GL_OBJECT_NAME(fs, "Downsample Composite Fragment Shader");
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline");
GL_OBJECT_NAME(m_downsample_composite_pipeline, "Downsample Blur Pass Pipeline");
GPUSampler::Config config = GPUSampler::GetLinearConfig();
config.min_lod = 0;
@ -1638,10 +1628,10 @@ bool GPU_HW::CompilePipelines(Error* error)
GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline");
GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline");
progress.Increment();
}
@ -1674,10 +1664,10 @@ void GPU_HW::DestroyPipelines()
destroy(m_vram_update_depth_pipeline);
destroy(m_vram_write_replacement_pipeline);
destroy(m_downsample_first_pass_pipeline);
destroy(m_downsample_mid_pass_pipeline);
destroy(m_downsample_blur_pass_pipeline);
destroy(m_downsample_composite_pass_pipeline);
destroy(m_downsample_pass_pipeline);
destroy(m_downsample_blur_pipeline);
destroy(m_downsample_composite_pipeline);
m_downsample_lod_sampler.reset();
m_downsample_composite_sampler.reset();
m_copy_depth_pipeline.reset();
@ -2772,8 +2762,8 @@ void GPU_HW::LoadVertices()
const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y);
const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos);
const GSVector4i rect =
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos))
.add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
{
@ -3874,15 +3864,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width;
uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height;
uniforms.rcp_size[0] = rcp_width;
uniforms.rcp_size[1] = rcp_height;
uniforms.rcp_size[0] = rcp_width * 0.25f;
uniforms.rcp_size[1] = rcp_height * 0.25f;
uniforms.lod = static_cast<float>(level - 1);
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height));
g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() :
m_downsample_mid_pass_pipeline.get());
g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0,
@ -3911,7 +3900,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
g_gpu_device->SetRenderTarget(weight_texture.get());
g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height));
g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get());
g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
weight_texture->MakeReadyForSampling();
@ -3925,13 +3914,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = 1.0f;
uniforms.max_uv[1] = 1.0f;
uniforms.lod = static_cast<float>(level_texture->GetLevels() - 1);
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get());
g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height));
g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get());
g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
m_downsample_texture->MakeReadyForSampling();
@ -3971,7 +3961,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get());
g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());
g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));

View File

@ -323,10 +323,9 @@ private:
std::unique_ptr<GPUPipeline> m_copy_depth_pipeline;
std::unique_ptr<GPUTexture> m_downsample_texture;
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pipeline;
std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
u32 m_downsample_scale_or_levels = 0;

View File

@ -1550,7 +1550,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss)
{
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true);
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_pixel_size", "float u_lod"}, true);
}
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
@ -1572,58 +1572,34 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass)
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader()
{
std::stringstream ss;
WriteHeader(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false);
DefineMacro(ss, "FIRST_PASS", first_pass);
// mipmap_energy.glsl ported from parallel-rsx.
ss << R"(
float4 get_bias(float3 c00, float3 c01, float3 c10, float3 c11)
{
// Measure the "energy" (variance) in the pixels.
// If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges)
float3 avg = 0.25 * (c00 + c01 + c10 + c11);
float s00 = dot(c00 - avg, c00 - avg);
float s01 = dot(c01 - avg, c01 - avg);
float s10 = dot(c10 - avg, c10 - avg);
float s11 = dot(c11 - avg, c11 - avg);
return float4(avg, 1.0 - log2(1000.0 * (s00 + s01 + s10 + s11) + 1.0));
}
float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11)
{
// Measure the "energy" (variance) in the pixels.
// If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges)
float avg = 0.25 * (c00.a + c01.a + c10.a + c11.a);
float4 bias = get_bias(c00.rgb, c01.rgb, c10.rgb, c11.rgb);
bias.a *= avg;
return bias;
}
)";
DeclareFragmentEntryPoint(ss, 0, 1);
ss << R"(
{
float2 uv = v_tex0 - (u_rcp_resolution * 0.25);
#ifdef FIRST_PASS
vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb;
vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb;
vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb;
vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb;
o_col0 = get_bias(c00, c01, c10, c11);
#else
vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0));
vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1));
vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0));
vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1));
o_col0 = get_bias(c00, c01, c10, c11);
#endif
// Gather 4 samples for bilinear filtering.
float2 uv = v_tex0 - u_pixel_size; // * 0.25 done on CPU
float4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0));
float4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1));
float4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0));
float4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1));
float3 cavg = (c00.rgb + c01.rgb + c10.rgb + c11.rgb) * 0.25;
// Compute variance between pixels with logarithmic scaling to aggressively reduce along the edges.
float variance =
1.0 - log2(1000.0 * (dot(c00.rgb - cavg.rgb, c00.rgb - cavg.rgb) + dot(c01.rgb - cavg, c01.rgb - cavg) +
dot(c10.rgb - cavg.rgb, c10.rgb - cavg.rgb) + dot(c11.rgb - cavg, c11.rgb - cavg)) +
1.0);
// Write variance to the alpha channel, weighted by the previous LOD's variance.
// There's no variance in the first LOD.
float aavg = (c00.a + c01.a + c10.a + c11.a) * 0.25;
o_col0.rgb = cavg.rgb;
o_col0.a = variance * ((u_lod == 0.0) ? 1.0 : aavg);
}
)";
@ -1637,26 +1613,30 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader()
WriteColorConversionFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false);
// mipmap_blur.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1);
ss << R"(
{
float bias = 0.0;
const float w0 = 0.25;
const float w1 = 0.125;
const float w2 = 0.0625;
#define UV(x, y) clamp((v_tex0 + float2(x, y) * u_rcp_resolution), u_uv_min, u_uv_max)
bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, -1.0)).a;
bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, -1.0)).a;
bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, +1.0)).a;
bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, +1.0)).a;
bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, -1.0)).a;
bias += w1 * SAMPLE_TEXTURE(samp0, UV(-1.0, 0.0)).a;
bias += w1 * SAMPLE_TEXTURE(samp0, UV(+1.0, 0.0)).a;
bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, +1.0)).a;
bias += w0 * SAMPLE_TEXTURE(samp0, UV( 0.0, 0.0)).a;
o_col0 = float4(bias, bias, bias, bias);
// Bog standard blur kernel unrolled for speed:
// [ 0.0625, 0.125, 0.0625
// 0.125, 0.25, 0.125
// 0.0625, 0.125, 0.0625 ]
//
// Can't use offset for sampling here, because we need to clamp, and the source texture is larger.
//
#define KERNEL_SAMPLE(weight, xoff, yoff) \
(weight) * SAMPLE_TEXTURE_LEVEL( \
samp0, clamp((v_tex0 + float2(float(xoff), float(yoff)) * u_pixel_size), u_uv_min, u_uv_max), 0.0) \
.a
float blur = KERNEL_SAMPLE(0.0625, -1, -1);
blur += KERNEL_SAMPLE(0.0625, 1, -1);
blur += KERNEL_SAMPLE(0.0625, -1, 1);
blur += KERNEL_SAMPLE(0.0625, 1, 1);
blur += KERNEL_SAMPLE(0.125, 0, -1);
blur += KERNEL_SAMPLE(0.125, -1, 0);
blur += KERNEL_SAMPLE(0.125, 1, 0);
blur += KERNEL_SAMPLE(0.125, 0, 1);
blur += KERNEL_SAMPLE(0.25, 0, 0);
o_col0 = float4(blur, blur, blur, blur);
}
)";
@ -1667,17 +1647,14 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleCompositeFragmentShader(
{
std::stringstream ss;
WriteHeader(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false);
DeclareTexture(ss, "samp1", 1, false);
// mipmap_resolve.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
ss << R"(
{
float bias = SAMPLE_TEXTURE(samp1, v_tex0).r;
float mip = float(RESOLUTION_SCALE - 1u) * bias;
float3 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, mip).rgb;
o_col0 = float4(color, 1.0);
// Sample the mip level determined by the weight texture. samp0 is trilinear, so it will blend between levels.
o_col0 = float4(SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, SAMPLE_TEXTURE(samp1, v_tex0).r * u_lod).rgb, 1.0);
}
)";

View File

@ -31,7 +31,7 @@ public:
std::string GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer);
std::string GenerateAdaptiveDownsampleVertexShader();
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
std::string GenerateAdaptiveDownsampleMipFragmentShader();
std::string GenerateAdaptiveDownsampleBlurFragmentShader();
std::string GenerateAdaptiveDownsampleCompositeFragmentShader();
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor);