GPU/HW: Fix adaptive downsampling

Also rewrite shaders to improve blurring around edges of 3D objects
(e.g. FF7).

As a trade-off, the background blurs slightly less, but (imo)
it looks better overall, since blurring of the foreground is
far more noticeable than blurring of the background.
This commit is contained in:
Stenzek 2024-09-26 12:37:49 +10:00
parent 5ed96fcfe4
commit fd8f97f4d3
No known key found for this signature in database
4 changed files with 74 additions and 108 deletions

View File

@ -1563,49 +1563,39 @@ bool GPU_HW::CompilePipelines(Error* error)
GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleVertexShader(), error); GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleVertexShader(), error);
std::unique_ptr<GPUShader> fs = std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true), error); shadergen.GenerateAdaptiveDownsampleMipFragmentShader(), error);
if (!vs || !fs) if (!vs || !fs)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Vertex Shader"); GL_OBJECT_NAME(fs, "Downsample Vertex Shader");
GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); GL_OBJECT_NAME(fs, "Downsample Fragment Shader");
plconfig.vertex_shader = vs.get(); plconfig.vertex_shader = vs.get();
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false; return false;
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleBlurFragmentShader(), error); shadergen.GenerateAdaptiveDownsampleBlurFragmentShader(), error);
if (!fs) if (!fs)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); GL_OBJECT_NAME(fs, "Downsample Blur Fragment Shader");
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(GPUTexture::Format::R8); plconfig.SetTargetFormats(GPUTexture::Format::R8);
if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false; return false;
GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); GL_OBJECT_NAME(m_downsample_blur_pipeline, "Downsample Blur Pass Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader(), error); shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader(), error);
if (!fs) if (!fs)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); GL_OBJECT_NAME(fs, "Downsample Composite Fragment Shader");
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(VRAM_RT_FORMAT); plconfig.SetTargetFormats(VRAM_RT_FORMAT);
if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false; return false;
GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline"); GL_OBJECT_NAME(m_downsample_composite_pipeline, "Downsample Blur Pass Pipeline");
GPUSampler::Config config = GPUSampler::GetLinearConfig(); GPUSampler::Config config = GPUSampler::GetLinearConfig();
config.min_lod = 0; config.min_lod = 0;
@ -1638,10 +1628,10 @@ bool GPU_HW::CompilePipelines(Error* error)
GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader");
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();
if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false; return false;
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline");
progress.Increment(); progress.Increment();
} }
@ -1674,10 +1664,10 @@ void GPU_HW::DestroyPipelines()
destroy(m_vram_update_depth_pipeline); destroy(m_vram_update_depth_pipeline);
destroy(m_vram_write_replacement_pipeline); destroy(m_vram_write_replacement_pipeline);
destroy(m_downsample_first_pass_pipeline); destroy(m_downsample_pass_pipeline);
destroy(m_downsample_mid_pass_pipeline); destroy(m_downsample_blur_pipeline);
destroy(m_downsample_blur_pass_pipeline); destroy(m_downsample_composite_pipeline);
destroy(m_downsample_composite_pass_pipeline); m_downsample_lod_sampler.reset();
m_downsample_composite_sampler.reset(); m_downsample_composite_sampler.reset();
m_copy_depth_pipeline.reset(); m_copy_depth_pipeline.reset();
@ -2772,8 +2762,8 @@ void GPU_HW::LoadVertices()
const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y);
const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos);
const GSVector4i rect = const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos))
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); .add32(GSVector4i::cxpr(0, 0, 1, 1));
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
{ {
@ -3874,15 +3864,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
uniforms.min_uv[1] = 0.0f; uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width; uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width;
uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height; uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height;
uniforms.rcp_size[0] = rcp_width; uniforms.rcp_size[0] = rcp_width * 0.25f;
uniforms.rcp_size[1] = rcp_height; uniforms.rcp_size[1] = rcp_height * 0.25f;
uniforms.lod = static_cast<float>(level - 1); uniforms.lod = static_cast<float>(level - 1);
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height)); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height));
g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() : g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());
m_downsample_mid_pass_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0); g_gpu_device->Draw(3, 0);
g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0, g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0,
@ -3911,7 +3900,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
g_gpu_device->SetRenderTarget(weight_texture.get()); g_gpu_device->SetRenderTarget(weight_texture.get());
g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height)); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height));
g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0); g_gpu_device->Draw(3, 0);
weight_texture->MakeReadyForSampling(); weight_texture->MakeReadyForSampling();
@ -3925,13 +3914,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
uniforms.min_uv[1] = 0.0f; uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = 1.0f; uniforms.max_uv[0] = 1.0f;
uniforms.max_uv[1] = 1.0f; uniforms.max_uv[1] = 1.0f;
uniforms.lod = static_cast<float>(level_texture->GetLevels() - 1);
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get()); g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get());
g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get()); g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get());
g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height)); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height));
g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0); g_gpu_device->Draw(3, 0);
m_downsample_texture->MakeReadyForSampling(); m_downsample_texture->MakeReadyForSampling();
@ -3971,7 +3961,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get());
g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());
g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height); g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));

View File

@ -323,10 +323,9 @@ private:
std::unique_ptr<GPUPipeline> m_copy_depth_pipeline; std::unique_ptr<GPUPipeline> m_copy_depth_pipeline;
std::unique_ptr<GPUTexture> m_downsample_texture; std::unique_ptr<GPUTexture> m_downsample_texture;
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_blur_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_composite_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
std::unique_ptr<GPUSampler> m_downsample_lod_sampler; std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
std::unique_ptr<GPUSampler> m_downsample_composite_sampler; std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
u32 m_downsample_scale_or_levels = 0; u32 m_downsample_scale_or_levels = 0;

View File

@ -1550,7 +1550,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss) void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss)
{ {
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true); DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_pixel_size", "float u_lod"}, true);
} }
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader() std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
@ -1572,58 +1572,34 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
return ss.str(); return ss.str();
} }
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass) std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteAdaptiveDownsampleUniformBuffer(ss); WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DefineMacro(ss, "FIRST_PASS", first_pass);
// mipmap_energy.glsl ported from parallel-rsx.
ss << R"(
float4 get_bias(float3 c00, float3 c01, float3 c10, float3 c11)
{
// Measure the "energy" (variance) in the pixels.
// If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges)
float3 avg = 0.25 * (c00 + c01 + c10 + c11);
float s00 = dot(c00 - avg, c00 - avg);
float s01 = dot(c01 - avg, c01 - avg);
float s10 = dot(c10 - avg, c10 - avg);
float s11 = dot(c11 - avg, c11 - avg);
return float4(avg, 1.0 - log2(1000.0 * (s00 + s01 + s10 + s11) + 1.0));
}
float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11)
{
// Measure the "energy" (variance) in the pixels.
// If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges)
float avg = 0.25 * (c00.a + c01.a + c10.a + c11.a);
float4 bias = get_bias(c00.rgb, c01.rgb, c10.rgb, c11.rgb);
bias.a *= avg;
return bias;
}
)";
DeclareFragmentEntryPoint(ss, 0, 1); DeclareFragmentEntryPoint(ss, 0, 1);
ss << R"( ss << R"(
{ {
float2 uv = v_tex0 - (u_rcp_resolution * 0.25); // Gather 4 samples for bilinear filtering.
#ifdef FIRST_PASS float2 uv = v_tex0 - u_pixel_size; // * 0.25 done on CPU
vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb; float4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0));
vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb; float4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1));
vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb; float4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0));
vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb; float4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1));
o_col0 = get_bias(c00, c01, c10, c11); float3 cavg = (c00.rgb + c01.rgb + c10.rgb + c11.rgb) * 0.25;
#else
vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)); // Compute variance between pixels with logarithmic scaling to aggressively reduce along the edges.
vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)); float variance =
vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)); 1.0 - log2(1000.0 * (dot(c00.rgb - cavg.rgb, c00.rgb - cavg.rgb) + dot(c01.rgb - cavg, c01.rgb - cavg) +
vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)); dot(c10.rgb - cavg.rgb, c10.rgb - cavg.rgb) + dot(c11.rgb - cavg, c11.rgb - cavg)) +
o_col0 = get_bias(c00, c01, c10, c11); 1.0);
#endif
// Write variance to the alpha channel, weighted by the previous LOD's variance.
// There's no variance in the first LOD.
float aavg = (c00.a + c01.a + c10.a + c11.a) * 0.25;
o_col0.rgb = cavg.rgb;
o_col0.a = variance * ((u_lod == 0.0) ? 1.0 : aavg);
} }
)"; )";
@ -1637,26 +1613,30 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader()
WriteColorConversionFunctions(ss); WriteColorConversionFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss); WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
// mipmap_blur.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1); DeclareFragmentEntryPoint(ss, 0, 1);
ss << R"( ss << R"(
{ {
float bias = 0.0; // Bog standard blur kernel unrolled for speed:
const float w0 = 0.25; // [ 0.0625, 0.125, 0.0625
const float w1 = 0.125; // 0.125, 0.25, 0.125
const float w2 = 0.0625; // 0.0625, 0.125, 0.0625 ]
#define UV(x, y) clamp((v_tex0 + float2(x, y) * u_rcp_resolution), u_uv_min, u_uv_max) //
bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, -1.0)).a; // Can't use offset for sampling here, because we need to clamp, and the source texture is larger.
bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, -1.0)).a; //
bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, +1.0)).a; #define KERNEL_SAMPLE(weight, xoff, yoff) \
bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, +1.0)).a; (weight) * SAMPLE_TEXTURE_LEVEL( \
bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, -1.0)).a; samp0, clamp((v_tex0 + float2(float(xoff), float(yoff)) * u_pixel_size), u_uv_min, u_uv_max), 0.0) \
bias += w1 * SAMPLE_TEXTURE(samp0, UV(-1.0, 0.0)).a; .a
bias += w1 * SAMPLE_TEXTURE(samp0, UV(+1.0, 0.0)).a; float blur = KERNEL_SAMPLE(0.0625, -1, -1);
bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, +1.0)).a; blur += KERNEL_SAMPLE(0.0625, 1, -1);
bias += w0 * SAMPLE_TEXTURE(samp0, UV( 0.0, 0.0)).a; blur += KERNEL_SAMPLE(0.0625, -1, 1);
o_col0 = float4(bias, bias, bias, bias); blur += KERNEL_SAMPLE(0.0625, 1, 1);
blur += KERNEL_SAMPLE(0.125, 0, -1);
blur += KERNEL_SAMPLE(0.125, -1, 0);
blur += KERNEL_SAMPLE(0.125, 1, 0);
blur += KERNEL_SAMPLE(0.125, 0, 1);
blur += KERNEL_SAMPLE(0.25, 0, 0);
o_col0 = float4(blur, blur, blur, blur);
} }
)"; )";
@ -1667,17 +1647,14 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleCompositeFragmentShader(
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DeclareTexture(ss, "samp1", 1, false); DeclareTexture(ss, "samp1", 1, false);
// mipmap_resolve.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1, {}, true); DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
ss << R"( ss << R"(
{ {
float bias = SAMPLE_TEXTURE(samp1, v_tex0).r; // Sample the mip level determined by the weight texture. samp0 is trilinear, so it will blend between levels.
float mip = float(RESOLUTION_SCALE - 1u) * bias; o_col0 = float4(SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, SAMPLE_TEXTURE(samp1, v_tex0).r * u_lod).rgb, 1.0);
float3 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, mip).rgb;
o_col0 = float4(color, 1.0);
} }
)"; )";

View File

@ -31,7 +31,7 @@ public:
std::string GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer); std::string GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer);
std::string GenerateAdaptiveDownsampleVertexShader(); std::string GenerateAdaptiveDownsampleVertexShader();
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); std::string GenerateAdaptiveDownsampleMipFragmentShader();
std::string GenerateAdaptiveDownsampleBlurFragmentShader(); std::string GenerateAdaptiveDownsampleBlurFragmentShader();
std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateAdaptiveDownsampleCompositeFragmentShader();
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor); std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor);