diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 745f0f088..f01fa0d2d 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1563,49 +1563,39 @@ bool GPU_HW::CompilePipelines(Error* error) GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleVertexShader(), error); std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true), error); + shadergen.GenerateAdaptiveDownsampleMipFragmentShader(), error); if (!vs || !fs) return false; GL_OBJECT_NAME(fs, "Downsample Vertex Shader"); - GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); + GL_OBJECT_NAME(fs, "Downsample Fragment Shader"); plconfig.vertex_shader = vs.get(); plconfig.fragment_shader = fs.get(); - if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; - GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); - - fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false), error); - if (!fs) - return false; - GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader"); - plconfig.fragment_shader = fs.get(); - if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) - return false; - GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline"); fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleBlurFragmentShader(), error); if (!fs) return false; - GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); + GL_OBJECT_NAME(fs, "Downsample Blur Fragment Shader"); plconfig.fragment_shader = fs.get(); plconfig.SetTargetFormats(GPUTexture::Format::R8); - if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; - GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_blur_pipeline, "Downsample Blur Pass Pipeline"); fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader(), error); if (!fs) return false; - GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); + GL_OBJECT_NAME(fs, "Downsample Composite Fragment Shader"); plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; plconfig.fragment_shader = fs.get(); plconfig.SetTargetFormats(VRAM_RT_FORMAT); - if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; - GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_composite_pipeline, "Downsample Blur Pass Pipeline"); GPUSampler::Config config = GPUSampler::GetLinearConfig(); config.min_lod = 0; @@ -1638,10 +1628,10 @@ bool GPU_HW::CompilePipelines(Error* error) GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); plconfig.fragment_shader = fs.get(); - if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) + if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; - GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_pass_pipeline, "Downsample First Pass Pipeline"); progress.Increment(); } @@ -1674,10 +1664,10 @@ void GPU_HW::DestroyPipelines() destroy(m_vram_update_depth_pipeline); destroy(m_vram_write_replacement_pipeline); - destroy(m_downsample_first_pass_pipeline); - destroy(m_downsample_mid_pass_pipeline); - destroy(m_downsample_blur_pass_pipeline); - destroy(m_downsample_composite_pass_pipeline); + destroy(m_downsample_pass_pipeline); + destroy(m_downsample_blur_pipeline); + destroy(m_downsample_composite_pipeline); + m_downsample_lod_sampler.reset(); m_downsample_composite_sampler.reset(); m_copy_depth_pipeline.reset(); @@ -2772,8 +2762,8 @@ void GPU_HW::LoadVertices() const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); - const GSVector4i rect = - GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { @@ -3874,15 +3864,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top uniforms.min_uv[1] = 0.0f; uniforms.max_uv[0] = static_cast(level_width) * rcp_width; uniforms.max_uv[1] = static_cast(level_height) * rcp_height; - uniforms.rcp_size[0] = rcp_width; - uniforms.rcp_size[1] = rcp_height; + uniforms.rcp_size[0] = rcp_width * 0.25f; + uniforms.rcp_size[1] = rcp_height * 0.25f; uniforms.lod = static_cast(level - 1); g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height)); - g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() : - m_downsample_mid_pass_pipeline.get()); + g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0, @@ -3911,7 +3900,7 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top g_gpu_device->SetRenderTarget(weight_texture.get()); g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height)); - g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); + g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); weight_texture->MakeReadyForSampling(); @@ -3925,13 +3914,14 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top uniforms.min_uv[1] = 0.0f; uniforms.max_uv[0] = 1.0f; uniforms.max_uv[1] = 1.0f; + uniforms.lod = static_cast(level_texture->GetLevels() - 1); g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get()); g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get()); g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get()); g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height)); - g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); + g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); m_downsample_texture->MakeReadyForSampling(); @@ -3971,7 +3961,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); g_gpu_device->SetRenderTarget(m_downsample_texture.get()); - g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); + g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get()); g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 8ee60c317..b2fd939a8 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -323,10 +323,9 @@ private: std::unique_ptr m_copy_depth_pipeline; std::unique_ptr m_downsample_texture; - std::unique_ptr m_downsample_first_pass_pipeline; - std::unique_ptr m_downsample_mid_pass_pipeline; - std::unique_ptr m_downsample_blur_pass_pipeline; - std::unique_ptr m_downsample_composite_pass_pipeline; + std::unique_ptr m_downsample_pass_pipeline; + std::unique_ptr m_downsample_blur_pipeline; + std::unique_ptr m_downsample_composite_pipeline; std::unique_ptr m_downsample_lod_sampler; std::unique_ptr m_downsample_composite_sampler; u32 m_downsample_scale_or_levels = 0; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 7d031efae..1c4ca21a4 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1550,7 +1550,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader() void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss) { - DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true); + DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_pixel_size", "float u_lod"}, true); } std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader() @@ -1572,58 +1572,34 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader() return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass) +std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader() { std::stringstream ss; WriteHeader(ss); WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - DefineMacro(ss, "FIRST_PASS", first_pass); - - // mipmap_energy.glsl ported from parallel-rsx. - ss << R"( - -float4 get_bias(float3 c00, float3 c01, float3 c10, float3 c11) -{ - // Measure the "energy" (variance) in the pixels. - // If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges) - float3 avg = 0.25 * (c00 + c01 + c10 + c11); - float s00 = dot(c00 - avg, c00 - avg); - float s01 = dot(c01 - avg, c01 - avg); - float s10 = dot(c10 - avg, c10 - avg); - float s11 = dot(c11 - avg, c11 - avg); - return float4(avg, 1.0 - log2(1000.0 * (s00 + s01 + s10 + s11) + 1.0)); -} - -float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11) -{ - // Measure the "energy" (variance) in the pixels. - // If the pixels are all the same (2D content), use maximum bias, otherwise, taper off quickly back to 0 (edges) - float avg = 0.25 * (c00.a + c01.a + c10.a + c11.a); - float4 bias = get_bias(c00.rgb, c01.rgb, c10.rgb, c11.rgb); - bias.a *= avg; - return bias; -} - -)"; - DeclareFragmentEntryPoint(ss, 0, 1); ss << R"( { - float2 uv = v_tex0 - (u_rcp_resolution * 0.25); -#ifdef FIRST_PASS - vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb; - vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb; - vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb; - vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb; - o_col0 = get_bias(c00, c01, c10, c11); -#else - vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)); - vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)); - vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)); - vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)); - o_col0 = get_bias(c00, c01, c10, c11); -#endif + // Gather 4 samples for bilinear filtering. + float2 uv = v_tex0 - u_pixel_size; // * 0.25 done on CPU + float4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)); + float4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)); + float4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)); + float4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)); + float3 cavg = (c00.rgb + c01.rgb + c10.rgb + c11.rgb) * 0.25; + + // Compute variance between pixels with logarithmic scaling to aggressively reduce along the edges. + float variance = + 1.0 - log2(1000.0 * (dot(c00.rgb - cavg.rgb, c00.rgb - cavg.rgb) + dot(c01.rgb - cavg, c01.rgb - cavg) + + dot(c10.rgb - cavg.rgb, c10.rgb - cavg.rgb) + dot(c11.rgb - cavg, c11.rgb - cavg)) + + 1.0); + + // Write variance to the alpha channel, weighted by the previous LOD's variance. + // There's no variance in the first LOD. + float aavg = (c00.a + c01.a + c10.a + c11.a) * 0.25; + o_col0.rgb = cavg.rgb; + o_col0.a = variance * ((u_lod == 0.0) ? 1.0 : aavg); } )"; @@ -1637,26 +1613,30 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader() WriteColorConversionFunctions(ss); WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - - // mipmap_blur.glsl ported from parallel-rsx. DeclareFragmentEntryPoint(ss, 0, 1); ss << R"( { - float bias = 0.0; - const float w0 = 0.25; - const float w1 = 0.125; - const float w2 = 0.0625; -#define UV(x, y) clamp((v_tex0 + float2(x, y) * u_rcp_resolution), u_uv_min, u_uv_max) - bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, -1.0)).a; - bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, -1.0)).a; - bias += w2 * SAMPLE_TEXTURE(samp0, UV(-1.0, +1.0)).a; - bias += w2 * SAMPLE_TEXTURE(samp0, UV(+1.0, +1.0)).a; - bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, -1.0)).a; - bias += w1 * SAMPLE_TEXTURE(samp0, UV(-1.0, 0.0)).a; - bias += w1 * SAMPLE_TEXTURE(samp0, UV(+1.0, 0.0)).a; - bias += w1 * SAMPLE_TEXTURE(samp0, UV( 0.0, +1.0)).a; - bias += w0 * SAMPLE_TEXTURE(samp0, UV( 0.0, 0.0)).a; - o_col0 = float4(bias, bias, bias, bias); + // Bog standard blur kernel unrolled for speed: + // [ 0.0625, 0.125, 0.0625 + // 0.125, 0.25, 0.125 + // 0.0625, 0.125, 0.0625 ] + // + // Can't use offset for sampling here, because we need to clamp, and the source texture is larger. + // +#define KERNEL_SAMPLE(weight, xoff, yoff) \ + (weight) * SAMPLE_TEXTURE_LEVEL( \ + samp0, clamp((v_tex0 + float2(float(xoff), float(yoff)) * u_pixel_size), u_uv_min, u_uv_max), 0.0) \ + .a + float blur = KERNEL_SAMPLE(0.0625, -1, -1); + blur += KERNEL_SAMPLE(0.0625, 1, -1); + blur += KERNEL_SAMPLE(0.0625, -1, 1); + blur += KERNEL_SAMPLE(0.0625, 1, 1); + blur += KERNEL_SAMPLE(0.125, 0, -1); + blur += KERNEL_SAMPLE(0.125, -1, 0); + blur += KERNEL_SAMPLE(0.125, 1, 0); + blur += KERNEL_SAMPLE(0.125, 0, 1); + blur += KERNEL_SAMPLE(0.25, 0, 0); + o_col0 = float4(blur, blur, blur, blur); } )"; @@ -1667,17 +1647,14 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleCompositeFragmentShader( { std::stringstream ss; WriteHeader(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp1", 1, false); - - // mipmap_resolve.glsl ported from parallel-rsx. DeclareFragmentEntryPoint(ss, 0, 1, {}, true); ss << R"( { - float bias = SAMPLE_TEXTURE(samp1, v_tex0).r; - float mip = float(RESOLUTION_SCALE - 1u) * bias; - float3 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, mip).rgb; - o_col0 = float4(color, 1.0); + // Sample the mip level determined by the weight texture. samp0 is trilinear, so it will blend between levels. + o_col0 = float4(SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, SAMPLE_TEXTURE(samp1, v_tex0).r * u_lod).rgb, 1.0); } )"; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 1e708fff3..69b390494 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -31,7 +31,7 @@ public: std::string GenerateVRAMExtractFragmentShader(bool color_24bit, bool depth_buffer); std::string GenerateAdaptiveDownsampleVertexShader(); - std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); + std::string GenerateAdaptiveDownsampleMipFragmentShader(); std::string GenerateAdaptiveDownsampleBlurFragmentShader(); std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor);