From d02f5d401e43ae3084799cad16c4e73d0f87be3b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 20 Sep 2023 20:47:42 +1000 Subject: [PATCH] GPU/HW: Add framebuffer fetch blending path --- src/core/gpu.cpp | 3 +- src/core/gpu_hw.cpp | 98 ++++++++++++++++++------- src/core/gpu_hw.h | 9 ++- src/core/gpu_hw_shadergen.cpp | 67 ++++++++++++----- src/core/gpu_hw_shadergen.h | 7 +- src/core/gpu_shadergen.cpp | 7 +- src/core/gpu_shadergen.h | 2 +- src/util/d3d11_device.cpp | 1 + src/util/d3d12_device.cpp | 1 + src/util/gpu_device.cpp | 2 +- src/util/gpu_device.h | 1 + src/util/metal_device.mm | 1 + src/util/opengl_device.cpp | 4 +- src/util/postprocessing_shader_glsl.cpp | 10 ++- src/util/shadergen.cpp | 48 ++++++++++-- src/util/shadergen.h | 8 +- src/util/vulkan_device.cpp | 1 + 17 files changed, 199 insertions(+), 71 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 44ec76160..f5809ecfc 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1570,7 +1570,8 @@ void GPU::SetTextureWindow(u32 value) bool GPU::CompileDisplayPipeline() { - GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend); + GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); GPUPipeline::GraphicsConfig plconfig; plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 821164be2..a6dff378b 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -105,9 +105,9 @@ public: } ~ShaderCompileProgressTracker() = default; - void Increment() + void Increment(u32 progress = 1) { - m_progress++; + m_progress += progress; const u64 tv = Common::Timer::GetCurrentValue(); if ((tv - m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) @@ -209,6 +209,7 @@ bool GPU_HW::Initialize() m_resolution_scale = CalculateResolutionScale(); m_multisamples = std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples()); m_supports_dual_source_blend = features.dual_source_blend; + m_supports_framebuffer_fetch = features.framebuffer_fetch; m_per_sample_shading = g_settings.gpu_per_sample_shading && features.per_sample_shading; m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; @@ -727,16 +728,17 @@ bool GPU_HW::CompilePipelines() const GPUDevice::Features features = g_gpu_device->GetFeatures(); GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); + m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend, + m_supports_framebuffer_fetch); - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + - (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + + 2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); // vertex shaders - [textured] // fragment shaders - [render_mode][texture_mode][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; DimensionalArray, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; + DimensionalArray, 2, 2, 9, 5, 4> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader); @@ -753,23 +755,47 @@ bool GPU_HW::CompilePipelines() for (u8 render_mode = 0; render_mode < 4; render_mode++) { - for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { - for (u8 dithering = 0; dithering < 2; dithering++) + if (m_supports_framebuffer_fetch) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + // Don't need multipass shaders. + if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && + render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) { - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + progress.Increment(2 * 2 * 9); + continue; + } + } + else + { + // Can't generate shader blending. + if (transparency_mode != static_cast(GPUTransparencyMode::Disabled)) + { + progress.Increment(2 * 2 * 9); + continue; + } + } - if (!(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) { - return false; - } + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(transparency_mode), + static_cast(texture_mode), ConvertToBoolUnchecked(dithering), + ConvertToBoolUnchecked(interlacing)); - progress.Increment(); + if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][dithering][interlacing] = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } } } } @@ -807,6 +833,17 @@ bool GPU_HW::CompilePipelines() { for (u8 render_mode = 0; render_mode < 4; render_mode++) { + if (m_supports_framebuffer_fetch) + { + // Don't need multipass shaders. + if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && + render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) + { + progress.Increment(2 * 2 * 9 * 5); + continue; + } + } + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) @@ -819,6 +856,11 @@ bool GPU_HW::CompilePipelines() GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, GPUPipeline::DepthFunc::LessEqual}; const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + const bool use_shader_blending = + (textured && m_supports_framebuffer_fetch && + (transparency_mode == static_cast(GPUTransparencyMode::BackgroundMinusForeground) || + (!m_supports_dual_source_blend && + transparency_mode != static_cast(GPUTransparencyMode::Disabled)))); plconfig.input_layout.vertex_attributes = textured ? @@ -830,16 +872,21 @@ bool GPU_HW::CompilePipelines() plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); plconfig.fragment_shader = - batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].get(); + batch_fragment_shaders[render_mode] + [use_shader_blending ? transparency_mode : + static_cast(GPUTransparencyMode::Disabled)] + [texture_mode][dithering][interlacing] + .get(); plconfig.depth.depth_test = depth_test_values[depth_test]; plconfig.depth.depth_write = !m_pgxp_depth_buffer || depth_test != 0; plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - m_texture_filtering != GPUTextureFilter::Nearest) + if (!use_shader_blending && + ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + m_texture_filtering != GPUTextureFilter::Nearest)) { plconfig.blend.enable = true; plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; @@ -2041,8 +2088,7 @@ ALWAYS_INLINE bool GPU_HW::NeedsTwoPassRendering() const // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled // on a per-pixel basis, and the opaque pixels shouldn't be blended at all. - // TODO: see if there's a better way we can do this. definitely can with fbfetch. - return (m_batch.texture_mode != GPUTextureMode::Disabled && + return (m_batch.texture_mode != GPUTextureMode::Disabled && !m_supports_framebuffer_fetch && (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground || (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } @@ -2469,7 +2515,9 @@ void GPU_HW::DispatchRenderCommand() EnsureVertexBufferSpaceForCurrentCommand(); // transparency mode change - if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled) + if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled && + (!m_supports_framebuffer_fetch || + (transparency_mode != GPUTransparencyMode::BackgroundMinusForeground && !m_supports_dual_source_blend))) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index a2efd9a7f..0efce6d33 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -236,10 +236,11 @@ private: union { BitField m_supports_dual_source_blend; - BitField m_per_sample_shading; - BitField m_scaled_dithering; - BitField m_chroma_smoothing; - BitField m_disable_color_perspective; + BitField m_supports_framebuffer_fetch; + BitField m_per_sample_shading; + BitField m_scaled_dithering; + BitField m_chroma_smoothing; + BitField m_disable_color_perspective; u8 bits = 0; }; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index cb94eb431..ecbfa68c2 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -8,8 +8,9 @@ GPU_HW_ShaderGen::GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, bool pgxp_depth, - bool disable_color_perspective, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend), m_resolution_scale(resolution_scale), + bool disable_color_perspective, bool supports_dual_source_blend, + bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch), m_resolution_scale(resolution_scale), m_multisamples(multisamples), m_per_sample_shading(per_sample_shading), m_true_color(true_color), m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits), m_pgxp_depth(pgxp_depth), m_disable_color_perspective(disable_color_perspective) @@ -629,22 +630,27 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, } } -std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, - GPUTextureMode texture_mode, bool dithering, bool interlacing) +std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, + GPUTransparencyMode transparency, GPUTextureMode texture_mode, + bool dithering, bool interlacing) { const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; const bool textured = (texture_mode != GPUTextureMode::Disabled); - const bool use_dual_source = - m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled && - transparency != GPU_HW::BatchRenderMode::OnlyOpaque) || - m_texture_filter != GPUTextureFilter::Nearest); + const bool use_framebuffer_fetch = + (render_mode == GPU_HW::BatchRenderMode::TransparentAndOpaque && transparency != GPUTransparencyMode::Disabled); + const bool use_dual_source = !use_framebuffer_fetch && m_supports_dual_source_blend && + ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && + render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || + m_texture_filter != GPUTextureFilter::Nearest); std::stringstream ss; WriteHeader(ss); - DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled); - DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque); - DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent); + DefineMacro(ss, "TRANSPARENCY", render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled); + DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); + DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); + DefineMacro(ss, "TRANSPARENCY_MODE", static_cast(transparency)); + DefineMacro(ss, "SHADER_BLENDING", use_framebuffer_fetch); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); @@ -771,19 +777,19 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), - false, m_disable_color_perspective); + false, m_disable_color_perspective, use_framebuffer_fetch); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, - !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, - m_disable_color_perspective); + !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, + use_framebuffer_fetch); } } else { DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective); + UsingPerSampleShading(), false, m_disable_color_perspective, use_framebuffer_fetch); } ss << R"( @@ -883,7 +889,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) // Premultiply alpha so we don't need to use a colour output for it. float premultiply_alpha = ialpha; - #if TRANSPARENCY + #if TRANSPARENCY && !SHADER_BLENDING premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0); #endif @@ -897,7 +903,34 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords) color = (float3(icolor) * premultiply_alpha) / float3(255.0, 255.0, 255.0); #endif - #if TRANSPARENCY && TEXTURED + #if SHADER_BLENDING + float4 bg_col = LAST_FRAG_COLOR; + float4 fg_col = float4(color, oalpha); + + #if TEXTURE_FILTERING + #if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3 + bg_col.rgb /= ialpha; + #endif + fg_col.rgb *= ialpha; + #endif + + o_col0.a = fg_col.a; + #if TRANSPARENCY_MODE == 0 // Half BG + Half FG. + o_col0.rgb = (bg_col.rgb * 0.5) + (fg_col.rgb * 0.5); + #elif TRANSPARENCY_MODE == 1 // BG + FG + o_col0.rgb = bg_col.rgb + fg_col.rgb; + #elif TRANSPARENCY_MODE == 2 // BG - FG + o_col0.rgb = bg_col.rgb - fg_col.rgb; + #elif TRANSPARENCY_MODE == 3 // BG + 1/4 FG. + o_col0.rgb = bg_col.rgb + (fg_col.rgb * 0.25); + #else + o_col0.rgb = fg_col.rgb; + #endif + #if TRANSPARENCY + // If pixel isn't marked as semitransparent, replace with previous colour. + o_col0 = semitransparent ? o_col0 : fg_col; + #endif + #elif TRANSPARENCY && TEXTURED // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. if (semitransparent) { diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 96274dac3..cda16ce22 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -10,12 +10,13 @@ class GPU_HW_ShaderGen : public ShaderGen public: GPU_HW_ShaderGen(RenderAPI render_api, u32 resolution_scale, u32 multisamples, bool per_sample_shading, bool true_color, bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits, - bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend); + bool pgxp_depth, bool disable_color_perspective, bool supports_dual_source_blend, + bool supports_framebuffer_fetch); ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); - std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode, - bool dithering, bool interlacing); + std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, + GPUTextureMode texture_mode, bool dithering, bool interlacing); std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode, bool smooth_chroma); std::string GenerateWireframeGeometryShader(); diff --git a/src/core/gpu_shadergen.cpp b/src/core/gpu_shadergen.cpp index 138b26a38..76583063b 100644 --- a/src/core/gpu_shadergen.cpp +++ b/src/core/gpu_shadergen.cpp @@ -3,8 +3,8 @@ #include "gpu_shadergen.h" -GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend) +GPUShaderGen::GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch) { } @@ -62,7 +62,8 @@ std::string GPUShaderGen::GenerateDisplaySharpBilinearFragmentShader() WriteDisplayUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - // Based on https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl + // Based on + // https://github.com/rsn8887/Sharp-Bilinear-Shaders/blob/master/Copy_To_RetroPie/shaders/sharp-bilinear-simple.glsl DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false); ss << R"( { diff --git a/src/core/gpu_shadergen.h b/src/core/gpu_shadergen.h index 2ccd13b7a..8a4d0cac7 100644 --- a/src/core/gpu_shadergen.h +++ b/src/core/gpu_shadergen.h @@ -8,7 +8,7 @@ class GPUShaderGen : public ShaderGen { public: - GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + GPUShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~GPUShaderGen(); std::string GenerateDisplayVertexShader(); diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index c096b1643..c0c0e38e9 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -169,6 +169,7 @@ void D3D11Device::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; m_features.per_sample_shading = (feature_level >= D3D_FEATURE_LEVEL_10_1); m_features.noperspective_interpolation = true; m_features.supports_texture_buffers = true; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 24c3203ad..e22d31ee3 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1188,6 +1188,7 @@ void D3D12Device::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; m_features.noperspective_interpolation = true; m_features.per_sample_shading = true; m_features.supports_texture_buffers = true; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index ecb2ef401..666a48caa 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -392,7 +392,7 @@ bool GPUDevice::CreateResources() if (!(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) return false; - ShaderGen shadergen(GetRenderAPI(), m_features.dual_source_blend); + ShaderGen shadergen(GetRenderAPI(), m_features.dual_source_blend, m_features.framebuffer_fetch); std::unique_ptr imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateImGuiVertexShader()); std::unique_ptr imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GenerateImGuiFragmentShader()); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 55bf8e5df..4f388a121 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -441,6 +441,7 @@ public: struct Features { bool dual_source_blend : 1; + bool framebuffer_fetch : 1; bool per_sample_shading : 1; bool noperspective_interpolation : 1; bool supports_texture_buffers : 1; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 959d37da8..323ac6d5c 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -212,6 +212,7 @@ void MetalDevice::SetFeatures() } m_features.dual_source_blend = true; + m_features.framebuffer_fetch = false; // TODO m_features.per_sample_shading = true; m_features.noperspective_interpolation = true; m_features.supports_texture_buffers = true; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index dce4366e8..2acdce982 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -349,7 +349,7 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo) const bool is_gles = m_gl_context->IsGLES(); bool vendor_id_amd = false; - //bool vendor_id_nvidia = false; + // bool vendor_id_nvidia = false; bool vendor_id_intel = false; bool vendor_id_arm = false; bool vendor_id_qualcomm = false; @@ -412,6 +412,8 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo) (max_dual_source_draw_buffers > 0) && (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended); + m_features.framebuffer_fetch = (GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch); + #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. m_features.supports_texture_buffers = false; diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index e713f1551..d85e804c3 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -18,7 +18,7 @@ namespace { class PostProcessingGLSLShaderGen : public ShaderGen { public: - PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~PostProcessingGLSLShaderGen(); std::string GeneratePostProcessingVertexShader(const PostProcessing::GLSLShader& shader); @@ -117,7 +117,8 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 if (m_pipeline) m_pipeline.reset(); - PostProcessingGLSLShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend); + PostProcessingGLSLShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, + g_gpu_device->GetFeatures().framebuffer_fetch); std::unique_ptr vs = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GeneratePostProcessingVertexShader(*this)); @@ -319,8 +320,9 @@ void PostProcessing::GLSLShader::LoadOptions() } } -PostProcessingGLSLShaderGen::PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend) - : ShaderGen(render_api, supports_dual_source_blend) +PostProcessingGLSLShaderGen::PostProcessingGLSLShaderGen(RenderAPI render_api, bool supports_dual_source_blend, + bool supports_framebuffer_fetch) + : ShaderGen(render_api, supports_dual_source_blend, supports_framebuffer_fetch) { } diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index c1b80b241..50ab3e871 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #include "shadergen.h" @@ -13,10 +13,11 @@ Log_SetChannel(ShaderGen); -ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend) +ShaderGen::ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch) : m_render_api(render_api), m_glsl(render_api != RenderAPI::D3D11 && render_api != RenderAPI::D3D12), m_spirv(render_api == RenderAPI::Vulkan || render_api == RenderAPI::Metal), - m_supports_dual_source_blend(supports_dual_source_blend), m_use_glsl_interface_blocks(false) + m_supports_dual_source_blend(supports_dual_source_blend), m_supports_framebuffer_fetch(supports_framebuffer_fetch), + m_use_glsl_interface_blocks(false) { #if defined(ENABLE_OPENGL) || defined(ENABLE_VULKAN) if (m_glsl) @@ -61,6 +62,11 @@ void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, bool enable ss << "#define " << name << " " << BoolToUInt32(enabled) << "\n"; } +void ShaderGen::DefineMacro(std::stringstream& ss, const char* name, s32 value) +{ + ss << "#define " << name << " " << value << "\n"; +} + #ifdef ENABLE_OPENGL void ShaderGen::SetGLSLVersionString() { @@ -122,6 +128,8 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#extension GL_EXT_blend_func_extended : require\n"; if (GLAD_GL_ARB_blend_func_extended) ss << "#extension GL_ARB_blend_func_extended : require\n"; + if (GLAD_GL_EXT_shader_framebuffer_fetch) + ss << "#extension GL_EXT_shader_framebuffer_fetch : require\n"; // Test for V3D driver - we have to fudge coordinates slightly. if (std::strstr(reinterpret_cast(glGetString(GL_VENDOR)), "Broadcom") && @@ -150,6 +158,11 @@ void ShaderGen::WriteHeader(std::stringstream& ss) // Enable SSBOs if it's not required by the version. if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_ES_VERSION_3_1 && GLAD_GL_ARB_shader_storage_buffer_object) ss << "#extension GL_ARB_shader_storage_buffer_object : require\n"; + + if (GLAD_GL_EXT_shader_framebuffer_fetch) + ss << "#extension GL_EXT_shader_framebuffer_fetch : require\n"; + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + ss << "#extension GL_ARM_shader_framebuffer_fetch : require\n"; } #endif @@ -485,7 +498,7 @@ void ShaderGen::DeclareFragmentEntryPoint( const std::initializer_list>& additional_inputs, bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */, bool declare_sample_id /* = false */, - bool noperspective_color /* = false */) + bool noperspective_color /* = false */, bool framebuffer_fetch /* = false */) { if (m_glsl) { @@ -539,24 +552,43 @@ void ShaderGen::DeclareFragmentEntryPoint( if (depth_output) ss << "#define o_depth gl_FragDepth\n"; + const char* target_0_qualifier = "out"; +#ifdef ENABLE_OPENGL + if ((m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) && m_supports_framebuffer_fetch && + framebuffer_fetch) + { + if (GLAD_GL_EXT_shader_framebuffer_fetch) + { + target_0_qualifier = "inout"; + ss << "#define LAST_FRAG_COLOR o_col0\n"; + } + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + { + ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n"; + } + } +#endif + if (m_use_glsl_binding_layout) { if (m_supports_dual_source_blend) { for (u32 i = 0; i < num_color_outputs; i++) - ss << "layout(location = 0, index = " << i << ") out float4 o_col" << i << ";\n"; + { + ss << "layout(location = 0, index = " << i << ") " << ((i == 0) ? target_0_qualifier : "out") + << " float4 o_col" << i << ";\n"; + } } else { Assert(num_color_outputs <= 1); - for (u32 i = 0; i < num_color_outputs; i++) - ss << "layout(location = " << i << ") out float4 o_col" << i << ";\n"; + ss << "layout(location = 0) " << target_0_qualifier << " float4 o_col0;\n"; } } else { for (u32 i = 0; i < num_color_outputs; i++) - ss << "out float4 o_col" << i << ";\n"; + ss << ((i == 0) ? target_0_qualifier : "out") << " float4 o_col" << i << ";\n"; } ss << "\n"; diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 9d67a17e9..8cd4f119f 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -11,7 +11,7 @@ class ShaderGen { public: - ShaderGen(RenderAPI render_api, bool supports_dual_source_blend); + ShaderGen(RenderAPI render_api, bool supports_dual_source_blend, bool supports_framebuffer_fetch); ~ShaderGen(); static bool UseGLSLBindingLayout(); @@ -36,6 +36,7 @@ protected: #endif void DefineMacro(std::stringstream& ss, const char* name, bool enabled); + void DefineMacro(std::stringstream& ss, const char* name, s32 value); void WriteHeader(std::stringstream& ss); void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan); void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, @@ -51,12 +52,13 @@ protected: const std::initializer_list>& additional_inputs, bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false, bool msaa = false, bool ssaa = false, bool declare_sample_id = false, - bool noperspective_color = false); + bool noperspective_color = false, bool framebuffer_fetch = false); RenderAPI m_render_api; bool m_glsl; bool m_spirv; bool m_supports_dual_source_blend; + bool m_supports_framebuffer_fetch; bool m_use_glsl_interface_blocks; bool m_use_glsl_binding_layout; bool m_has_uniform_buffer = false; diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 93db7d479..2c7f23b40 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2196,6 +2196,7 @@ bool VulkanDevice::CheckFeatures() m_max_multisamples = 1; m_features.dual_source_blend = m_device_features.dualSrcBlend; // TODO: Option to disable + m_features.framebuffer_fetch = false; // TODO: Option to disable if (!m_features.dual_source_blend) Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance.");