From 65669b32500f14dbf0302ead8891c7f4047e836d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 8 Mar 2024 17:55:02 +1000 Subject: [PATCH] GPUDevice: Add support for feedback loops --- src/core/gpu.cpp | 10 +- src/core/gpu.h | 1 + src/core/gpu_hw.cpp | 3 +- src/util/d3d11_device.cpp | 9 +- src/util/d3d11_device.h | 4 +- src/util/d3d12_device.cpp | 10 +- src/util/d3d12_device.h | 4 +- src/util/gpu_device.cpp | 5 +- src/util/gpu_device.h | 37 ++- src/util/metal_device.mm | 1 + src/util/opengl_device.cpp | 15 +- src/util/opengl_device.h | 4 +- src/util/postprocessing_shader_fx.cpp | 1 + src/util/postprocessing_shader_glsl.cpp | 1 + src/util/shadergen.cpp | 35 ++- src/util/shadergen.h | 2 +- src/util/vulkan_builders.cpp | 16 ++ src/util/vulkan_builders.h | 4 + src/util/vulkan_device.cpp | 356 ++++++++++++++++-------- src/util/vulkan_device.h | 37 ++- src/util/vulkan_pipeline.cpp | 35 ++- src/util/vulkan_pipeline.h | 5 +- src/util/vulkan_texture.cpp | 22 +- 23 files changed, 426 insertions(+), 191 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 7ba776a0e..c08a116d1 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -1615,10 +1615,11 @@ bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_sm plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.geometry_shader = nullptr; plconfig.depth_format = GPUTexture::Format::Unknown; plconfig.samples = 1; plconfig.per_sample_shading = false; - plconfig.geometry_shader = nullptr; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; if (display) { @@ -2707,10 +2708,10 @@ void GPU::GetStatsString(SmallStringBase& str) { if (IsHardwareRenderer()) { - str.format("{} HW | {} P | {} DC | {} RP | {} RB | {} C | {} W", + str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, - m_stats.host_num_draws, m_stats.host_num_render_passes, m_stats.num_reads, m_stats.num_copies, - m_stats.num_writes); + m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, m_stats.num_reads, + m_stats.num_copies, m_stats.num_writes); } else { @@ -2753,6 +2754,7 @@ void GPU::UpdateStatistics(u32 frame_count) UPDATE_GPU_STAT(buffer_streamed); UPDATE_GPU_STAT(num_draws); + UPDATE_GPU_STAT(num_barriers); UPDATE_GPU_STAT(num_render_passes); UPDATE_GPU_STAT(num_copies); UPDATE_GPU_STAT(num_downloads); diff --git a/src/core/gpu.h b/src/core/gpu.h index a4f516a5c..bbfe4e6eb 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -625,6 +625,7 @@ protected: { size_t host_buffer_streamed; u32 host_num_draws; + u32 host_num_barriers; u32 host_num_render_passes; u32 host_num_copies; u32 host_num_downloads; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index e0e55d79c..0aebe2e96 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -819,10 +819,11 @@ bool GPU_HW::CompilePipelines() plconfig.input_layout.vertex_stride = sizeof(BatchVertex); plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.geometry_shader = nullptr; plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT); plconfig.samples = m_multisamples; plconfig.per_sample_shading = m_per_sample_shading; - plconfig.geometry_shader = nullptr; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; 
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] for (u8 depth_test = 0; depth_test < 3; depth_test++) diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index b78119af5..ffeedec0b 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -184,6 +184,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) m_features.texture_copy_to_self = false; m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); m_features.texture_buffers_emulated_with_ssbo = false; + m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = false; m_features.memory_import = false; @@ -909,9 +910,10 @@ void D3D11Device::UnmapUniformBuffer(u32 size) m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants); } -void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) +void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, GPUPipeline::RenderPassFlag feedback_loop) { ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS]; + DebugAssert(!feedback_loop); bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); m_current_depth_target = static_cast(ds); @@ -1057,3 +1059,8 @@ void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) s_stats.num_draws++; m_context->DrawIndexed(index_count, base_index, base_vertex); } + +void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) +{ + Panic("Barriers are not supported"); +} \ No newline at end of file diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index 466d957d0..8a3a9cb8d 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -84,7 +84,8 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -92,6 +93,7 @@ public: void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; bool GetHostRefreshRate(float* refresh_rate) override; diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 76637f77c..2ffc1276c 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1190,6 +1190,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features) /*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); m_features.texture_buffers_emulated_with_ssbo = false; + m_features.feedback_loops = false; m_features.geometry_shaders = !(disabled_features & 
FEATURE_MASK_GEOMETRY_SHADERS); m_features.partial_msaa_resolve = true; m_features.memory_import = false; @@ -1548,8 +1549,10 @@ void D3D12Device::DestroyRootSignatures() it->Reset(); } -void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) +void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop) { + DebugAssert(!feedback_loop); if (InRenderPass()) EndRenderPass(); @@ -2140,3 +2143,8 @@ void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) s_stats.num_draws++; GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0); } + +void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) +{ + Panic("Barriers are not supported"); +} diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index e3776fb64..0d6fc7652 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -107,7 +107,8 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -115,6 +116,7 @@ public: void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 187610214..7443af9f7 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -492,6 +492,7 @@ bool GPUDevice::CreateResources() plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8); plconfig.samples = 1; plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; plconfig.vertex_shader = imgui_vs.get(); plconfig.geometry_shader = nullptr; plconfig.fragment_shader = imgui_fs.get(); @@ -615,9 +616,9 @@ void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) UnmapUniformBuffer(data_size); } -void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds /*= nullptr*/) +void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds, GPUPipeline::RenderPassFlag render_pass_flags) { - SetRenderTargets(rt ? &rt : nullptr, rt ? 1 : 0, ds); + SetRenderTargets(rt ? &rt : nullptr, rt ? 
1 : 0, ds, render_pass_flags); } void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index d77eb6a53..6fd3e9ccf 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -133,6 +133,13 @@ public: MaxCount }; + enum RenderPassFlag : u8 + { + NoRenderPassFlags = 0, + ColorFeedbackLoop = (1 << 0), + SampleDepthBuffer = (1 << 1), + }; + enum class Primitive : u8 { Points, @@ -369,8 +376,9 @@ public: GPUTexture::Format color_formats[4]; GPUTexture::Format depth_format; - u32 samples; + u8 samples; bool per_sample_shading; + RenderPassFlag render_pass_flags; void SetTargetFormats(GPUTexture::Format color_format, GPUTexture::Format depth_format_ = GPUTexture::Format::Unknown); @@ -425,11 +433,19 @@ public: enum FeatureMask : u32 { FEATURE_MASK_DUAL_SOURCE_BLEND = (1 << 0), - FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 1), - FEATURE_MASK_TEXTURE_BUFFERS = (1 << 2), - FEATURE_MASK_GEOMETRY_SHADERS = (1 << 3), - FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 4), - FEATURE_MASK_MEMORY_IMPORT = (1 << 5), + FEATURE_MASK_FEEDBACK_LOOPS = (1 << 1), + FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2), + FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3), + FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4), + FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5), + FEATURE_MASK_MEMORY_IMPORT = (1 << 6), + }; + + enum class DrawBarrier : u32 + { + None, + One, + Full }; struct Features @@ -441,6 +457,7 @@ public: bool texture_copy_to_self : 1; bool supports_texture_buffers : 1; bool texture_buffers_emulated_with_ssbo : 1; + bool feedback_loops : 1; bool geometry_shaders : 1; bool partial_msaa_resolve : 1; bool memory_import : 1; @@ -454,6 +471,7 @@ public: { size_t buffer_streamed; u32 num_draws; + u32 num_barriers; u32 num_render_passes; u32 num_copies; u32 num_downloads; @@ -616,18 +634,21 @@ public: void UploadUniformBuffer(const void* data, u32 data_size); /// Drawing setup abstraction. - virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) = 0; + virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags) = 0; virtual void SetPipeline(GPUPipeline* pipeline) = 0; virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0; virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: Rectangle virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0; - void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr); + void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr, + GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags); void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); // Drawing abstraction. virtual void Draw(u32 vertex_count, u32 base_vertex) = 0; virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0; + virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0; /// Returns false if the window was completely occluded. 
virtual bool BeginPresent(bool skip_present) = 0; diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 2039dbe24..9c28066e2 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -235,6 +235,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features) m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); m_features.texture_buffers_emulated_with_ssbo = true; + m_features.feedback_loops = false; m_features.geometry_shaders = false; m_features.partial_msaa_resolve = false; m_features.memory_import = true; diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 18b612c8e..313386a1d 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -403,8 +403,9 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && (max_dual_source_draw_buffers > 0) && (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended); - m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && - (GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch); + m_features.framebuffer_fetch = + !(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) && + (GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch); #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver. @@ -469,6 +470,8 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features) // So, blit from the shadow texture, like in the other renderers. m_features.texture_copy_to_self = !vendor_id_arm && !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); + m_features.feedback_loops = m_features.framebuffer_fetch; + m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2); @@ -1035,6 +1038,11 @@ void OpenGLDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) glDrawElements(m_current_pipeline->GetTopology(), index_count, GL_UNSIGNED_SHORT, indices); } +void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) +{ + Panic("Barriers are not supported"); +} + void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex) { @@ -1088,8 +1096,9 @@ void OpenGLDevice::UnmapUniformBuffer(u32 size) glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size); } -void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) +void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, GPUPipeline::RenderPassFlag feedback_loop) { + //DebugAssert(!feedback_loop); TODO bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_rt_clear = false; diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index df67e8c4c..de56f7fa0 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -89,7 +89,8 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) 
override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -97,6 +98,7 @@ public: void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; void SetSyncMode(DisplaySyncMode mode) override; diff --git a/src/util/postprocessing_shader_fx.cpp b/src/util/postprocessing_shader_fx.cpp index 9569a1ae0..4d43521a5 100644 --- a/src/util/postprocessing_shader_fx.cpp +++ b/src/util/postprocessing_shader_fx.cpp @@ -1222,6 +1222,7 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format, plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.samples = 1; plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; progress->PushState(); diff --git a/src/util/postprocessing_shader_glsl.cpp b/src/util/postprocessing_shader_glsl.cpp index a3f90c7af..0fa20fcb8 100644 --- a/src/util/postprocessing_shader_glsl.cpp +++ b/src/util/postprocessing_shader_glsl.cpp @@ -136,6 +136,7 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32 plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.samples = 1; plconfig.per_sample_shading = false; + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; plconfig.vertex_shader = vs.get(); plconfig.fragment_shader = fs.get(); plconfig.geometry_shader = nullptr; diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 777ff5471..407c28911 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -505,7 +505,7 @@ void ShaderGen::DeclareFragmentEntryPoint( const std::initializer_list>& additional_inputs, bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */, bool declare_sample_id /* = false */, - bool noperspective_color /* = false */, bool framebuffer_fetch /* = false */) + bool noperspective_color /* = false */, bool feedback_loop /* = false */) { if (m_glsl) { @@ -560,21 +560,32 @@ void ShaderGen::DeclareFragmentEntryPoint( ss << "#define o_depth gl_FragDepth\n"; const char* target_0_qualifier = "out"; -#ifdef ENABLE_OPENGL - if ((m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) && m_supports_framebuffer_fetch && - framebuffer_fetch) + + if (feedback_loop) { - if (GLAD_GL_EXT_shader_framebuffer_fetch) +#ifdef ENABLE_OPENGL + if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) { - target_0_qualifier = "inout"; - ss << "#define LAST_FRAG_COLOR o_col0\n"; + Assert(m_supports_framebuffer_fetch); + if (GLAD_GL_EXT_shader_framebuffer_fetch) + { + target_0_qualifier = "inout"; + ss << "#define LAST_FRAG_COLOR o_col0\n"; + } + else if (GLAD_GL_ARM_shader_framebuffer_fetch) + { + ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n"; + } } - else if (GLAD_GL_ARM_shader_framebuffer_fetch) - { - ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n"; - } - } #endif +#ifdef ENABLE_VULKAN + if (m_render_api == RenderAPI::Vulkan) + { 
+ ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput u_input_rt;\n"; + ss << "#define LAST_FRAG_COLOR subpassLoad(u_input_rt)\n"; + } +#endif + } if (m_use_glsl_binding_layout) { diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 0e88b0c6f..0108d0c26 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -53,7 +53,7 @@ protected: const std::initializer_list>& additional_inputs, bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false, bool msaa = false, bool ssaa = false, bool declare_sample_id = false, - bool noperspective_color = false, bool framebuffer_fetch = false); + bool noperspective_color = false, bool feedback_loop = false); RenderAPI m_render_api; bool m_glsl; diff --git a/src/util/vulkan_builders.cpp b/src/util/vulkan_builders.cpp index 92fb6f97f..825a717f0 100644 --- a/src/util/vulkan_builders.cpp +++ b/src/util/vulkan_builders.cpp @@ -267,6 +267,9 @@ void Vulkan::GraphicsPipelineBuilder::Clear() m_rendering = {}; m_rendering.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR; + m_rendering_input_attachment_locations = {}; + m_rendering_input_attachment_locations.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR; + // set defaults SetNoCullRasterizationState(); SetNoDepthTestState(); @@ -595,6 +598,19 @@ void Vulkan::GraphicsPipelineBuilder::SetDynamicRenderingDepthAttachment(VkForma m_rendering.stencilAttachmentFormat = stencil_format; } +void Vulkan::GraphicsPipelineBuilder::AddDynamicRenderingInputAttachment(u32 color_attachment_index) +{ + AddPointerToChain(&m_ci, &m_rendering_input_attachment_locations); + + DebugAssert(color_attachment_index < m_rendering.colorAttachmentCount); + DebugAssert(m_rendering_input_attachment_locations.colorAttachmentCount < MAX_INPUT_ATTACHMENTS); + + m_rendering_input_attachment_locations.pColorAttachmentLocations = m_rendering_input_attachment_indices.data(); + m_rendering_input_attachment_indices[m_rendering_input_attachment_locations.colorAttachmentCount] = + color_attachment_index; + m_rendering_input_attachment_locations.colorAttachmentCount++; +} + Vulkan::ComputePipelineBuilder::ComputePipelineBuilder() { Clear(); diff --git a/src/util/vulkan_builders.h b/src/util/vulkan_builders.h index a7ddf21b7..fbcf7fd83 100644 --- a/src/util/vulkan_builders.h +++ b/src/util/vulkan_builders.h @@ -81,6 +81,7 @@ public: MAX_VERTEX_ATTRIBUTES = 16, MAX_VERTEX_BUFFERS = 8, MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1, + MAX_INPUT_ATTACHMENTS = 1, MAX_DYNAMIC_STATE = 8 }; @@ -144,6 +145,7 @@ public: void SetDynamicRendering(); void AddDynamicRenderingColorAttachment(VkFormat format); void SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format); + void AddDynamicRenderingInputAttachment(u32 color_attachment_index); private: VkGraphicsPipelineCreateInfo m_ci; @@ -174,7 +176,9 @@ private: VkPipelineRasterizationLineStateCreateInfoEXT m_line_rasterization_state; VkPipelineRenderingCreateInfoKHR m_rendering; + VkRenderingAttachmentLocationInfoKHR m_rendering_input_attachment_locations; std::array m_rendering_color_formats; + std::array m_rendering_input_attachment_indices; }; class ComputePipelineBuilder diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 13abf3bed..68c345ecd 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -53,6 +53,7 @@ enum : u32 { MAX_DRAW_CALLS_PER_FRAME = 2048, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = GPUDevice::MAX_TEXTURE_SAMPLERS * 
MAX_DRAW_CALLS_PER_FRAME, + MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, MAX_SAMPLER_DESCRIPTORS = 8192, @@ -380,8 +381,6 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en m_optional_extensions.vk_ext_rasterization_order_attachment_access = SupportsExtension(VK_EXT_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false) || SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false); - m_optional_extensions.vk_ext_attachment_feedback_loop_layout = - SupportsExtension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, false); m_optional_extensions.vk_khr_get_memory_requirements2 = SupportsExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, false); m_optional_extensions.vk_khr_bind_memory2 = SupportsExtension(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, false); @@ -392,6 +391,9 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) && SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) && SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); + m_optional_extensions.vk_khr_dynamic_rendering_local_read = + m_optional_extensions.vk_khr_dynamic_rendering && + SupportsExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME, false); m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); m_optional_extensions.vk_ext_external_memory_host = SupportsExtension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, false); @@ -538,17 +540,19 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_TRUE, VK_FALSE, VK_FALSE}; - VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_TRUE}; VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE}; + VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE}; if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); - if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) - Vulkan::AddPointerToChain(&device_info, &attachment_feedback_loop_feature); if (m_optional_extensions.vk_khr_dynamic_rendering) + { Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature); + if (m_optional_extensions.vk_khr_dynamic_rendering_local_read) + Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature); + } VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); if (res != VK_SUCCESS) @@ -586,18 +590,20 @@ void VulkanDevice::ProcessDeviceExtensions() VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, VK_FALSE}; - 
VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = { - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_FALSE}; VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_FALSE}; + VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE}; // add in optional feature structs if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature); - if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout) - Vulkan::AddPointerToChain(&features2, &attachment_feedback_loop_feature); if (m_optional_extensions.vk_khr_dynamic_rendering) + { Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature); + if (m_optional_extensions.vk_khr_dynamic_rendering_local_read) + Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature); + } // we might not have VK_KHR_get_physical_device_properties2... if (!vkGetPhysicalDeviceFeatures2 || !vkGetPhysicalDeviceProperties2 || !vkGetPhysicalDeviceMemoryProperties2) @@ -627,9 +633,9 @@ void VulkanDevice::ProcessDeviceExtensions() // confirm we actually support it m_optional_extensions.vk_ext_rasterization_order_attachment_access &= (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE); - m_optional_extensions.vk_ext_attachment_feedback_loop_layout &= - (attachment_feedback_loop_feature.attachmentFeedbackLoopLayout == VK_TRUE); m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE); + m_optional_extensions.vk_khr_dynamic_rendering_local_read &= + (dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE); VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}}; VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = { @@ -664,6 +670,7 @@ void VulkanDevice::ProcessDeviceExtensions() if (m_optional_extensions.vk_khr_dynamic_rendering) { m_optional_extensions.vk_khr_dynamic_rendering = false; + m_optional_extensions.vk_khr_dynamic_rendering_local_read = false; Log_WarningPrint("Disabling VK_KHR_dynamic_rendering on broken mobile driver."); } if (m_optional_extensions.vk_khr_push_descriptor) @@ -673,26 +680,24 @@ void VulkanDevice::ProcessDeviceExtensions() } } - Log_InfoPrintf("VK_EXT_memory_budget is %s", - m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_EXT_rasterization_order_attachment_access is %s", - m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_EXT_attachment_feedback_loop_layout is %s", - m_optional_extensions.vk_ext_attachment_feedback_loop_layout ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_get_memory_requirements2 is %s", - m_optional_extensions.vk_khr_get_memory_requirements2 ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_bind_memory2 is %s", - m_optional_extensions.vk_khr_bind_memory2 ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_get_physical_device_properties2 is %s", - m_optional_extensions.vk_khr_get_physical_device_properties2 ? 
"supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_dedicated_allocation is %s", - m_optional_extensions.vk_khr_dedicated_allocation ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_dynamic_rendering is %s", - m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_KHR_push_descriptor is %s", - m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported"); - Log_InfoPrintf("VK_EXT_external_memory_host is %s", - m_optional_extensions.vk_ext_external_memory_host ? "supported" : "NOT supported"); + Log_InfoFmt("VK_EXT_memory_budget is {}", m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported"); + Log_InfoFmt("VK_EXT_rasterization_order_attachment_access is {}", + m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_get_memory_requirements2 is {}", + m_optional_extensions.vk_khr_get_memory_requirements2 ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_bind_memory2 is {}", m_optional_extensions.vk_khr_bind_memory2 ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_get_physical_device_properties2 is {}", + m_optional_extensions.vk_khr_get_physical_device_properties2 ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_dedicated_allocation is {}", + m_optional_extensions.vk_khr_dedicated_allocation ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_dynamic_rendering is {}", + m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_dynamic_rendering_local_read is {}", + m_optional_extensions.vk_khr_dynamic_rendering_local_read ? "supported" : "NOT supported"); + Log_InfoFmt("VK_KHR_push_descriptor is {}", + m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported"); + Log_InfoFmt("VK_EXT_external_memory_host is {}", + m_optional_extensions.vk_ext_external_memory_host ? 
"supported" : "NOT supported"); } bool VulkanDevice::CreateAllocator() @@ -834,25 +839,27 @@ bool VulkanDevice::CreateCommandBuffers() } Vulkan::SetObjectName(m_device, resources.fence, TinyString::from_format("Frame Fence {}", frame_index)); + u32 num_pools = 0; + VkDescriptorPoolSize pool_sizes[2]; if (!m_optional_extensions.vk_khr_push_descriptor) { - VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME}, - }; - - VkDescriptorPoolCreateInfo pool_create_info = { - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME, - static_cast(std::size(pool_sizes)), pool_sizes}; - - res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); - return false; - } - Vulkan::SetObjectName(m_device, resources.descriptor_pool, - TinyString::from_format("Frame Descriptor Pool {}", frame_index)); + pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME}; } + pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME}; + + VkDescriptorPoolCreateInfo pool_create_info = { + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME, + static_cast(std::size(pool_sizes)), pool_sizes}; + + res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); + return false; + } + Vulkan::SetObjectName(m_device, resources.descriptor_pool, + TinyString::from_format("Frame Descriptor Pool {}", frame_index)); ++frame_index; } @@ -970,17 +977,15 @@ VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& conf key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE; } - // key.color_feedback_loop = false; - // key.depth_sampling = false; - key.samples = static_cast(config.samples); + key.feedback_loop = config.render_pass_flags; const auto it = m_render_pass_cache.find(key); return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); } -VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, - bool color_feedback_loop /* = false */, bool depth_sampling /* = false */) +VkRenderPass VulkanDevice::GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop) { RenderPassCacheKey key; std::memset(&key, 0, sizeof(key)); @@ -1009,8 +1014,7 @@ VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GP key.samples = static_cast(ds->GetSamples()); } - key.color_feedback_loop = color_feedback_loop; - key.depth_sampling = depth_sampling; + key.feedback_loop = feedback_loop; const auto it = m_render_pass_cache.find(key); return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); @@ -1674,8 +1678,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) break; const VkImageLayout layout = - key.color_feedback_loop ? - (UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : + (key.feedback_loop & GPUPipeline::ColorFeedbackLoop) ? + (m_optional_extensions.vk_khr_dynamic_rendering_local_read ? 
VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR : + VK_IMAGE_LAYOUT_GENERAL) : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; const RenderPassCacheKey::RenderTarget key_rt = key.color[i]; @@ -1692,15 +1697,12 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) color_references[num_attachments].layout = layout; color_reference_ptr = color_references.data(); - if (key.color_feedback_loop) + if (key.feedback_loop & GPUPipeline::ColorFeedbackLoop) { DebugAssert(i == 0); - if (!UseFeedbackLoopLayout()) - { - input_reference.attachment = num_attachments; - input_reference.layout = layout; - input_reference_ptr = &input_reference; - } + input_reference.attachment = num_attachments; + input_reference.layout = layout; + input_reference_ptr = &input_reference; if (!m_optional_extensions.vk_ext_rasterization_order_attachment_access) { @@ -1710,11 +1712,8 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; subpass_dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - subpass_dependency.dstAccessMask = - UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; - subpass_dependency.dependencyFlags = UseFeedbackLoopLayout() ? - (VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) : - VK_DEPENDENCY_BY_REGION_BIT; + subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; + subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; subpass_dependency_ptr = &subpass_dependency; } } @@ -1726,10 +1725,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) if (key.depth_format != static_cast(GPUTexture::Format::Unknown)) { - const VkImageLayout layout = - key.depth_sampling ? - (UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) : - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + const VkImageLayout layout = (key.feedback_loop & GPUPipeline::SampleDepthBuffer) ? + VK_IMAGE_LAYOUT_GENERAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[num_attachments] = {0, static_cast(TEXTURE_FORMAT_MAPPING[key.depth_format]), static_cast(key.samples), @@ -1746,7 +1744,8 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) } const VkSubpassDescriptionFlags subpass_flags = - (key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ? + ((key.feedback_loop & GPUPipeline::ColorFeedbackLoop) && + m_optional_extensions.vk_ext_rasterization_order_attachment_access) ? VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT : 0; const VkSubpassDescription subpass = {subpass_flags, @@ -1784,7 +1783,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags) { VulkanDevice& dev = VulkanDevice::GetInstance(); - VkRenderPass render_pass = dev.GetRenderPass(rts, num_rts, ds, false, false); + VkRenderPass render_pass = + dev.GetRenderPass(reinterpret_cast(rts), num_rts, static_cast(ds), + static_cast(flags)); const GPUTexture* rt_or_ds = (num_rts > 0) ? 
rts[0] : ds; DebugAssert(rt_or_ds); @@ -2510,7 +2511,9 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend; - m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */ false; + m_features.framebuffer_fetch = + !(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) && + m_optional_extensions.vk_ext_rasterization_order_attachment_access; if (!m_features.dual_source_blend) Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance."); @@ -2519,6 +2522,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features) m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); m_features.per_sample_shading = m_device_features.sampleRateShading; m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); + m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS); #ifdef __APPLE__ // Partial texture buffer uploads appear to be broken in macOS/MoltenVK. @@ -2874,10 +2878,22 @@ bool VulkanDevice::CreatePipelineLayouts() Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout"); } + if (m_features.feedback_loops) + { + // TODO: This isn't ideal, since we can't push the RT descriptors. + dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT); + if ((m_feedback_loop_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) + return false; + Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout"); + } + { VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndUBO)]; plb.AddDescriptorSet(m_ubo_ds_layout); plb.AddDescriptorSet(m_single_texture_ds_layout); + // TODO: REMOVE ME + if (m_features.feedback_loops) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); @@ -2886,6 +2902,9 @@ bool VulkanDevice::CreatePipelineLayouts() { VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureAndPushConstants)]; plb.AddDescriptorSet(m_single_texture_ds_layout); + // TODO: REMOVE ME + if (m_features.feedback_loops) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2896,6 +2915,9 @@ bool VulkanDevice::CreatePipelineLayouts() VkPipelineLayout& pl = m_pipeline_layouts[static_cast(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; plb.AddDescriptorSet(m_single_texture_buffer_ds_layout); + // TODO: REMOVE ME + if (m_features.feedback_loops) + plb.AddDescriptorSet(m_feedback_loop_ds_layout); plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) return false; @@ -2941,6 +2963,7 @@ void VulkanDevice::DestroyPipelineLayouts() l = VK_NULL_HANDLE; } }; + destroy_dsl(m_feedback_loop_ds_layout); destroy_dsl(m_multi_texture_ds_layout); destroy_dsl(m_single_texture_buffer_ds_layout); destroy_dsl(m_single_texture_ds_layout); @@ -3080,13 +3103,15 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa return true; } -void 
VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) +void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop) { - bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); + bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds || + m_current_feedback_loop != feedback_loop); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_rt_clear = false; - m_current_depth_target = ds; + m_current_depth_target = static_cast(ds); for (u32 i = 0; i < num_rts; i++) { VulkanTexture* const RT = static_cast(rts[i]); @@ -3096,7 +3121,8 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText } for (u32 i = num_rts; i < m_num_current_render_targets; i++) m_current_render_targets[i] = nullptr; - m_num_current_render_targets = num_rts; + m_num_current_render_targets = Truncate8(num_rts); + m_current_feedback_loop = feedback_loop; if (changed) { @@ -3109,17 +3135,21 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText return; } - if (!m_optional_extensions.vk_khr_dynamic_rendering) + if (!m_optional_extensions.vk_khr_dynamic_rendering || ((feedback_loop & GPUPipeline::ColorFeedbackLoop) && + !m_optional_extensions.vk_khr_dynamic_rendering_local_read)) { - m_current_framebuffer = - m_framebuffer_manager.Lookup((m_num_current_render_targets > 0) ? m_current_render_targets.data() : nullptr, - m_num_current_render_targets, m_current_depth_target, 0); + m_current_framebuffer = m_framebuffer_manager.Lookup( + (m_num_current_render_targets > 0) ? reinterpret_cast(m_current_render_targets.data()) : nullptr, + m_num_current_render_targets, m_current_depth_target, feedback_loop); if (m_current_framebuffer == VK_NULL_HANDLE) { Log_ErrorPrint("Failed to create framebuffer"); return; } } + + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | + ((feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); } // TODO: This could use vkCmdClearAttachments() instead. @@ -3140,7 +3170,8 @@ void VulkanDevice::BeginRenderPass() for (u32 i = 0; i < num_textures; i++) m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); - if (m_optional_extensions.vk_khr_dynamic_rendering) + if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read || + !(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))) { VkRenderingInfoKHR ri = { VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; @@ -3157,7 +3188,9 @@ void VulkanDevice::BeginRenderPass() for (u32 i = 0; i < m_num_current_render_targets; i++) { VulkanTexture* const rt = static_cast(m_current_render_targets[i]); - rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); + rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? 
+ VulkanTexture::Layout::FeedbackLoop : + VulkanTexture::Layout::ColorAttachment); rt->SetUseFenceCounter(GetCurrentFenceCounter()); VkRenderingAttachmentInfo& ai = attachments[i]; @@ -3179,7 +3212,7 @@ void VulkanDevice::BeginRenderPass() rt->SetState(GPUTexture::State::Dirty); } - if (VulkanTexture* const ds = static_cast(m_current_depth_target)) + if (VulkanTexture* const ds = m_current_depth_target) { ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); ds->SetUseFenceCounter(GetCurrentFenceCounter()); @@ -3201,8 +3234,8 @@ void VulkanDevice::BeginRenderPass() ds->SetState(GPUTexture::State::Dirty); } - const VulkanTexture* const rt_or_ds = static_cast( - (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target); + const VulkanTexture* const rt_or_ds = + (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target; ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}}; } else @@ -3236,7 +3269,7 @@ void VulkanDevice::BeginRenderPass() { bi.framebuffer = m_current_framebuffer; bi.renderPass = m_current_render_pass = GetRenderPass( - m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, false, false); + m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, m_current_feedback_loop); if (bi.renderPass == VK_NULL_HANDLE) { Log_ErrorPrint("Failed to create render pass"); @@ -3255,7 +3288,9 @@ void VulkanDevice::BeginRenderPass() bi.clearValueCount = i + 1; } rt->SetState(GPUTexture::State::Dirty); - rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); + rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? + VulkanTexture::Layout::FeedbackLoop : + VulkanTexture::Layout::ColorAttachment); rt->SetUseFenceCounter(GetCurrentFenceCounter()); } if (VulkanTexture* const ds = static_cast(m_current_depth_target)) @@ -3357,6 +3392,7 @@ void VulkanDevice::BeginSwapChainRenderPass() s_stats.num_render_passes++; m_num_current_render_targets = 0; + m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); m_current_depth_target = nullptr; m_current_framebuffer = VK_NULL_HANDLE; @@ -3420,7 +3456,8 @@ void VulkanDevice::UnbindPipeline(VulkanPipeline* pl) void VulkanDevice::InvalidateCachedState() { - m_dirty_flags = ALL_DIRTY_STATE; + m_dirty_flags = + ALL_DIRTY_STATE | ((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); m_current_render_pass = VK_NULL_HANDLE; m_current_pipeline = nullptr; } @@ -3584,11 +3621,20 @@ void VulkanDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) void VulkanDevice::PreDrawCheck() { + if (!InRenderPass()) + BeginRenderPass(); + DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); - const u32 dirty = std::exchange(m_dirty_flags, 0); + const u32 update_mask = (m_current_feedback_loop ? 
~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); + const u32 dirty = m_dirty_flags & update_mask; + m_dirty_flags = m_dirty_flags & ~update_mask; + if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT && !(dirty & DIRTY_FLAG_INPUT_ATTACHMENT)) + m_dirty_flags |= DIRTY_FLAG_INPUT_ATTACHMENT; // TODO: FOR NEXT TIME + if (dirty != 0) { - if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS)) + if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS | + DIRTY_FLAG_INPUT_ATTACHMENT)) { if (!UpdateDescriptorSets(dirty)) { @@ -3598,21 +3644,22 @@ void VulkanDevice::PreDrawCheck() } } } - - if (!InRenderPass()) - BeginRenderPass(); } template -bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets) +bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) { - std::array ds; + [[maybe_unused]] bool new_dynamic_offsets = false; + + std::array ds; u32 first_ds = 0; u32 num_ds = 0; if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) { - if (new_layout || new_dynamic_offsets) + new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0); + + if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS)) { ds[num_ds++] = m_ubo_descriptor_set; new_dynamic_offsets = true; @@ -3645,7 +3692,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam { DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(), - m_current_samplers[i], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_current_samplers[i], m_current_textures[i]->GetVkLayout()); } const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 
1 : 0; @@ -3666,13 +3713,32 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam { DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i], - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + m_current_textures[i]->GetVkLayout()); } dsub.Update(m_device, false); } } + if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || + layout == GPUPipeline::Layout::SingleTextureAndPushConstants || + layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) + { + if (dirty & DIRTY_FLAG_INPUT_ATTACHMENT) + { + VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout); + if (ids == VK_NULL_HANDLE) + return false; + + ds[num_ds++] = ids; + + Vulkan::DescriptorSetUpdateBuilder dsub; + dsub.AddInputAttachmentDescriptorWrite(ids, 0, m_current_render_targets[0]->GetView(), + m_current_render_targets[0]->GetVkLayout()); + dsub.Update(m_device, false); + } + } + DebugAssert(num_ds > 0); vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layouts[static_cast(m_current_pipeline_layout)], first_ds, num_ds, ds.data(), @@ -3684,25 +3750,22 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam bool VulkanDevice::UpdateDescriptorSets(u32 dirty) { - const bool new_layout = (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) != 0; - const bool new_dynamic_offsets = (dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0; - switch (m_current_pipeline_layout) { case GPUPipeline::Layout::SingleTextureAndUBO: - return UpdateDescriptorSetsForLayout(new_layout, new_dynamic_offsets); + return UpdateDescriptorSetsForLayout(dirty); case GPUPipeline::Layout::SingleTextureAndPushConstants: - return UpdateDescriptorSetsForLayout(new_layout, false); + return UpdateDescriptorSetsForLayout(dirty); case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: - return UpdateDescriptorSetsForLayout(new_layout, false); + return UpdateDescriptorSetsForLayout(dirty); case GPUPipeline::Layout::MultiTextureAndUBO: - return UpdateDescriptorSetsForLayout(new_layout, new_dynamic_offsets); + return UpdateDescriptorSetsForLayout(dirty); case GPUPipeline::Layout::MultiTextureAndPushConstants: - return UpdateDescriptorSetsForLayout(new_layout, false); + return UpdateDescriptorSetsForLayout(dirty); default: UnreachableCode(); @@ -3722,3 +3785,76 @@ void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) s_stats.num_draws++; vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); } + +VkImageMemoryBarrier VulkanDevice::GetColorBufferBarrier(const VulkanTexture* rt) const +{ + const VkImageLayout vk_layout = m_optional_extensions.vk_khr_dynamic_rendering_local_read ? + VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR : + VK_IMAGE_LAYOUT_GENERAL; + DebugAssert(rt->GetLayout() == VulkanTexture::Layout::FeedbackLoop); + + return {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, + vk_layout, + vk_layout, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + rt->GetImage(), + {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; +} + +void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) +{ + PreDrawCheck(); + + // TODO: The first barrier is unnecessary if we're starting the render pass. 
+ + switch (type) + { + case GPUDevice::DrawBarrier::None: + { + s_stats.num_draws++; + vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); + } + break; + + case GPUDevice::DrawBarrier::One: + { + DebugAssert(m_num_current_render_targets == 1); + s_stats.num_barriers++; + s_stats.num_draws++; + + const VkImageMemoryBarrier barrier = + GetColorBufferBarrier(static_cast(m_current_render_targets[0])); + vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, + 1, &barrier); + vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); + } + break; + + case GPUDevice::DrawBarrier::Full: + { + DebugAssert(m_num_current_render_targets == 1); + + const VkImageMemoryBarrier barrier = + GetColorBufferBarrier(static_cast(m_current_render_targets[0])); + const u32 indices_per_primitive = m_current_pipeline->GetVerticesPerPrimitive(); + const u32 end_batch = base_index + index_count; + + for (; base_index < end_batch; base_index += indices_per_primitive) + { + s_stats.num_barriers++; + s_stats.num_draws++; + + vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, + 1, &barrier); + vkCmdDrawIndexed(GetCurrentCommandBuffer(), indices_per_primitive, 1, base_index, base_vertex, 0); + } + } + break; + } +} diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index 38517b5e4..308b6497b 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -44,7 +44,6 @@ public: { bool vk_ext_memory_budget : 1; bool vk_ext_rasterization_order_attachment_access : 1; - bool vk_ext_attachment_feedback_loop_layout : 1; bool vk_ext_full_screen_exclusive : 1; bool vk_khr_get_memory_requirements2 : 1; bool vk_khr_bind_memory2 : 1; @@ -52,6 +51,7 @@ public: bool vk_khr_dedicated_allocation : 1; bool vk_khr_driver_properties : 1; bool vk_khr_dynamic_rendering : 1; + bool vk_khr_dynamic_rendering_local_read : 1; bool vk_khr_push_descriptor : 1; bool vk_ext_external_memory_host : 1; }; @@ -114,7 +114,8 @@ public: void PushUniformBuffer(const void* data, u32 data_size) override; void* MapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override; - void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; + void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, + GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; @@ -122,6 +123,7 @@ public: void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; @@ -144,13 +146,6 @@ public: /// Returns true if Vulkan is suitable as a default for the devices in the system. static bool IsSuitableDefaultRenderer(); - // The interaction between raster order attachment access and fbfetch is unclear. 
-  ALWAYS_INLINE bool UseFeedbackLoopLayout() const
-  {
-    return (m_optional_extensions.vk_ext_attachment_feedback_loop_layout &&
-            !m_optional_extensions.vk_ext_rasterization_order_attachment_access);
-  }
-
   // Helpers for getting constants
   ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const
   {
@@ -165,8 +160,8 @@ public:
   // Creates a simple render pass.
   VkRenderPass GetRenderPass(const GPUPipeline::GraphicsConfig& config);
-  VkRenderPass GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, bool color_feedback_loop = false,
-                             bool depth_sampling = false);
+  VkRenderPass GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds,
+                             GPUPipeline::RenderPassFlag render_pass_flags);
   VkRenderPass GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op);
 
   // Gets a non-clearing version of the specified render pass. Slow, don't call in hot path.
@@ -239,9 +234,10 @@ private:
     DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1),
     DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 2),
     DIRTY_FLAG_TEXTURES_OR_SAMPLERS = (1 << 3),
+    DIRTY_FLAG_INPUT_ATTACHMENT = (1 << 4),
 
-    ALL_DIRTY_STATE =
-      DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS,
+    ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS |
+                      DIRTY_FLAG_TEXTURES_OR_SAMPLERS | DIRTY_FLAG_INPUT_ATTACHMENT,
   };
 
   struct RenderPassCacheKey
   {
@@ -259,8 +255,7 @@ private:
     u8 depth_store_op : 1;
     u8 stencil_load_op : 2;
     u8 stencil_store_op : 1;
-    u8 depth_sampling : 1;
-    u8 color_feedback_loop : 1;
+    u8 feedback_loop : 2;
     u8 samples;
 
     bool operator==(const RenderPassCacheKey& rhs) const;
@@ -361,7 +356,7 @@ private:
   void PreDrawCheck();
 
   template<GPUPipeline::Layout layout>
-  bool UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets);
+  bool UpdateDescriptorSetsForLayout(u32 dirty);
   bool UpdateDescriptorSets(u32 dirty);
 
   // Ends a render pass if we're currently in one.
@@ -375,6 +370,8 @@ private:
   static VkFramebuffer CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags);
   static void DestroyFramebuffer(VkFramebuffer fbo);
 
+  VkImageMemoryBarrier GetColorBufferBarrier(const VulkanTexture* rt) const;
+
   void BeginCommandBuffer(u32 index);
   void WaitForCommandBufferCompletion(u32 index);
 
@@ -445,6 +442,7 @@ private:
   VkDescriptorSetLayout m_single_texture_ds_layout = VK_NULL_HANDLE;
   VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
   VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
+  VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
   std::array<VkPipelineLayout, static_cast<u8>(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {};
 
   VulkanStreamBuffer m_vertex_buffer;
@@ -460,9 +458,10 @@ private:
 
   // Which bindings/state has to be updated before the next draw.
   u32 m_dirty_flags = ALL_DIRTY_STATE;
 
-  u32 m_num_current_render_targets = 0;
-  std::array<GPUTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
-  GPUTexture* m_current_depth_target = nullptr;
+  u8 m_num_current_render_targets = 0;
+  GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
+  std::array<VulkanTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
+  VulkanTexture* m_current_depth_target = nullptr;
 
   VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE;
   VkRenderPass m_current_render_pass = VK_NULL_HANDLE;
diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp
index 657e50a8a..cc0bacfd1 100644
--- a/src/util/vulkan_pipeline.cpp
+++ b/src/util/vulkan_pipeline.cpp
@@ -72,8 +72,10 @@ std::unique_ptr<GPUShader> VulkanDevice::CreateShaderFromSource(GPUShaderStage s
 
 //////////////////////////////////////////////////////////////////////////
 
-VulkanPipeline::VulkanPipeline(VkPipeline pipeline, Layout layout)
-  : GPUPipeline(), m_pipeline(pipeline), m_layout(layout)
+VulkanPipeline::VulkanPipeline(VkPipeline pipeline, Layout layout, u8 vertices_per_primitive,
+                               RenderPassFlag render_pass_flags)
+  : GPUPipeline(), m_pipeline(pipeline), m_layout(layout), m_vertices_per_primitive(vertices_per_primitive),
+    m_render_pass_flags(render_pass_flags)
 {
 }
 
@@ -89,12 +91,13 @@ void VulkanPipeline::SetDebugName(const std::string_view& name)
 
 std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config)
 {
-  static constexpr std::array<VkPrimitiveTopology, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives = {{
-    VK_PRIMITIVE_TOPOLOGY_POINT_LIST,     // Points
-    VK_PRIMITIVE_TOPOLOGY_LINE_LIST,      // Lines
-    VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,  // Triangles
-    VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // TriangleStrips
-  }};
+  static constexpr std::array<std::pair<VkPrimitiveTopology, u32>, static_cast<u32>(GPUPipeline::Primitive::MaxCount)>
+    primitives = {{
+      {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 1},     // Points
+      {VK_PRIMITIVE_TOPOLOGY_LINE_LIST, 2},      // Lines
+      {VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 3},  // Triangles
+      {VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 3}, // TriangleStrips
+    }};
 
   static constexpr u32 MAX_COMPONENTS = 4;
   static constexpr const VkFormat format_mapping[static_cast<u8>(
     GPUPipeline::
@@ -171,7 +174,8 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
     }
   }
 
-  gpb.SetPrimitiveTopology(primitives[static_cast<u8>(config.primitive)]);
+  const auto [vk_topology, vertices_per_primitive] = primitives[static_cast<u8>(config.primitive)];
+  gpb.SetPrimitiveTopology(vk_topology);
 
   // Line width?
@@ -206,7 +210,8 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
 
   gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<u8>(config.layout)]);
 
-  if (m_optional_extensions.vk_khr_dynamic_rendering)
+  if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
+                                                         !(config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)))
   {
     gpb.SetDynamicRendering();
 
@@ -224,6 +229,13 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
       gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
                                              VK_FORMAT_UNDEFINED);
     }
+
+    if (config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)
+    {
+      DebugAssert(m_optional_extensions.vk_khr_dynamic_rendering_local_read &&
+                  config.color_formats[0] != GPUTexture::Format::Unknown);
+      gpb.AddDynamicRenderingInputAttachment(0);
+    }
   }
   else
   {
@@ -236,5 +248,6 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
   if (!pipeline)
     return {};
 
-  return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout));
+  return std::unique_ptr<GPUPipeline>(
+    new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
 }
diff --git a/src/util/vulkan_pipeline.h b/src/util/vulkan_pipeline.h
index 384cf288f..db355a714 100644
--- a/src/util/vulkan_pipeline.h
+++ b/src/util/vulkan_pipeline.h
@@ -32,12 +32,15 @@ public:
   ALWAYS_INLINE VkPipeline GetPipeline() const { return m_pipeline; }
   ALWAYS_INLINE Layout GetLayout() const { return m_layout; }
+  ALWAYS_INLINE u8 GetVerticesPerPrimitive() const { return m_vertices_per_primitive; }
 
   void SetDebugName(const std::string_view& name) override;
 
 private:
-  VulkanPipeline(VkPipeline pipeline, Layout layout);
+  VulkanPipeline(VkPipeline pipeline, Layout layout, u8 vertices_per_primitive, RenderPassFlag render_pass_flags);
 
   VkPipeline m_pipeline;
   Layout m_layout;
+  u8 m_vertices_per_primitive;
+  RenderPassFlag m_render_pass_flags;
 };
diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp
index 5626c9e06..621774514 100644
--- a/src/util/vulkan_texture.cpp
+++ b/src/util/vulkan_texture.cpp
@@ -18,6 +18,7 @@ static constexpr const VkComponentMapping s_identity_swizzle{
 
 static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout)
 {
+  // TODO: Wrong for depth textures in feedback loop
   static constexpr std::array<VkImageLayout, static_cast<u32>(VulkanTexture::Layout::Count)> s_vk_layout_mapping = {{
     VK_IMAGE_LAYOUT_UNDEFINED,      // Undefined
     VK_IMAGE_LAYOUT_PREINITIALIZED, // Preinitialized
@@ -34,17 +35,12 @@ static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout)
     VK_IMAGE_LAYOUT_GENERAL,        // ComputeReadWriteImage
     VK_IMAGE_LAYOUT_GENERAL,        // General
   }};
-  return (layout == VulkanTexture::Layout::FeedbackLoop && VulkanDevice::GetInstance().UseFeedbackLoopLayout()) ?
-           VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT :
+  return (layout == VulkanTexture::Layout::FeedbackLoop &&
+          VulkanDevice::GetInstance().GetOptionalExtensions().vk_khr_dynamic_rendering_local_read) ?
+           VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
           s_vk_layout_mapping[static_cast<u32>(layout)];
 }
 
-static VkAccessFlagBits GetFeedbackLoopInputAccessBits()
-{
-  return VulkanDevice::GetInstance().UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT :
-                                                               VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
-}
-
 VulkanTexture::VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
                              VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format)
   : GPUTexture(static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels),
@@ -111,8 +107,7 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
       DebugAssert(levels == 1);
       ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
-                  (dev.UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT :
-                                                 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
+                  VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
     }
     break;
 
@@ -120,8 +115,7 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
     {
       DebugAssert(levels == 1);
       ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
-                  VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
-                  (dev.UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT : 0);
+                  VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
       vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
     }
     break;
@@ -588,7 +582,7 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
     case Layout::FeedbackLoop:
       barrier.srcAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
                                 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-                                 GetFeedbackLoopInputAccessBits()) :
+                                 VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) :
                                 (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
       srcStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
@@ -664,7 +658,7 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
     case Layout::FeedbackLoop:
       barrier.dstAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
                                 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-                                 GetFeedbackLoopInputAccessBits()) :
+                                 VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) :
                                 (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
       dstStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?