From ac41ace972eae5a51e104bec5c82594bd44442fd Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 7 Jan 2025 19:39:40 +1000 Subject: [PATCH] GPUDevice: Add stencil testing support --- src/util/d3d11_device.cpp | 17 ++++++++ src/util/d3d11_device.h | 5 ++- src/util/d3d11_pipeline.cpp | 34 +++++++++++++-- src/util/d3d12_device.cpp | 29 ++++++++++++- src/util/d3d12_device.h | 3 ++ src/util/d3d12_pipeline.cpp | 37 ++++++++++++++-- src/util/gpu_device.cpp | 14 ++++++- src/util/gpu_device.h | 47 +++++++++++++++++---- src/util/gpu_texture.h | 1 + src/util/opengl_device.cpp | 40 +++++++++++++++++- src/util/opengl_device.h | 6 ++- src/util/opengl_pipeline.cpp | 79 +++++++++++++++++++++++++++++++---- src/util/opengl_texture.cpp | 10 +++-- src/util/postprocessing.cpp | 4 +- src/util/vulkan_device.cpp | 81 ++++++++++++++++++++++++++++++++---- src/util/vulkan_device.h | 3 ++ src/util/vulkan_pipeline.cpp | 51 ++++++++++++++++++++--- src/util/vulkan_texture.cpp | 16 +++---- src/util/vulkan_texture.h | 6 +-- 19 files changed, 425 insertions(+), 58 deletions(-) diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index 4c9db05c5..a8a002329 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -646,6 +646,13 @@ void D3D11Device::ClearDepth(GPUTexture* t, float d) T->CommitClear(m_context.Get()); } +void D3D11Device::ClearStencil(GPUTexture* t, u8 value) +{ + D3D11Texture* const T = static_cast(t); + DebugAssert(T->HasStencil()); + m_context->ClearDepthStencilView(T->GetD3DDSV(), D3D11_CLEAR_STENCIL, 0.0f, value); +} + void D3D11Device::InvalidateRenderTarget(GPUTexture* t) { D3D11Texture* const T = static_cast(t); @@ -1146,6 +1153,16 @@ void D3D11Device::SetScissor(const GSVector4i rc) m_context->RSSetScissorRects(1, &drc); } +void D3D11Device::SetStencilRef(u8 value) +{ + if (m_current_stencil_ref == value) + return; + + m_current_stencil_ref = value; + if (m_current_pipeline) + 
m_context->OMSetDepthStencilState(m_current_pipeline->GetDepthStencilState(), m_current_stencil_ref); +} + void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) { DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader); diff --git a/src/util/d3d11_device.h b/src/util/d3d11_device.h index bc6edd829..ee0860f33 100644 --- a/src/util/d3d11_device.h +++ b/src/util/d3d11_device.h @@ -70,6 +70,7 @@ public: u32 src_x, u32 src_y, u32 width, u32 height) override; void ClearRenderTarget(GPUTexture* t, u32 c) override; void ClearDepth(GPUTexture* t, float d) override; + void ClearStencil(GPUTexture* t, u8 value) override; void InvalidateRenderTarget(GPUTexture* t) override; std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data, @@ -101,6 +102,7 @@ public: void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetViewport(const GSVector4i rc) override; void SetScissor(const GSVector4i rc) override; + void SetStencilRef(u8 value) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -131,7 +133,7 @@ private: size_t operator()(const BlendStateMapKey& key) const; }; using RasterizationStateMap = std::unordered_map>; - using DepthStateMap = std::unordered_map>; + using DepthStateMap = std::unordered_map>; using BlendStateMap = std::unordered_map, BlendStateMapHash>; using InputLayoutMap = std::unordered_map, GPUPipeline::InputLayoutHash>; @@ -198,6 +200,7 @@ private: D3D_PRIMITIVE_TOPOLOGY m_current_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; u32 m_current_vertex_stride = 0; u32 m_current_blend_factor = 0; + u8 m_current_stencil_ref = 0; std::array m_current_textures = {}; std::array m_current_samplers = {}; diff --git a/src/util/d3d11_pipeline.cpp b/src/util/d3d11_pipeline.cpp index 
21bac1563..6a8493d8d 100644 --- a/src/util/d3d11_pipeline.cpp +++ b/src/util/d3d11_pipeline.cpp @@ -192,8 +192,8 @@ D3D11Device::ComPtr D3D11Device::GetDepthState(const GP return dds; } - static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = - {{ + static constexpr std::array(GPUPipeline::ComparisonFunc::MaxCount)> + func_mapping = {{ D3D11_COMPARISON_NEVER, // Never D3D11_COMPARISON_ALWAYS, // Always D3D11_COMPARISON_LESS, // Less @@ -203,10 +203,36 @@ D3D11Device::ComPtr D3D11Device::GetDepthState(const GP D3D11_COMPARISON_EQUAL, // Equal }}; + static constexpr std::array(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = + {{ + D3D11_STENCIL_OP_KEEP, // Keep + D3D11_STENCIL_OP_ZERO, // Zero + D3D11_STENCIL_OP_REPLACE, // Replace + D3D11_STENCIL_OP_INCR_SAT, // IncrSat + D3D11_STENCIL_OP_DECR_SAT, // DecrSat + D3D11_STENCIL_OP_INVERT, // Invert + D3D11_STENCIL_OP_INCR, // Incr + D3D11_STENCIL_OP_DECR, // Decr + }}; + D3D11_DEPTH_STENCIL_DESC desc = {}; - desc.DepthEnable = ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write; + desc.DepthEnable = ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write; desc.DepthFunc = func_mapping[static_cast(ds.depth_test.GetValue())]; desc.DepthWriteMask = ds.depth_write ? 
D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO; + desc.StencilEnable = ds.stencil_enable; + if (ds.stencil_enable) + { + desc.StencilReadMask = 0xFF; + desc.StencilWriteMask = 0xFF; + desc.FrontFace.StencilFailOp = stencil_op_mapping[static_cast(ds.front_stencil_fail_op.GetValue())]; + desc.FrontFace.StencilDepthFailOp = stencil_op_mapping[static_cast(ds.front_stencil_depth_fail_op.GetValue())]; + desc.FrontFace.StencilPassOp = stencil_op_mapping[static_cast(ds.front_stencil_pass_op.GetValue())]; + desc.FrontFace.StencilFunc = func_mapping[static_cast(ds.front_stencil_func.GetValue())]; + desc.BackFace.StencilFailOp = stencil_op_mapping[static_cast(ds.back_stencil_fail_op.GetValue())]; + desc.BackFace.StencilDepthFailOp = stencil_op_mapping[static_cast(ds.back_stencil_depth_fail_op.GetValue())]; + desc.BackFace.StencilPassOp = stencil_op_mapping[static_cast(ds.back_stencil_pass_op.GetValue())]; + desc.BackFace.StencilFunc = func_mapping[static_cast(ds.back_stencil_func.GetValue())]; + } HRESULT hr = m_device->CreateDepthStencilState(&desc, dds.GetAddressOf()); if (FAILED(hr)) [[unlikely]] @@ -449,7 +475,7 @@ void D3D11Device::SetPipeline(GPUPipeline* pipeline) if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds) { m_current_depth_state = ds; - m_context->OMSetDepthStencilState(ds, 0); + m_context->OMSetDepthStencilState(ds, m_current_stencil_ref); } if (ID3D11BlendState* bs = PL->GetBlendState(); diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 7dd946c3f..f4f26f986 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1509,6 +1509,16 @@ void D3D12Device::ClearDepth(GPUTexture* t, float d) EndRenderPass(); } +void D3D12Device::ClearStencil(GPUTexture* t, u8 value) +{ + DebugAssert(t->HasStencil()); + if (InRenderPass() && m_current_depth_target == t) + EndRenderPass(); + + GetCommandList()->ClearDepthStencilView(static_cast(t)->GetWriteDescriptor(), D3D12_CLEAR_FLAG_STENCIL, 
+ 0.0f, value, 0, nullptr); +} + void D3D12Device::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); @@ -1892,8 +1902,12 @@ void D3D12Device::BeginRenderPass() ds_desc_p = &ds_desc; ds_desc.cpuDescriptor = ds->GetWriteDescriptor(); ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE; - ds_desc.StencilBeginningAccess = {}; - ds_desc.StencilEndingAccess = {}; + ds_desc.StencilBeginningAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE : + D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD, + {}}; + ds_desc.StencilEndingAccess = {ds->IsDepthStencil() ? D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE : + D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_DISCARD, + {}}; switch (ds->GetState()) { @@ -2073,6 +2087,8 @@ void D3D12Device::SetInitialPipelineState() m_current_blend_constant = m_current_pipeline->GetBlendConstants(); cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data()); + cmdlist->OMSetStencilRef(m_current_stencil_ref); + SetViewport(cmdlist); SetScissor(cmdlist); } @@ -2101,6 +2117,15 @@ void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist) cmdlist->RSSetScissorRects(1, reinterpret_cast(&m_current_scissor)); } +void D3D12Device::SetStencilRef(u8 value) +{ + if (m_current_stencil_ref == value) + return; + + m_current_stencil_ref = value; + GetCommandList()->OMSetStencilRef(m_current_stencil_ref); +} + void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) { D3D12Texture* T = static_cast(texture); diff --git a/src/util/d3d12_device.h b/src/util/d3d12_device.h index 3b55d65fd..6f69a02a4 100644 --- a/src/util/d3d12_device.h +++ b/src/util/d3d12_device.h @@ -91,6 +91,7 @@ public: u32 src_x, u32 src_y, u32 width, u32 height) override; void ClearRenderTarget(GPUTexture* t, u32 c) override; void ClearDepth(GPUTexture* t, float d) override; + void ClearStencil(GPUTexture* t, u8 value) override; void InvalidateRenderTarget(GPUTexture* 
t) override; std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data, @@ -122,6 +123,7 @@ public: void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetViewport(const GSVector4i rc) override; void SetScissor(const GSVector4i rc) override; + void SetStencilRef(u8 value) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -357,6 +359,7 @@ private: u32 m_current_vertex_stride = 0; u32 m_current_blend_constant = 0; GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + u8 m_current_stencil_ref = 0; std::array m_current_textures = {}; std::array m_current_samplers = {}; diff --git a/src/util/d3d12_pipeline.cpp b/src/util/d3d12_pipeline.cpp index 84088a3ef..18dafeb89 100644 --- a/src/util/d3d12_pipeline.cpp +++ b/src/util/d3d12_pipeline.cpp @@ -165,7 +165,7 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap D3D12_CULL_MODE_BACK, // Back }}; - static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> + static constexpr std::array(GPUPipeline::ComparisonFunc::MaxCount)> compare_mapping = {{ D3D12_COMPARISON_FUNC_NEVER, // Never D3D12_COMPARISON_FUNC_ALWAYS, // Always @@ -176,6 +176,18 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap D3D12_COMPARISON_FUNC_EQUAL, // Equal }}; + static constexpr std::array(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = + {{ + D3D12_STENCIL_OP_KEEP, // Keep + D3D12_STENCIL_OP_ZERO, // Zero + D3D12_STENCIL_OP_REPLACE, // Replace + D3D12_STENCIL_OP_INCR_SAT, // IncrSat + D3D12_STENCIL_OP_DECR_SAT, // DecrSat + D3D12_STENCIL_OP_INVERT, // Invert + D3D12_STENCIL_OP_INCR, // Incr + D3D12_STENCIL_OP_DECR, // Decr + }}; + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ 
D3D12_BLEND_ZERO, // Zero D3D12_BLEND_ONE, // One @@ -238,9 +250,28 @@ std::unique_ptr D3D12Device::CreatePipeline(const GPUPipeline::Grap cull_mapping[static_cast(config.rasterization.cull_mode.GetValue())], false); if (config.samples > 1) gpb.SetMultisamples(config.samples); - gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write, + gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write, config.depth.depth_write, compare_mapping[static_cast(config.depth.depth_test.GetValue())]); - gpb.SetNoStencilState(); + if (config.depth.stencil_enable) + { + const D3D12_DEPTH_STENCILOP_DESC front = { + .StencilFailOp = stencil_op_mapping[static_cast(config.depth.front_stencil_fail_op.GetValue())], + .StencilDepthFailOp = stencil_op_mapping[static_cast(config.depth.front_stencil_depth_fail_op.GetValue())], + .StencilPassOp = stencil_op_mapping[static_cast(config.depth.front_stencil_pass_op.GetValue())], + .StencilFunc = compare_mapping[static_cast(config.depth.front_stencil_func.GetValue())], + }; + const D3D12_DEPTH_STENCILOP_DESC back = { + .StencilFailOp = stencil_op_mapping[static_cast(config.depth.back_stencil_fail_op.GetValue())], + .StencilDepthFailOp = stencil_op_mapping[static_cast(config.depth.back_stencil_depth_fail_op.GetValue())], + .StencilPassOp = stencil_op_mapping[static_cast(config.depth.back_stencil_pass_op.GetValue())], + .StencilFunc = compare_mapping[static_cast(config.depth.back_stencil_func.GetValue())], + }; + gpb.SetStencilState(config.depth.stencil_enable, 0xFF, 0xFF, front, back); + } + else + { + gpb.SetNoStencilState(); + } gpb.SetBlendState(0, config.blend.enable, blend_mapping[static_cast(config.blend.src_blend.GetValue())], blend_mapping[static_cast(config.blend.dst_blend.GetValue())], diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index a70576f97..8f523eae7 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ 
-129,14 +129,14 @@ GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState( GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState() { DepthState ret = {}; - ret.depth_test = DepthFunc::Always; + ret.depth_test = ComparisonFunc::Always; return ret; } GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState() { DepthState ret = {}; - ret.depth_test = DepthFunc::Always; + ret.depth_test = ComparisonFunc::Always; ret.depth_write = true; return ret; } @@ -815,6 +815,16 @@ void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) SetScissor(GSVector4i(x, y, x + width, y + height)); } +void GPUDevice::SetStencilRef(u8 value) /*= 0*/ +{ + Panic("FIXME"); +} + +void GPUDevice::ClearStencil(GPUTexture* t, u8 value) /*= 0*/ +{ + Panic("FIXME"); +} + void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) { SetViewportAndScissor(GSVector4i(x, y, x + width, y + height)); diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 5ed91d7c1..cf3999d9f 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -289,7 +289,7 @@ public: MaxCount }; - enum class DepthFunc : u8 + enum class ComparisonFunc : u8 { Never, Always, @@ -302,6 +302,20 @@ public: MaxCount }; + enum class StencilOp : u8 + { + Keep, + Zero, + Replace, + IncrSat, + DecrSat, + Invert, + Incr, + Decr, + + MaxCount + }; + enum class BlendFunc : u8 { Zero, @@ -353,9 +367,19 @@ public: union DepthState { - BitField depth_test; - BitField depth_write; - u8 key; + BitField depth_test; + BitField depth_write; + + BitField stencil_enable; + BitField front_stencil_fail_op; + BitField front_stencil_depth_fail_op; + BitField front_stencil_pass_op; + BitField front_stencil_func; + BitField back_stencil_fail_op; + BitField back_stencil_depth_fail_op; + BitField back_stencil_pass_op; + BitField back_stencil_func; + u32 key; // clang-format off ALWAYS_INLINE DepthState() = default; @@ -366,6 +390,13 @@ public: ALWAYS_INLINE bool operator<(const 
DepthState& rhs) const { return key < rhs.key; } // clang-format on + ALWAYS_INLINE bool DepthMatches(const DepthState& ds) const { return ((key & 0x0Fu) == (ds.key & 0x0Fu)); } + ALWAYS_INLINE bool StencilMatches(const DepthState& ds) const + { + return ((key & 0x1FFFFFF0u) == (ds.key & 0x1FFFFFF0u)); + } + ALWAYS_INLINE bool FrontAndBackStencilAreSame() const { return ((key >> 6) & 0xFFFu) == ((key >> 18) & 0xFFFu); } + static DepthState GetNoTestsState(); static DepthState GetAlwaysWriteState(); }; @@ -417,11 +448,11 @@ public: struct GraphicsConfig { - Layout layout; - - Primitive primitive; InputLayout input_layout; + Layout layout; + Primitive primitive; + RasterizationState rasterization; DepthState depth; BlendState blend; @@ -773,6 +804,7 @@ public: GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) = 0; virtual void ClearRenderTarget(GPUTexture* t, u32 c); virtual void ClearDepth(GPUTexture* t, float d); + virtual void ClearStencil(GPUTexture* t, u8 value) = 0; virtual void InvalidateRenderTarget(GPUTexture* t); /// Shader abstraction. 
@@ -826,6 +858,7 @@ public: virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0; virtual void SetViewport(const GSVector4i rc) = 0; virtual void SetScissor(const GSVector4i rc) = 0; + virtual void SetStencilRef(u8 value) = 0; void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr, GPUPipeline::RenderPassFlag flags = GPUPipeline::NoRenderPassFlags); void SetViewport(s32 x, s32 y, s32 width, s32 height); diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index 40e4be872..d905ead94 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -151,6 +151,7 @@ public: { return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil); } + ALWAYS_INLINE bool HasStencil() const { return IsDepthStencilFormat(m_format); } ALWAYS_INLINE const ClearValue& GetClearValue() const { return m_clear_value; } ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; } diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 9c43d886d..9a756b52f 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -192,6 +192,33 @@ void OpenGLDevice::ClearDepth(GPUTexture* t, float d) CommitDSClearInFB(static_cast(t)); } +void OpenGLDevice::ClearStencil(GPUTexture* t, u8 value) +{ + OpenGLTexture* T = static_cast(t); + DebugAssert(T->HasStencil()); + + glDisable(GL_SCISSOR_TEST); + + const GLint ivalue = value; + + if (m_current_depth_target == T) + { + glClearBufferiv(GL_STENCIL, 0, &ivalue); + } + else + { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, T->GetGLTarget(), T->GetGLId(), 0); + + glClearBufferiv(GL_STENCIL, 0, &ivalue); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); + } + + glEnable(GL_SCISSOR_TEST); +} + void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); 
@@ -708,7 +735,8 @@ GLuint OpenGLDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUT if (ds) { OpenGLTexture* const DS = static_cast(ds); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, DS->GetGLTarget(), DS->GetGLId(), 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, DS->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT, + DS->GetGLTarget(), DS->GetGLId(), 0); } glDrawBuffers(num_rts, s_draw_buffers.data()); @@ -1271,6 +1299,16 @@ void OpenGLDevice::SetScissor(const GSVector4i rc) UpdateScissor(); } +void OpenGLDevice::SetStencilRef(u8 value) +{ + if (m_last_stencil_ref == value) + return; + + m_last_stencil_ref = value; + if (m_last_depth_state.stencil_enable) + UpdateStencilFunc(); +} + void OpenGLDevice::UpdateViewport() { glViewport(m_last_viewport.left, m_last_viewport.top, m_last_viewport.width(), m_last_viewport.height()); diff --git a/src/util/opengl_device.h b/src/util/opengl_device.h index 68b5352c3..1508baf81 100644 --- a/src/util/opengl_device.h +++ b/src/util/opengl_device.h @@ -74,6 +74,7 @@ public: u32 src_x, u32 src_y, u32 width, u32 height) override; void ClearRenderTarget(GPUTexture* t, u32 c) override; void ClearDepth(GPUTexture* t, float d) override; + void ClearStencil(GPUTexture* t, u8 value) override; void InvalidateRenderTarget(GPUTexture* t) override; std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data, @@ -105,6 +106,7 @@ public: void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetViewport(const GSVector4i rc) override; void SetScissor(const GSVector4i rc) override; + void SetStencilRef(u8 value) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -173,6 +175,7 @@ private: void UpdateViewport(); void UpdateScissor(); + void UpdateStencilFunc(); 
void CreateTimestampQueries(); void DestroyTimestampQueries(); @@ -206,8 +209,9 @@ private: // VAO cache - fixed max as key OpenGLPipeline::VertexArrayCache::const_iterator m_last_vao = m_vao_cache.cend(); GPUPipeline::BlendState m_last_blend_state = {}; - GPUPipeline::RasterizationState m_last_rasterization_state = {}; GPUPipeline::DepthState m_last_depth_state = {}; + GPUPipeline::RasterizationState m_last_rasterization_state = {}; + u8 m_last_stencil_ref = 0; GLuint m_uniform_buffer_alignment = 1; GLuint m_last_program = 0; u32 m_last_texture_unit = 0; diff --git a/src/util/opengl_pipeline.cpp b/src/util/opengl_pipeline.cpp index de0ed3523..4620e896e 100644 --- a/src/util/opengl_pipeline.cpp +++ b/src/util/opengl_pipeline.cpp @@ -686,9 +686,8 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyRasterizationState(GPUPipeline::Ra m_last_rasterization_state = rs; } -ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds) -{ - static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> func_mapping = {{ +static constexpr std::array(GPUPipeline::ComparisonFunc::MaxCount)> s_comparison_func_mapping = + {{ GL_NEVER, // Never GL_ALWAYS, // Always GL_LESS, // Less @@ -698,18 +697,82 @@ ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState GL_EQUAL, // Equal }}; +ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyDepthState(GPUPipeline::DepthState ds) +{ + static constexpr std::array(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{ + GL_KEEP, // Keep + GL_ZERO, // Zero + GL_REPLACE, // Replace + GL_INCR, // IncrSat + GL_DECR, // DecrSat + GL_INVERT, // Invert + GL_INCR_WRAP, // Incr + GL_DECR_WRAP, // Decr + }}; + if (m_last_depth_state == ds) return; - (ds.depth_test != GPUPipeline::DepthFunc::Always || ds.depth_write) ? 
glEnable(GL_DEPTH_TEST) : - glDisable(GL_DEPTH_TEST); - glDepthFunc(func_mapping[static_cast(ds.depth_test.GetValue())]); - if (m_last_depth_state.depth_write != ds.depth_write) - glDepthMask(ds.depth_write); + if (!m_last_depth_state.DepthMatches(ds)) + { + (ds.depth_test != GPUPipeline::ComparisonFunc::Always || ds.depth_write) ? glEnable(GL_DEPTH_TEST) : + glDisable(GL_DEPTH_TEST); + glDepthFunc(s_comparison_func_mapping[static_cast(ds.depth_test.GetValue())]); + if (m_last_depth_state.depth_write != ds.depth_write) + glDepthMask(ds.depth_write); + } + + if (!m_last_depth_state.StencilMatches(ds)) + { + if (m_last_depth_state.stencil_enable != ds.stencil_enable) + ds.stencil_enable ? glEnable(GL_STENCIL_TEST) : glDisable(GL_STENCIL_TEST); + + if (ds.FrontAndBackStencilAreSame()) + { + glStencilFuncSeparate(GL_FRONT_AND_BACK, + s_comparison_func_mapping[static_cast(ds.front_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + glStencilOpSeparate(GL_FRONT_AND_BACK, stencil_op_mapping[static_cast(ds.front_stencil_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.front_stencil_depth_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.front_stencil_pass_op.GetValue())]); + } + else + { + glStencilFuncSeparate(GL_FRONT, s_comparison_func_mapping[static_cast(ds.front_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + glStencilFuncSeparate(GL_BACK, s_comparison_func_mapping[static_cast(ds.back_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + glStencilOpSeparate(GL_FRONT, stencil_op_mapping[static_cast(ds.front_stencil_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.front_stencil_depth_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.front_stencil_pass_op.GetValue())]); + glStencilOpSeparate(GL_BACK, stencil_op_mapping[static_cast(ds.back_stencil_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.back_stencil_depth_fail_op.GetValue())], + stencil_op_mapping[static_cast(ds.back_stencil_pass_op.GetValue())]); + } 
+ } m_last_depth_state = ds; } +void OpenGLDevice::UpdateStencilFunc() +{ + if (m_last_depth_state.FrontAndBackStencilAreSame()) + { + glStencilFuncSeparate(GL_FRONT_AND_BACK, + s_comparison_func_mapping[static_cast(m_last_depth_state.front_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + } + else + { + glStencilFuncSeparate(GL_FRONT, + s_comparison_func_mapping[static_cast(m_last_depth_state.front_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + glStencilFuncSeparate(GL_BACK, + s_comparison_func_mapping[static_cast(m_last_depth_state.back_stencil_func.GetValue())], + m_last_stencil_ref, 0xFF); + } +} + ALWAYS_INLINE_RELEASE void OpenGLDevice::ApplyBlendState(GPUPipeline::BlendState bs) { static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 285466b16..fb1c44b38 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -587,7 +587,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); - const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; + const GLenum attachment = tex->IsDepthStencil() ? + (tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) : + GL_COLOR_ATTACHMENT0; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); @@ -612,7 +614,9 @@ void OpenGLDevice::CommitClear(OpenGLTexture* tex) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); - const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; + const GLenum attachment = tex->IsDepthStencil() ? + (tex->HasStencil() ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT) : + GL_COLOR_ATTACHMENT0; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); if (tex->IsDepthStencil()) @@ -701,7 +705,7 @@ void OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex) { case GPUTexture::State::Invalidated: { - const GLenum attachment = GL_DEPTH_ATTACHMENT; + const GLenum attachment = tex->HasStencil() ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; if (glInvalidateFramebuffer) glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); tex->SetState(GPUTexture::State::Dirty); diff --git a/src/util/postprocessing.cpp b/src/util/postprocessing.cpp index a1f235f34..fb6f62a09 100644 --- a/src/util/postprocessing.cpp +++ b/src/util/postprocessing.cpp @@ -582,9 +582,9 @@ bool PostProcessing::Chain::CheckTargets(GPUTexture::Format target_format, u32 t GL_OBJECT_NAME(vso, "Post-processing rotate blit VS"); GL_OBJECT_NAME(vso, "Post-processing rotate blit FS"); - const GPUPipeline::GraphicsConfig config = {.layout = GPUPipeline::Layout::SingleTextureAndPushConstants, + const GPUPipeline::GraphicsConfig config = {.input_layout = {}, + .layout = GPUPipeline::Layout::SingleTextureAndPushConstants, .primitive = GPUPipeline::Primitive::Triangles, - .input_layout = {}, .rasterization = GPUPipeline::RasterizationState::GetNoCullState(), .depth = GPUPipeline::DepthState::GetNoTestsState(), .blend = GPUPipeline::BlendState::GetNoBlendingState(), diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index ace1138f9..b9526ef47 100644 --- a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -2317,8 +2317,8 @@ void VulkanDevice::EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u m_current_swap_chain = nullptr; VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); - VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget, 0, 1, 0, - 1, VulkanTexture::Layout::ColorAttachment, + 
VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, SC->GetCurrentImage(), GPUTexture::Type::RenderTarget, + SC->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment, VulkanTexture::Layout::PresentSrc); EndAndSubmitCommandBuffer(SC, explicit_present); InvalidateCachedState(); @@ -2647,6 +2647,25 @@ void VulkanDevice::ClearDepth(GPUTexture* t, float d) } } +void VulkanDevice::ClearStencil(GPUTexture* t, u8 value) +{ + VulkanTexture* T = static_cast(t); + const VkClearDepthStencilValue clear_value = {0.0f, static_cast(value)}; + if (InRenderPass() && m_current_depth_target == T) + { + // Use an attachment clear so the render pass isn't restarted. + const VkClearAttachment ca = {VK_IMAGE_ASPECT_STENCIL_BIT, 0, {.depthStencil = clear_value}}; + const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u}; + vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc); + } + else + { + const VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_STENCIL_BIT, 0u, 1u, 0u, 1u}; + T->TransitionToLayout(VulkanTexture::Layout::ClearDst); + vkCmdClearDepthStencilImage(m_current_command_buffer, T->GetImage(), T->GetVkLayout(), &clear_value, 1, &srr); + } +} + void VulkanDevice::InvalidateRenderTarget(GPUTexture* t) { GPUDevice::InvalidateRenderTarget(t); @@ -3012,11 +3031,13 @@ void VulkanDevice::RenderBlankFrame(VulkanSwapChain* swap_chain) const VkImage image = swap_chain->GetCurrentImage(); static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; static constexpr VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}}; - VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, - VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst); + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(), + 0, 1, 0, 1, VulkanTexture::Layout::Undefined, + VulkanTexture::Layout::TransferDst); 
vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &srr); - VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, - VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc); + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, swap_chain->GetFormat(), + 0, 1, 0, 1, VulkanTexture::Layout::TransferDst, + VulkanTexture::Layout::PresentSrc); EndAndSubmitCommandBuffer(swap_chain, false); @@ -3205,7 +3226,7 @@ void VulkanDevice::BeginRenderPass() VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; std::array attachments; - VkRenderingAttachmentInfoKHR depth_attachment; + VkRenderingAttachmentInfoKHR depth_attachment, stencil_attachment; if (m_num_current_render_targets > 0 || m_current_depth_target) { @@ -3276,6 +3297,20 @@ void VulkanDevice::BeginRenderPass() depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u}; ds->SetState(GPUTexture::State::Dirty); + + if (ds->HasStencil()) + { + stencil_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; + stencil_attachment.pNext = nullptr; + stencil_attachment.imageView = ds->GetView(); + stencil_attachment.imageLayout = ds->GetVkLayout(); + stencil_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR; + stencil_attachment.resolveImageView = VK_NULL_HANDLE; + stencil_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + ri.pStencilAttachment = &stencil_attachment; + } } const VulkanTexture* const rt_or_ds = @@ -3372,7 +3407,15 @@ void VulkanDevice::BeginRenderPass() // If this is a new command buffer, bind the pipeline and such. 
if (m_dirty_flags & DIRTY_FLAG_INITIAL) + { SetInitialPipelineState(); + } + else if (m_current_depth_target && m_current_depth_target->IsDepthStencil()) + { + // Stencil reference still needs to be set. + vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK, + ZeroExtend32(m_current_stencil_ref)); + } } void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 clear_color) @@ -3383,8 +3426,8 @@ void VulkanDevice::BeginSwapChainRenderPass(VulkanSwapChain* swap_chain, u32 cle const VkImage swap_chain_image = swap_chain->GetCurrentImage(); // Swap chain images start in undefined - VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, - VulkanTexture::Layout::Undefined, + VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, + swap_chain->GetFormat(), 0, 1, 0, 1, VulkanTexture::Layout::Undefined, VulkanTexture::Layout::ColorAttachment); // All textures should be in shader read only optimal already, but just in case.. 
@@ -3563,6 +3606,12 @@ void VulkanDevice::SetInitialPipelineState() const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top}, {static_cast(m_current_scissor.width()), static_cast(m_current_scissor.height())}}; vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); + + if (m_current_depth_target && m_current_depth_target->IsDepthStencil()) + { + vkCmdSetStencilReference(GetCurrentCommandBuffer(), VK_STENCIL_FACE_FRONT_AND_BACK, + ZeroExtend32(m_current_stencil_ref)); + } } void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) @@ -3667,6 +3716,20 @@ void VulkanDevice::SetViewport(const GSVector4i rc) vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); } +void VulkanDevice::SetStencilRef(u8 value) +{ + if (m_current_stencil_ref == value) + return; + + m_current_stencil_ref = value; + + // if current DS does not have a stencil component, then dynamic stencil state will not be enabled + if (!InRenderPass() || !m_current_depth_target || !m_current_depth_target->IsDepthStencil()) + return; + + vkCmdSetStencilReference(m_current_command_buffer, VK_STENCIL_FACE_FRONT_AND_BACK, ZeroExtend32(value)); +} + void VulkanDevice::SetScissor(const GSVector4i rc) { if (m_current_scissor.eq(rc)) diff --git a/src/util/vulkan_device.h b/src/util/vulkan_device.h index a7a9f78ba..1b424dac4 100644 --- a/src/util/vulkan_device.h +++ b/src/util/vulkan_device.h @@ -108,6 +108,7 @@ public: u32 src_x, u32 src_y, u32 width, u32 height) override; void ClearRenderTarget(GPUTexture* t, u32 c) override; void ClearDepth(GPUTexture* t, float d) override; + void ClearStencil(GPUTexture* t, u8 value) override; void InvalidateRenderTarget(GPUTexture* t) override; std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, std::span data, @@ -139,6 +140,7 @@ public: void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetViewport(const GSVector4i rc) override; void SetScissor(const GSVector4i rc) override; + void SetStencilRef(u8 
value) override; void Draw(u32 vertex_count, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; @@ -471,6 +473,7 @@ private: VulkanPipeline* m_current_pipeline = nullptr; GPUPipeline::Layout m_current_pipeline_layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + u8 m_current_stencil_ref = 0; std::array m_current_textures = {}; std::array m_current_samplers = {}; diff --git a/src/util/vulkan_pipeline.cpp b/src/util/vulkan_pipeline.cpp index 7a6c76b87..c4dbb57e8 100644 --- a/src/util/vulkan_pipeline.cpp +++ b/src/util/vulkan_pipeline.cpp @@ -150,7 +150,7 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra VK_CULL_MODE_BACK_BIT, // Back }}; - static constexpr std::array(GPUPipeline::DepthFunc::MaxCount)> compare_mapping = {{ + static constexpr std::array(GPUPipeline::ComparisonFunc::MaxCount)> compare_mapping = {{ VK_COMPARE_OP_NEVER, // Never VK_COMPARE_OP_ALWAYS, // Always VK_COMPARE_OP_LESS, // Less @@ -160,6 +160,17 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra VK_COMPARE_OP_EQUAL, // Equal }}; + static constexpr std::array(GPUPipeline::StencilOp::MaxCount)> stencil_op_mapping = {{ + VK_STENCIL_OP_KEEP, // Keep + VK_STENCIL_OP_ZERO, // Zero + VK_STENCIL_OP_REPLACE, // Replace + VK_STENCIL_OP_INCREMENT_AND_CLAMP, // IncrSat + VK_STENCIL_OP_DECREMENT_AND_CLAMP, // DecrSat + VK_STENCIL_OP_INVERT, // Invert + VK_STENCIL_OP_INCREMENT_AND_WRAP, // Incr + VK_STENCIL_OP_DECREMENT_AND_WRAP, // Decr + }}; + static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ VK_BLEND_FACTOR_ZERO, // Zero VK_BLEND_FACTOR_ONE, // One @@ -215,9 +226,35 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra VK_FRONT_FACE_CLOCKWISE); if (config.samples > 1) gpb.SetMultisamples(config.samples, config.per_sample_shading); - 
gpb.SetDepthState(config.depth.depth_test != GPUPipeline::DepthFunc::Always || config.depth.depth_write, + gpb.SetDepthState(config.depth.depth_test != GPUPipeline::ComparisonFunc::Always || config.depth.depth_write, config.depth.depth_write, compare_mapping[static_cast(config.depth.depth_test.GetValue())]); - gpb.SetNoStencilState(); + + if (config.depth.stencil_enable) + { + const VkStencilOpState front = { + .failOp = stencil_op_mapping[static_cast(config.depth.front_stencil_fail_op.GetValue())], + .passOp = stencil_op_mapping[static_cast(config.depth.front_stencil_pass_op.GetValue())], + .depthFailOp = stencil_op_mapping[static_cast(config.depth.front_stencil_depth_fail_op.GetValue())], + .compareOp = compare_mapping[static_cast(config.depth.front_stencil_func.GetValue())], + .compareMask = 0xFFu, + .writeMask = 0xFFu, + .reference = 0x00u, + }; + const VkStencilOpState back = { + .failOp = stencil_op_mapping[static_cast(config.depth.back_stencil_fail_op.GetValue())], + .passOp = stencil_op_mapping[static_cast(config.depth.back_stencil_pass_op.GetValue())], + .depthFailOp = stencil_op_mapping[static_cast(config.depth.back_stencil_depth_fail_op.GetValue())], + .compareOp = compare_mapping[static_cast(config.depth.back_stencil_func.GetValue())], + .compareMask = 0xFFu, + .writeMask = 0xFFu, + .reference = 0x00u, + }; + gpb.SetStencilState(true, front, back); + } + else + { + gpb.SetNoStencilState(); + } for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) { @@ -239,6 +276,9 @@ std::unique_ptr VulkanDevice::CreatePipeline(const GPUPipeline::Gra gpb.AddDynamicState(VK_DYNAMIC_STATE_VIEWPORT); gpb.AddDynamicState(VK_DYNAMIC_STATE_SCISSOR); + if (GPUTexture::IsDepthStencilFormat(config.depth_format)) + gpb.AddDynamicState(VK_DYNAMIC_STATE_STENCIL_REFERENCE); + gpb.SetPipelineLayout(m_pipeline_layouts[static_cast(GetPipelineLayoutType(config.render_pass_flags))] [static_cast(config.layout)]); @@ -258,8 +298,9 @@ std::unique_ptr VulkanDevice::CreatePipeline(const 
GPUPipeline::Gra if (config.depth_format != GPUTexture::Format::Unknown) { - gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(config.depth_format)], - VK_FORMAT_UNDEFINED); + const VkFormat vk_format = VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast(config.depth_format)]; + gpb.SetDynamicRenderingDepthAttachment( + vk_format, GPUTexture::IsDepthStencilFormat(config.depth_format) ? vk_format : VK_FORMAT_UNDEFINED); } if (config.render_pass_flags & GPUPipeline::ColorFeedbackLoop) diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index 4e3accaab..6afee6e7c 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -116,7 +116,9 @@ std::unique_ptr VulkanTexture::Create(u32 width, u32 height, u32 DebugAssert(levels == 1); ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + vci.subresourceRange.aspectMask = IsDepthStencilFormat(format) ? 
+ (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : + VK_IMAGE_ASPECT_DEPTH_BIT; } break; @@ -490,19 +492,19 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe u32 start_level, u32 num_levels, Layout old_layout, Layout new_layout) { - TransitionSubresourcesToLayout(command_buffer, m_image, m_type, start_layer, num_layers, start_level, num_levels, - old_layout, new_layout); + TransitionSubresourcesToLayout(command_buffer, m_image, m_type, m_format, start_layer, num_layers, start_level, + num_levels, old_layout, new_layout); } void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, - u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels, - Layout old_layout, Layout new_layout) + Format format, u32 start_layer, u32 num_layers, u32 start_level, + u32 num_levels, Layout old_layout, Layout new_layout) { VkImageAspectFlags aspect; if (type == Type::DepthStencil) { - // TODO: detect stencil - aspect = VK_IMAGE_ASPECT_DEPTH_BIT; + aspect = IsDepthStencilFormat(format) ? 
(VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : + VK_IMAGE_ASPECT_DEPTH_BIT; } else { diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index f1c83ccec..18b714c7d 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -73,9 +73,9 @@ public: void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels, Layout old_layout, Layout new_layout); - static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, u32 start_layer, - u32 num_layers, u32 start_level, u32 num_levels, Layout old_layout, - Layout new_layout); + static void TransitionSubresourcesToLayout(VkCommandBuffer command_buffer, VkImage image, Type type, Format format, + u32 start_layer, u32 num_layers, u32 start_level, u32 num_levels, + Layout old_layout, Layout new_layout); // Call when the texture is bound to the pipeline, or read from in a copy. ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }