GPUDevice: Add support for feedback loops

Stenzek 2024-03-08 17:55:02 +10:00
parent 2c19f01a28
commit 65669b3250
23 changed files with 426 additions and 191 deletions

View File

@@ -1615,10 +1615,11 @@ bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_sm
 plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
 plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
 plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
+plconfig.geometry_shader = nullptr;
 plconfig.depth_format = GPUTexture::Format::Unknown;
 plconfig.samples = 1;
 plconfig.per_sample_shading = false;
-plconfig.geometry_shader = nullptr;
+plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
 if (display)
 {
@@ -2707,10 +2708,10 @@ void GPU::GetStatsString(SmallStringBase& str)
 {
 if (IsHardwareRenderer())
 {
-str.format("{} HW | {} P | {} DC | {} RP | {} RB | {} C | {} W",
-GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives,
-m_stats.host_num_draws, m_stats.host_num_render_passes, m_stats.num_reads, m_stats.num_copies,
-m_stats.num_writes);
+str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W",
+GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives,
+m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, m_stats.num_reads,
+m_stats.num_copies, m_stats.num_writes);
 }
 else
 {
@@ -2753,6 +2754,7 @@ void GPU::UpdateStatistics(u32 frame_count)
 UPDATE_GPU_STAT(buffer_streamed);
 UPDATE_GPU_STAT(num_draws);
+UPDATE_GPU_STAT(num_barriers);
 UPDATE_GPU_STAT(num_render_passes);
 UPDATE_GPU_STAT(num_copies);
 UPDATE_GPU_STAT(num_downloads);

View File

@@ -625,6 +625,7 @@ protected:
 {
 size_t host_buffer_streamed;
 u32 host_num_draws;
+u32 host_num_barriers;
 u32 host_num_render_passes;
 u32 host_num_copies;
 u32 host_num_downloads;

View File

@@ -819,10 +819,11 @@ bool GPU_HW::CompilePipelines()
 plconfig.input_layout.vertex_stride = sizeof(BatchVertex);
 plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
 plconfig.primitive = GPUPipeline::Primitive::Triangles;
+plconfig.geometry_shader = nullptr;
 plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT);
 plconfig.samples = m_multisamples;
 plconfig.per_sample_shading = m_per_sample_shading;
-plconfig.geometry_shader = nullptr;
+plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
 // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
 for (u8 depth_test = 0; depth_test < 3; depth_test++)

View File

@@ -184,6 +184,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
 m_features.texture_copy_to_self = false;
 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
 m_features.texture_buffers_emulated_with_ssbo = false;
+m_features.feedback_loops = false;
 m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
 m_features.partial_msaa_resolve = false;
 m_features.memory_import = false;
@@ -909,9 +910,10 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
 m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
 }
-void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
+void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, GPUPipeline::RenderPassFlag feedback_loop)
 {
 ID3D11RenderTargetView* rtvs[MAX_RENDER_TARGETS];
+DebugAssert(!feedback_loop);
 bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
 m_current_depth_target = static_cast<D3D11Texture*>(ds);
@@ -1057,3 +1059,8 @@ void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
 s_stats.num_draws++;
 m_context->DrawIndexed(index_count, base_index, base_vertex);
 }
+
+void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
+{
+Panic("Barriers are not supported");
+}

View File

@@ -84,7 +84,8 @@ public:
 void PushUniformBuffer(const void* data, u32 data_size) override;
 void* MapUniformBuffer(u32 size) override;
 void UnmapUniformBuffer(u32 size) override;
-void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
+void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
+GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
 void SetPipeline(GPUPipeline* pipeline) override;
 void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
 void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@@ -92,6 +93,7 @@ public:
 void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
 void Draw(u32 vertex_count, u32 base_vertex) override;
 void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
+void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
 bool GetHostRefreshRate(float* refresh_rate) override;

View File

@@ -1190,6 +1190,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features)
 /*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers
 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
 m_features.texture_buffers_emulated_with_ssbo = false;
+m_features.feedback_loops = false;
 m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
 m_features.partial_msaa_resolve = true;
 m_features.memory_import = false;
@@ -1548,8 +1549,10 @@ void D3D12Device::DestroyRootSignatures()
 it->Reset();
 }
-void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
+void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
+GPUPipeline::RenderPassFlag feedback_loop)
 {
+DebugAssert(!feedback_loop);
 if (InRenderPass())
 EndRenderPass();
@@ -2140,3 +2143,8 @@ void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
 s_stats.num_draws++;
 GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0);
 }
+
+void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
+{
+Panic("Barriers are not supported");
+}

View File

@@ -107,7 +107,8 @@ public:
 void PushUniformBuffer(const void* data, u32 data_size) override;
 void* MapUniformBuffer(u32 size) override;
 void UnmapUniformBuffer(u32 size) override;
-void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
+void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
+GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
 void SetPipeline(GPUPipeline* pipeline) override;
 void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
 void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@@ -115,6 +116,7 @@ public:
 void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
 void Draw(u32 vertex_count, u32 base_vertex) override;
 void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
+void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
 bool SetGPUTimingEnabled(bool enabled) override;
 float GetAndResetAccumulatedGPUTime() override;

View File

@@ -492,6 +492,7 @@ bool GPUDevice::CreateResources()
 plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8);
 plconfig.samples = 1;
 plconfig.per_sample_shading = false;
+plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
 plconfig.vertex_shader = imgui_vs.get();
 plconfig.geometry_shader = nullptr;
 plconfig.fragment_shader = imgui_fs.get();
@@ -615,9 +616,9 @@ void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size)
 UnmapUniformBuffer(data_size);
 }
-void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds /*= nullptr*/)
+void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds, GPUPipeline::RenderPassFlag render_pass_flags)
 {
-SetRenderTargets(rt ? &rt : nullptr, rt ? 1 : 0, ds);
+SetRenderTargets(rt ? &rt : nullptr, rt ? 1 : 0, ds, render_pass_flags);
 }
 void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height)

View File

@@ -133,6 +133,13 @@ public:
 MaxCount
 };
+
+enum RenderPassFlag : u8
+{
+NoRenderPassFlags = 0,
+ColorFeedbackLoop = (1 << 0),
+SampleDepthBuffer = (1 << 1),
+};
 enum class Primitive : u8
 {
 Points,
@@ -369,8 +376,9 @@ public:
 GPUTexture::Format color_formats[4];
 GPUTexture::Format depth_format;
-u32 samples;
+u8 samples;
 bool per_sample_shading;
+RenderPassFlag render_pass_flags;
 void SetTargetFormats(GPUTexture::Format color_format,
 GPUTexture::Format depth_format_ = GPUTexture::Format::Unknown);
@@ -425,11 +433,19 @@ public:
 enum FeatureMask : u32
 {
 FEATURE_MASK_DUAL_SOURCE_BLEND = (1 << 0),
-FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 1),
-FEATURE_MASK_TEXTURE_BUFFERS = (1 << 2),
-FEATURE_MASK_GEOMETRY_SHADERS = (1 << 3),
-FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 4),
-FEATURE_MASK_MEMORY_IMPORT = (1 << 5),
+FEATURE_MASK_FEEDBACK_LOOPS = (1 << 1),
+FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
+FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
+FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
+FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
+FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
+};
+
+enum class DrawBarrier : u32
+{
+None,
+One,
+Full
 };
 struct Features
@@ -441,6 +457,7 @@ public:
 bool texture_copy_to_self : 1;
 bool supports_texture_buffers : 1;
 bool texture_buffers_emulated_with_ssbo : 1;
+bool feedback_loops : 1;
 bool geometry_shaders : 1;
 bool partial_msaa_resolve : 1;
 bool memory_import : 1;
@@ -454,6 +471,7 @@ public:
 {
 size_t buffer_streamed;
 u32 num_draws;
+u32 num_barriers;
 u32 num_render_passes;
 u32 num_copies;
 u32 num_downloads;
@@ -616,18 +634,21 @@ public:
 void UploadUniformBuffer(const void* data, u32 data_size);
 /// Drawing setup abstraction.
-virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) = 0;
+virtual void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
+GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags) = 0;
 virtual void SetPipeline(GPUPipeline* pipeline) = 0;
 virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) = 0;
 virtual void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) = 0;
 virtual void SetViewport(s32 x, s32 y, s32 width, s32 height) = 0; // TODO: Rectangle
 virtual void SetScissor(s32 x, s32 y, s32 width, s32 height) = 0;
-void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr);
+void SetRenderTarget(GPUTexture* rt, GPUTexture* ds = nullptr,
+GPUPipeline::RenderPassFlag render_pass_flags = GPUPipeline::NoRenderPassFlags);
 void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);
 // Drawing abstraction.
 virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
 virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
+virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
 /// Returns false if the window was completely occluded.
 virtual bool BeginPresent(bool skip_present) = 0;
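
Taken together, the gpu_device.h hunks above define the renderer-facing surface for feedback loops: a per-pipeline render_pass_flags field, a feedback_loops capability bit, the DrawBarrier hint, and SetRenderTargets()/SetRenderTarget() overloads that accept GPUPipeline::RenderPassFlag. A minimal sketch of how a caller might drive this API follows; the feature query, the rt/ds textures, the prebuilt pipelines, and the DrawBarrier::Full choice are illustrative assumptions rather than code from this commit.

// Hedged sketch of driving the new API from a renderer; identifiers other than the
// GPUDevice/GPUPipeline names introduced in this commit are assumed context.
void DrawWithColorFeedback(GPUTexture* rt, GPUTexture* ds, GPUPipeline* feedback_pipeline,
                           GPUPipeline* plain_pipeline, u32 index_count, u32 base_index, u32 base_vertex)
{
  if (g_gpu_device->GetFeatures().feedback_loops) // assumed accessor for the Features struct
  {
    // Bind RT 0 so the fragment shader may also read it.
    g_gpu_device->SetRenderTarget(rt, ds, GPUPipeline::ColorFeedbackLoop);
    g_gpu_device->SetPipeline(feedback_pipeline); // built with render_pass_flags = ColorFeedbackLoop
    // Request barriers around the draw so reads observe earlier writes to the same target.
    g_gpu_device->DrawIndexedWithBarrier(index_count, base_index, base_vertex, GPUDevice::DrawBarrier::Full);
  }
  else
  {
    g_gpu_device->SetRenderTarget(rt, ds);
    g_gpu_device->SetPipeline(plain_pipeline);
    g_gpu_device->DrawIndexed(index_count, base_index, base_vertex);
  }
}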

View File

@@ -235,6 +235,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
 m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
 m_features.texture_buffers_emulated_with_ssbo = true;
+m_features.feedback_loops = false;
 m_features.geometry_shaders = false;
 m_features.partial_msaa_resolve = false;
 m_features.memory_import = true;

View File

@@ -403,8 +403,9 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
 !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && (max_dual_source_draw_buffers > 0) &&
 (GLAD_GL_VERSION_3_3 || GLAD_GL_ARB_blend_func_extended || GLAD_GL_EXT_blend_func_extended);
-m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) &&
-(GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch);
+m_features.framebuffer_fetch =
+!(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) &&
+(GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch);
 #ifdef __APPLE__
 // Partial texture buffer uploads appear to be broken in macOS's OpenGL driver.
@@ -469,6 +470,8 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
 // So, blit from the shadow texture, like in the other renderers.
 m_features.texture_copy_to_self = !vendor_id_arm && !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
+m_features.feedback_loops = m_features.framebuffer_fetch;
 m_features.geometry_shaders =
 !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
@@ -1035,6 +1038,11 @@ void OpenGLDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
 glDrawElements(m_current_pipeline->GetTopology(), index_count, GL_UNSIGNED_SHORT, indices);
 }
+
+void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
+{
+Panic("Barriers are not supported");
+}
 void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
 u32* map_base_vertex)
 {
@@ -1088,8 +1096,9 @@ void OpenGLDevice::UnmapUniformBuffer(u32 size)
 glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_uniform_buffer->GetGLBufferId(), pos, size);
 }
-void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds)
+void OpenGLDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, GPUPipeline::RenderPassFlag feedback_loop)
 {
+//DebugAssert(!feedback_loop); TODO
 bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds);
 bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
 bool needs_rt_clear = false;

View File

@@ -89,7 +89,8 @@ public:
 void PushUniformBuffer(const void* data, u32 data_size) override;
 void* MapUniformBuffer(u32 size) override;
 void UnmapUniformBuffer(u32 size) override;
-void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override;
+void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
+GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
 void SetPipeline(GPUPipeline* pipeline) override;
 void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
 void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@@ -97,6 +98,7 @@ public:
 void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
 void Draw(u32 vertex_count, u32 base_vertex) override;
 void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
+void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
 void SetSyncMode(DisplaySyncMode mode) override;

View File

@@ -1222,6 +1222,7 @@ bool PostProcessing::ReShadeFXShader::CompilePipeline(GPUTexture::Format format,
 plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
 plconfig.samples = 1;
 plconfig.per_sample_shading = false;
+plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
 progress->PushState();

View File

@@ -136,6 +136,7 @@ bool PostProcessing::GLSLShader::CompilePipeline(GPUTexture::Format format, u32
 plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
 plconfig.samples = 1;
 plconfig.per_sample_shading = false;
+plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
 plconfig.vertex_shader = vs.get();
 plconfig.fragment_shader = fs.get();
 plconfig.geometry_shader = nullptr;

View File

@@ -505,7 +505,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
 const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
 bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool depth_output /* = false */,
 bool msaa /* = false */, bool ssaa /* = false */, bool declare_sample_id /* = false */,
-bool noperspective_color /* = false */, bool framebuffer_fetch /* = false */)
+bool noperspective_color /* = false */, bool feedback_loop /* = false */)
 {
 if (m_glsl)
 {
@@ -560,21 +560,32 @@ void ShaderGen::DeclareFragmentEntryPoint(
 ss << "#define o_depth gl_FragDepth\n";
 const char* target_0_qualifier = "out";
-#ifdef ENABLE_OPENGL
-if ((m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) && m_supports_framebuffer_fetch &&
-framebuffer_fetch)
+if (feedback_loop)
 {
-if (GLAD_GL_EXT_shader_framebuffer_fetch)
+#ifdef ENABLE_OPENGL
+if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES)
 {
-target_0_qualifier = "inout";
-ss << "#define LAST_FRAG_COLOR o_col0\n";
+Assert(m_supports_framebuffer_fetch);
+if (GLAD_GL_EXT_shader_framebuffer_fetch)
+{
+target_0_qualifier = "inout";
+ss << "#define LAST_FRAG_COLOR o_col0\n";
+}
+else if (GLAD_GL_ARM_shader_framebuffer_fetch)
+{
+ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n";
+}
 }
-else if (GLAD_GL_ARM_shader_framebuffer_fetch)
-{
-ss << "#define LAST_FRAG_COLOR gl_LastFragColorARM\n";
-}
-}
 #endif
+
+#ifdef ENABLE_VULKAN
+if (m_render_api == RenderAPI::Vulkan)
+{
+ss << "layout(input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput u_input_rt;\n";
+ss << "#define LAST_FRAG_COLOR subpassLoad(u_input_rt)\n";
+}
+#endif
+}
 if (m_use_glsl_binding_layout)
 {
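
For reference, a fragment entry point generated with feedback_loop set exposes the previous value of render target 0 through the LAST_FRAG_COLOR macro defined above (framebuffer fetch on GL/GLES, a subpass input on Vulkan). The sketch below shows how a shader body built on top of it could blend against that value; the v_col0 input and the blend expression are placeholders, not output produced by this commit.

#include <sstream>

// Illustrative only: assumes DeclareFragmentEntryPoint() was called with feedback_loop = true,
// so LAST_FRAG_COLOR expands to the appropriate read for the active render API.
static void EmitExampleFeedbackBody(std::stringstream& ss)
{
  ss << "  vec4 prev = LAST_FRAG_COLOR;\n";                      // current contents of RT 0
  ss << "  vec4 src = v_col0;\n";                                // incoming colour (assumed input name)
  ss << "  o_col0 = vec4(mix(prev.rgb, src.rgb, src.a), 1.0);\n"; // placeholder blend, not the real VRAM math
}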

View File

@@ -53,7 +53,7 @@ protected:
 const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
 bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false,
 bool msaa = false, bool ssaa = false, bool declare_sample_id = false,
-bool noperspective_color = false, bool framebuffer_fetch = false);
+bool noperspective_color = false, bool feedback_loop = false);
 RenderAPI m_render_api;
 bool m_glsl;

View File

@@ -267,6 +267,9 @@ void Vulkan::GraphicsPipelineBuilder::Clear()
 m_rendering = {};
 m_rendering.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
+
+m_rendering_input_attachment_locations = {};
+m_rendering_input_attachment_locations.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR;
 // set defaults
 SetNoCullRasterizationState();
 SetNoDepthTestState();
@@ -595,6 +598,19 @@ void Vulkan::GraphicsPipelineBuilder::SetDynamicRenderingDepthAttachment(VkForma
 m_rendering.stencilAttachmentFormat = stencil_format;
 }
+
+void Vulkan::GraphicsPipelineBuilder::AddDynamicRenderingInputAttachment(u32 color_attachment_index)
+{
+AddPointerToChain(&m_ci, &m_rendering_input_attachment_locations);
+DebugAssert(color_attachment_index < m_rendering.colorAttachmentCount);
+DebugAssert(m_rendering_input_attachment_locations.colorAttachmentCount < MAX_INPUT_ATTACHMENTS);
+m_rendering_input_attachment_locations.pColorAttachmentLocations = m_rendering_input_attachment_indices.data();
+m_rendering_input_attachment_indices[m_rendering_input_attachment_locations.colorAttachmentCount] =
+color_attachment_index;
+m_rendering_input_attachment_locations.colorAttachmentCount++;
+}
 Vulkan::ComputePipelineBuilder::ComputePipelineBuilder()
 {
 Clear();
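
As a usage sketch, the new builder call plugs into pipeline construction alongside the existing dynamic-rendering helpers when VK_KHR_dynamic_rendering_local_read is available; the format, the omitted state setup, and the Create() signature below are assumptions for illustration, not code from this commit.

// Hedged sketch: describe a pipeline whose fragment shader reads colour attachment 0
// as an input attachment on the dynamic rendering local read path.
static VkPipeline BuildFeedbackPipelineSketch(VkDevice device, VkPipelineCache cache)
{
  Vulkan::GraphicsPipelineBuilder gpb;
  gpb.SetDynamicRendering();
  gpb.AddDynamicRenderingColorAttachment(VK_FORMAT_R8G8B8A8_UNORM); // placeholder format
  gpb.AddDynamicRenderingInputAttachment(0);                        // RT 0 doubles as the subpass input
  // ... shaders, vertex input, blend/depth/rasterization state, pipeline layout ...
  return gpb.Create(device, cache); // assumed Create(device, cache) overload
}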

View File

@@ -81,6 +81,7 @@ public:
 MAX_VERTEX_ATTRIBUTES = 16,
 MAX_VERTEX_BUFFERS = 8,
 MAX_ATTACHMENTS = GPUDevice::MAX_RENDER_TARGETS + 1,
+MAX_INPUT_ATTACHMENTS = 1,
 MAX_DYNAMIC_STATE = 8
 };
@@ -144,6 +145,7 @@ public:
 void SetDynamicRendering();
 void AddDynamicRenderingColorAttachment(VkFormat format);
 void SetDynamicRenderingDepthAttachment(VkFormat depth_format, VkFormat stencil_format);
+void AddDynamicRenderingInputAttachment(u32 color_attachment_index);
 private:
 VkGraphicsPipelineCreateInfo m_ci;
@@ -174,7 +176,9 @@ private:
 VkPipelineRasterizationLineStateCreateInfoEXT m_line_rasterization_state;
 VkPipelineRenderingCreateInfoKHR m_rendering;
+VkRenderingAttachmentLocationInfoKHR m_rendering_input_attachment_locations;
 std::array<VkFormat, MAX_ATTACHMENTS> m_rendering_color_formats;
+std::array<u32, MAX_INPUT_ATTACHMENTS> m_rendering_input_attachment_indices;
 };
 class ComputePipelineBuilder
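
The VulkanDevice changes in the next file build on these pieces: a feedback-loop descriptor set layout with a single VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT binding, per-frame pool space sized by MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME, and a DIRTY_FLAG_INPUT_ATTACHMENT raised whenever a ColorFeedbackLoop target is bound. A minimal sketch of the descriptor write that such a path implies is shown here; the function, handles, and layout choice are assumptions for illustration, not code from the commit.

#include <vulkan/vulkan.h>

// Hedged sketch: point the feedback-loop descriptor set at colour attachment 0 so the
// generated shader's subpass input (set 2, binding 0 in the GLSL emitted above) can read it.
// 'device', 'set' and 'rt0_view' are placeholders supplied by the caller.
static void WriteFeedbackLoopInputAttachment(VkDevice device, VkDescriptorSet set, VkImageView rt0_view)
{
  VkDescriptorImageInfo image_info = {};
  image_info.imageView = rt0_view;
  image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; // or the local-read layout when the extension is used

  VkWriteDescriptorSet write = {};
  write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  write.dstSet = set;
  write.dstBinding = 0;
  write.descriptorCount = 1;
  write.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
  write.pImageInfo = &image_info;
  vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}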

View File

@@ -53,6 +53,7 @@ enum : u32
 {
 MAX_DRAW_CALLS_PER_FRAME = 2048,
 MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = GPUDevice::MAX_TEXTURE_SAMPLERS * MAX_DRAW_CALLS_PER_FRAME,
+MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
 MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
 MAX_SAMPLER_DESCRIPTORS = 8192,
@@ -380,8 +381,6 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
 m_optional_extensions.vk_ext_rasterization_order_attachment_access =
 SupportsExtension(VK_EXT_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false) ||
 SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
-m_optional_extensions.vk_ext_attachment_feedback_loop_layout =
-SupportsExtension(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME, false);
 m_optional_extensions.vk_khr_get_memory_requirements2 =
 SupportsExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, false);
 m_optional_extensions.vk_khr_bind_memory2 = SupportsExtension(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, false);
@@ -392,6 +391,9 @@ bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool en
 SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) &&
 SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) &&
 SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
+m_optional_extensions.vk_khr_dynamic_rendering_local_read =
+m_optional_extensions.vk_khr_dynamic_rendering &&
+SupportsExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME, false);
 m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
 m_optional_extensions.vk_ext_external_memory_host =
 SupportsExtension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, false);
@@ -538,17 +540,19 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
 VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_TRUE, VK_FALSE,
 VK_FALSE};
-VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = {
-VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_TRUE};
 VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE};
+VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = {
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE};
 if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
 Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature);
-if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout)
-Vulkan::AddPointerToChain(&device_info, &attachment_feedback_loop_feature);
 if (m_optional_extensions.vk_khr_dynamic_rendering)
+{
 Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature);
+if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
+Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature);
+}
 VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
 if (res != VK_SUCCESS)
@@ -586,18 +590,20 @@ void VulkanDevice::ProcessDeviceExtensions()
 VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE,
 VK_FALSE};
-VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT attachment_feedback_loop_feature = {
-VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, nullptr, VK_FALSE};
 VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_FALSE};
+VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = {
+VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE};
 // add in optional feature structs
 if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
 Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature);
-if (m_optional_extensions.vk_ext_attachment_feedback_loop_layout)
-Vulkan::AddPointerToChain(&features2, &attachment_feedback_loop_feature);
 if (m_optional_extensions.vk_khr_dynamic_rendering)
+{
 Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature);
+if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
+Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature);
+}
 // we might not have VK_KHR_get_physical_device_properties2...
 if (!vkGetPhysicalDeviceFeatures2 || !vkGetPhysicalDeviceProperties2 || !vkGetPhysicalDeviceMemoryProperties2)
@@ -627,9 +633,9 @@ void VulkanDevice::ProcessDeviceExtensions()
 // confirm we actually support it
 m_optional_extensions.vk_ext_rasterization_order_attachment_access &=
 (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE);
-m_optional_extensions.vk_ext_attachment_feedback_loop_layout &=
-(attachment_feedback_loop_feature.attachmentFeedbackLoopLayout == VK_TRUE);
 m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE);
+m_optional_extensions.vk_khr_dynamic_rendering_local_read &=
+(dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE);
 VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}};
 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = {
@@ -664,6 +670,7 @@ void VulkanDevice::ProcessDeviceExtensions()
 if (m_optional_extensions.vk_khr_dynamic_rendering)
 {
 m_optional_extensions.vk_khr_dynamic_rendering = false;
+m_optional_extensions.vk_khr_dynamic_rendering_local_read = false;
 Log_WarningPrint("Disabling VK_KHR_dynamic_rendering on broken mobile driver.");
 }
 if (m_optional_extensions.vk_khr_push_descriptor)
@@ -673,26 +680,24 @@ void VulkanDevice::ProcessDeviceExtensions()
 }
 }
-Log_InfoPrintf("VK_EXT_memory_budget is %s",
-m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_EXT_rasterization_order_attachment_access is %s",
-m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_EXT_attachment_feedback_loop_layout is %s",
-m_optional_extensions.vk_ext_attachment_feedback_loop_layout ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_get_memory_requirements2 is %s",
-m_optional_extensions.vk_khr_get_memory_requirements2 ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_bind_memory2 is %s",
-m_optional_extensions.vk_khr_bind_memory2 ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_get_physical_device_properties2 is %s",
-m_optional_extensions.vk_khr_get_physical_device_properties2 ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_dedicated_allocation is %s",
-m_optional_extensions.vk_khr_dedicated_allocation ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_dynamic_rendering is %s",
-m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_KHR_push_descriptor is %s",
-m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported");
-Log_InfoPrintf("VK_EXT_external_memory_host is %s",
-m_optional_extensions.vk_ext_external_memory_host ? "supported" : "NOT supported");
+Log_InfoFmt("VK_EXT_memory_budget is {}", m_optional_extensions.vk_ext_memory_budget ? "supported" : "NOT supported");
+Log_InfoFmt("VK_EXT_rasterization_order_attachment_access is {}",
+m_optional_extensions.vk_ext_rasterization_order_attachment_access ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_get_memory_requirements2 is {}",
+m_optional_extensions.vk_khr_get_memory_requirements2 ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_bind_memory2 is {}", m_optional_extensions.vk_khr_bind_memory2 ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_get_physical_device_properties2 is {}",
+m_optional_extensions.vk_khr_get_physical_device_properties2 ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_dedicated_allocation is {}",
+m_optional_extensions.vk_khr_dedicated_allocation ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_dynamic_rendering is {}",
+m_optional_extensions.vk_khr_dynamic_rendering ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_dynamic_rendering_local_read is {}",
+m_optional_extensions.vk_khr_dynamic_rendering_local_read ? "supported" : "NOT supported");
+Log_InfoFmt("VK_KHR_push_descriptor is {}",
+m_optional_extensions.vk_khr_push_descriptor ? "supported" : "NOT supported");
+Log_InfoFmt("VK_EXT_external_memory_host is {}",
+m_optional_extensions.vk_ext_external_memory_host ? "supported" : "NOT supported");
 }
 bool VulkanDevice::CreateAllocator()
@@ -834,25 +839,27 @@ bool VulkanDevice::CreateCommandBuffers()
 }
 Vulkan::SetObjectName(m_device, resources.fence, TinyString::from_format("Frame Fence {}", frame_index));
+u32 num_pools = 0;
+VkDescriptorPoolSize pool_sizes[2];
 if (!m_optional_extensions.vk_khr_push_descriptor)
 {
-VkDescriptorPoolSize pool_sizes[] = {
-{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME},
-};
-VkDescriptorPoolCreateInfo pool_create_info = {
-VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME,
-static_cast<u32>(std::size(pool_sizes)), pool_sizes};
-res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool);
-if (res != VK_SUCCESS)
-{
-LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
-return false;
-}
-Vulkan::SetObjectName(m_device, resources.descriptor_pool,
-TinyString::from_format("Frame Descriptor Pool {}", frame_index));
+pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME};
 }
+pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME};
+
+VkDescriptorPoolCreateInfo pool_create_info = {
+VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME,
+static_cast<u32>(std::size(pool_sizes)), pool_sizes};
+res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool);
+if (res != VK_SUCCESS)
+{
+LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
+return false;
+}
+Vulkan::SetObjectName(m_device, resources.descriptor_pool,
+TinyString::from_format("Frame Descriptor Pool {}", frame_index));
 ++frame_index;
 }
@@ -970,17 +977,15 @@ VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& conf
 key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
 }
-// key.color_feedback_loop = false;
-// key.depth_sampling = false;
 key.samples = static_cast<u8>(config.samples);
+key.feedback_loop = config.render_pass_flags;
 const auto it = m_render_pass_cache.find(key);
 return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
 }
-VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
-bool color_feedback_loop /* = false */, bool depth_sampling /* = false */)
+VkRenderPass VulkanDevice::GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds,
+GPUPipeline::RenderPassFlag feedback_loop)
 {
 RenderPassCacheKey key;
 std::memset(&key, 0, sizeof(key));
@@ -1009,8 +1014,7 @@ VkRenderPass VulkanDevice::GetRenderPass(GPUTexture* const* rts, u32 num_rts, GP
 key.samples = static_cast<u8>(ds->GetSamples());
 }
-key.color_feedback_loop = color_feedback_loop;
-key.depth_sampling = depth_sampling;
+key.feedback_loop = feedback_loop;
 const auto it = m_render_pass_cache.find(key);
 return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
@@ -1674,8 +1678,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 break;
 const VkImageLayout layout =
-key.color_feedback_loop ?
-(UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) :
+(key.feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
+(m_optional_extensions.vk_khr_dynamic_rendering_local_read ? VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
+VK_IMAGE_LAYOUT_GENERAL) :
 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 const RenderPassCacheKey::RenderTarget key_rt = key.color[i];
@@ -1692,15 +1697,12 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 color_references[num_attachments].layout = layout;
 color_reference_ptr = color_references.data();
-if (key.color_feedback_loop)
+if (key.feedback_loop & GPUPipeline::ColorFeedbackLoop)
 {
 DebugAssert(i == 0);
-if (!UseFeedbackLoopLayout())
-{
-input_reference.attachment = num_attachments;
-input_reference.layout = layout;
-input_reference_ptr = &input_reference;
-}
+input_reference.attachment = num_attachments;
+input_reference.layout = layout;
+input_reference_ptr = &input_reference;
 if (!m_optional_extensions.vk_ext_rasterization_order_attachment_access)
 {
@@ -1710,11 +1712,8 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
 subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
 subpass_dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-subpass_dependency.dstAccessMask =
-UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
-subpass_dependency.dependencyFlags = UseFeedbackLoopLayout() ?
-(VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT) :
-VK_DEPENDENCY_BY_REGION_BIT;
+subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
 subpass_dependency_ptr = &subpass_dependency;
 }
 }
@@ -1726,10 +1725,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 if (key.depth_format != static_cast<u8>(GPUTexture::Format::Unknown))
 {
-const VkImageLayout layout =
-key.depth_sampling ?
-(UseFeedbackLoopLayout() ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VK_IMAGE_LAYOUT_GENERAL) :
-VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+const VkImageLayout layout = (key.feedback_loop & GPUPipeline::SampleDepthBuffer) ?
+VK_IMAGE_LAYOUT_GENERAL :
+VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
 attachments[num_attachments] = {0,
 static_cast<VkFormat>(TEXTURE_FORMAT_MAPPING[key.depth_format]),
 static_cast<VkSampleCountFlagBits>(key.samples),
@@ -1746,7 +1744,8 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 }
 const VkSubpassDescriptionFlags subpass_flags =
-(key.color_feedback_loop && m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
+((key.feedback_loop & GPUPipeline::ColorFeedbackLoop) &&
+m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
 VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
 0;
 const VkSubpassDescription subpass = {subpass_flags,
@@ -1784,7 +1783,9 @@ VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
 VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags)
 {
 VulkanDevice& dev = VulkanDevice::GetInstance();
-VkRenderPass render_pass = dev.GetRenderPass(rts, num_rts, ds, false, false);
+VkRenderPass render_pass =
+dev.GetRenderPass(reinterpret_cast<VulkanTexture* const*>(rts), num_rts, static_cast<VulkanTexture*>(ds),
+static_cast<GPUPipeline::RenderPassFlag>(flags));
 const GPUTexture* rt_or_ds = (num_rts > 0) ? rts[0] : ds;
 DebugAssert(rt_or_ds);
@@ -2510,7 +2511,9 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
 m_features.dual_source_blend =
 !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && m_device_features.dualSrcBlend;
-m_features.framebuffer_fetch = /*!(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && */ false;
+m_features.framebuffer_fetch =
+!(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) &&
+m_optional_extensions.vk_ext_rasterization_order_attachment_access;
 if (!m_features.dual_source_blend)
 Log_WarningPrintf("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
@@ -2519,6 +2522,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
 m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
 m_features.per_sample_shading = m_device_features.sampleRateShading;
 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
+m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS);
 #ifdef __APPLE__
 // Partial texture buffer uploads appear to be broken in macOS/MoltenVK.
@@ -2874,10 +2878,22 @@ bool VulkanDevice::CreatePipelineLayouts()
 Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
 }
+if (m_features.feedback_loops)
+{
+// TODO: This isn't ideal, since we can't push the RT descriptors.
+dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
+if ((m_feedback_loop_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
+return false;
+Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
+}
 {
 VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
 plb.AddDescriptorSet(m_ubo_ds_layout);
 plb.AddDescriptorSet(m_single_texture_ds_layout);
+// TODO: REMOVE ME
+if (m_features.feedback_loops)
+plb.AddDescriptorSet(m_feedback_loop_ds_layout);
 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
 return false;
 Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
@@ -2886,6 +2902,9 @@ bool VulkanDevice::CreatePipelineLayouts()
 {
 VkPipelineLayout& pl = m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
 plb.AddDescriptorSet(m_single_texture_ds_layout);
+// TODO: REMOVE ME
+if (m_features.feedback_loops)
+plb.AddDescriptorSet(m_feedback_loop_ds_layout);
 plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
 return false;
@@ -2896,6 +2915,9 @@ bool VulkanDevice::CreatePipelineLayouts()
 VkPipelineLayout& pl =
 m_pipeline_layouts[static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
 plb.AddDescriptorSet(m_single_texture_buffer_ds_layout);
+// TODO: REMOVE ME
+if (m_features.feedback_loops)
+plb.AddDescriptorSet(m_feedback_loop_ds_layout);
 plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
 return false;
@@ -2941,6 +2963,7 @@ void VulkanDevice::DestroyPipelineLayouts()
 l = VK_NULL_HANDLE;
 }
 };
+destroy_dsl(m_feedback_loop_ds_layout);
 destroy_dsl(m_multi_texture_ds_layout);
 destroy_dsl(m_single_texture_buffer_ds_layout);
 destroy_dsl(m_single_texture_ds_layout);
@ -3080,13 +3103,15 @@ bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsa
return true; return true;
} }
void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
{ {
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds); bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
m_current_feedback_loop != feedback_loop);
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false; bool needs_rt_clear = false;
m_current_depth_target = ds; m_current_depth_target = static_cast<VulkanTexture*>(ds);
for (u32 i = 0; i < num_rts; i++) for (u32 i = 0; i < num_rts; i++)
{ {
VulkanTexture* const RT = static_cast<VulkanTexture*>(rts[i]); VulkanTexture* const RT = static_cast<VulkanTexture*>(rts[i]);
@ -3096,7 +3121,8 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText
} }
for (u32 i = num_rts; i < m_num_current_render_targets; i++) for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr; m_current_render_targets[i] = nullptr;
m_num_current_render_targets = num_rts; m_num_current_render_targets = Truncate8(num_rts);
m_current_feedback_loop = feedback_loop;
if (changed) if (changed)
{ {
@ -3109,17 +3135,21 @@ void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUText
return; return;
} }
if (!m_optional_extensions.vk_khr_dynamic_rendering) if (!m_optional_extensions.vk_khr_dynamic_rendering || ((feedback_loop & GPUPipeline::ColorFeedbackLoop) &&
!m_optional_extensions.vk_khr_dynamic_rendering_local_read))
{ {
m_current_framebuffer = m_current_framebuffer = m_framebuffer_manager.Lookup(
m_framebuffer_manager.Lookup((m_num_current_render_targets > 0) ? m_current_render_targets.data() : nullptr, (m_num_current_render_targets > 0) ? reinterpret_cast<GPUTexture**>(m_current_render_targets.data()) : nullptr,
m_num_current_render_targets, m_current_depth_target, 0); m_num_current_render_targets, m_current_depth_target, feedback_loop);
if (m_current_framebuffer == VK_NULL_HANDLE) if (m_current_framebuffer == VK_NULL_HANDLE)
{ {
Log_ErrorPrint("Failed to create framebuffer"); Log_ErrorPrint("Failed to create framebuffer");
return; return;
} }
} }
m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) |
((feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0);
} }
// TODO: This could use vkCmdClearAttachments() instead. // TODO: This could use vkCmdClearAttachments() instead.
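// Minimal sketch of that alternative, assuming the caller supplies the render area and the
// clear is issued inside an active render pass (helper name and parameters are illustrative):
static void ClearColorAttachmentInPass(VkCommandBuffer cmdbuf, u32 width, u32 height)
{
  // Clear colour attachment 0 to opaque black without interrupting the render pass.
  VkClearAttachment att = {};
  att.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  att.colorAttachment = 0;
  att.clearValue.color = {{0.0f, 0.0f, 0.0f, 1.0f}};
  const VkClearRect rect = {{{0, 0}, {width, height}}, 0u, 1u};
  vkCmdClearAttachments(cmdbuf, 1, &att, 1, &rect);
}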
@ -3140,7 +3170,8 @@ void VulkanDevice::BeginRenderPass()
for (u32 i = 0; i < num_textures; i++) for (u32 i = 0; i < num_textures; i++)
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
if (m_optional_extensions.vk_khr_dynamic_rendering) if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
!(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)))
{ {
VkRenderingInfoKHR ri = { VkRenderingInfoKHR ri = {
VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
@ -3157,7 +3188,9 @@ void VulkanDevice::BeginRenderPass()
for (u32 i = 0; i < m_num_current_render_targets; i++) for (u32 i = 0; i < m_num_current_render_targets; i++)
{ {
VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]); VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
VulkanTexture::Layout::FeedbackLoop :
VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter()); rt->SetUseFenceCounter(GetCurrentFenceCounter());
VkRenderingAttachmentInfo& ai = attachments[i]; VkRenderingAttachmentInfo& ai = attachments[i];
@ -3179,7 +3212,7 @@ void VulkanDevice::BeginRenderPass()
rt->SetState(GPUTexture::State::Dirty); rt->SetState(GPUTexture::State::Dirty);
} }
if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target)) if (VulkanTexture* const ds = m_current_depth_target)
{ {
ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
ds->SetUseFenceCounter(GetCurrentFenceCounter()); ds->SetUseFenceCounter(GetCurrentFenceCounter());
@ -3201,8 +3234,8 @@ void VulkanDevice::BeginRenderPass()
ds->SetState(GPUTexture::State::Dirty); ds->SetState(GPUTexture::State::Dirty);
} }
const VulkanTexture* const rt_or_ds = static_cast<const VulkanTexture*>( const VulkanTexture* const rt_or_ds =
(m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target); (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target;
ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}}; ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}};
} }
else else
@ -3236,7 +3269,7 @@ void VulkanDevice::BeginRenderPass()
{ {
bi.framebuffer = m_current_framebuffer; bi.framebuffer = m_current_framebuffer;
bi.renderPass = m_current_render_pass = GetRenderPass( bi.renderPass = m_current_render_pass = GetRenderPass(
m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, false, false); m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, m_current_feedback_loop);
if (bi.renderPass == VK_NULL_HANDLE) if (bi.renderPass == VK_NULL_HANDLE)
{ {
Log_ErrorPrint("Failed to create render pass"); Log_ErrorPrint("Failed to create render pass");
@ -3255,7 +3288,9 @@ void VulkanDevice::BeginRenderPass()
bi.clearValueCount = i + 1; bi.clearValueCount = i + 1;
} }
rt->SetState(GPUTexture::State::Dirty); rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout(VulkanTexture::Layout::ColorAttachment); rt->TransitionToLayout((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
VulkanTexture::Layout::FeedbackLoop :
VulkanTexture::Layout::ColorAttachment);
rt->SetUseFenceCounter(GetCurrentFenceCounter()); rt->SetUseFenceCounter(GetCurrentFenceCounter());
} }
if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target)) if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target))
@ -3357,6 +3392,7 @@ void VulkanDevice::BeginSwapChainRenderPass()
s_stats.num_render_passes++; s_stats.num_render_passes++;
m_num_current_render_targets = 0; m_num_current_render_targets = 0;
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_current_depth_target = nullptr; m_current_depth_target = nullptr;
m_current_framebuffer = VK_NULL_HANDLE; m_current_framebuffer = VK_NULL_HANDLE;
@ -3420,7 +3456,8 @@ void VulkanDevice::UnbindPipeline(VulkanPipeline* pl)
void VulkanDevice::InvalidateCachedState() void VulkanDevice::InvalidateCachedState()
{ {
m_dirty_flags = ALL_DIRTY_STATE; m_dirty_flags =
ALL_DIRTY_STATE | ((m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0);
m_current_render_pass = VK_NULL_HANDLE; m_current_render_pass = VK_NULL_HANDLE;
m_current_pipeline = nullptr; m_current_pipeline = nullptr;
} }
@ -3584,11 +3621,20 @@ void VulkanDevice::SetScissor(s32 x, s32 y, s32 width, s32 height)
void VulkanDevice::PreDrawCheck() void VulkanDevice::PreDrawCheck()
{ {
if (!InRenderPass())
BeginRenderPass();
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 dirty = std::exchange(m_dirty_flags, 0); const u32 update_mask = (m_current_feedback_loop ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
const u32 dirty = m_dirty_flags & update_mask;
m_dirty_flags = m_dirty_flags & ~update_mask;
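// A pipeline layout change rebinds descriptor sets; if the input-attachment set was not
// part of this update, keep it pending so the next feedback-loop draw rewrites it.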
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT && !(dirty & DIRTY_FLAG_INPUT_ATTACHMENT))
m_dirty_flags |= DIRTY_FLAG_INPUT_ATTACHMENT; // TODO: FOR NEXT TIME
if (dirty != 0) if (dirty != 0)
{ {
if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS)) if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS |
DIRTY_FLAG_INPUT_ATTACHMENT))
{ {
if (!UpdateDescriptorSets(dirty)) if (!UpdateDescriptorSets(dirty))
{ {
@ -3598,21 +3644,22 @@ void VulkanDevice::PreDrawCheck()
} }
} }
} }
if (!InRenderPass())
BeginRenderPass();
} }
template<GPUPipeline::Layout layout> template<GPUPipeline::Layout layout>
bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets) bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{ {
std::array<VkDescriptorSet, 2> ds; [[maybe_unused]] bool new_dynamic_offsets = false;
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0; u32 first_ds = 0;
u32 num_ds = 0; u32 num_ds = 0;
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
{ {
if (new_layout || new_dynamic_offsets) new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0);
if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS))
{ {
ds[num_ds++] = m_ubo_descriptor_set; ds[num_ds++] = m_ubo_descriptor_set;
new_dynamic_offsets = true; new_dynamic_offsets = true;
@ -3645,7 +3692,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam
{ {
DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(), dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, m_current_textures[i]->GetView(),
m_current_samplers[i], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_current_samplers[i], m_current_textures[i]->GetVkLayout());
} }
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0; const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
@ -3666,13 +3713,32 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam
{ {
DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE); DebugAssert(m_current_textures[i] && m_current_samplers[i] != VK_NULL_HANDLE);
dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i], dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, m_current_textures[i]->GetView(), m_current_samplers[i],
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_current_textures[i]->GetVkLayout());
} }
dsub.Update(m_device, false); dsub.Update(m_device, false);
} }
} }
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
{
if (dirty & DIRTY_FLAG_INPUT_ATTACHMENT)
{
VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout);
if (ids == VK_NULL_HANDLE)
return false;
ds[num_ds++] = ids;
Vulkan::DescriptorSetUpdateBuilder dsub;
dsub.AddInputAttachmentDescriptorWrite(ids, 0, m_current_render_targets[0]->GetView(),
m_current_render_targets[0]->GetVkLayout());
dsub.Update(m_device, false);
}
}
DebugAssert(num_ds > 0); DebugAssert(num_ds > 0);
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS,
m_pipeline_layouts[static_cast<u8>(m_current_pipeline_layout)], first_ds, num_ds, ds.data(), m_pipeline_layouts[static_cast<u8>(m_current_pipeline_layout)], first_ds, num_ds, ds.data(),
@ -3684,25 +3750,22 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynam
bool VulkanDevice::UpdateDescriptorSets(u32 dirty) bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
{ {
const bool new_layout = (dirty & DIRTY_FLAG_PIPELINE_LAYOUT) != 0;
const bool new_dynamic_offsets = (dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0;
switch (m_current_pipeline_layout) switch (m_current_pipeline_layout)
{ {
case GPUPipeline::Layout::SingleTextureAndUBO: case GPUPipeline::Layout::SingleTextureAndUBO:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(new_layout, new_dynamic_offsets); return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(dirty);
case GPUPipeline::Layout::SingleTextureAndPushConstants: case GPUPipeline::Layout::SingleTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(new_layout, false); return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: case GPUPipeline::Layout::SingleTextureBufferAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(new_layout, false); return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(dirty);
case GPUPipeline::Layout::MultiTextureAndUBO: case GPUPipeline::Layout::MultiTextureAndUBO:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(new_layout, new_dynamic_offsets); return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(dirty);
case GPUPipeline::Layout::MultiTextureAndPushConstants: case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(new_layout, false); return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
default: default:
UnreachableCode(); UnreachableCode();
@ -3722,3 +3785,76 @@ void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
s_stats.num_draws++; s_stats.num_draws++;
vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
} }
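// Builds a layout-preserving, by-region self-dependency barrier on the bound colour target,
// so that input-attachment reads in the fragment shader observe colour writes from earlier
// primitives in the same render pass.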
VkImageMemoryBarrier VulkanDevice::GetColorBufferBarrier(const VulkanTexture* rt) const
{
const VkImageLayout vk_layout = m_optional_extensions.vk_khr_dynamic_rendering_local_read ?
VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
VK_IMAGE_LAYOUT_GENERAL;
DebugAssert(rt->GetLayout() == VulkanTexture::Layout::FeedbackLoop);
return {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
nullptr,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
vk_layout,
vk_layout,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
rt->GetImage(),
{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
}
void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
PreDrawCheck();
// TODO: The first barrier is unnecessary if we're starting the render pass.
switch (type)
{
case GPUDevice::DrawBarrier::None:
{
s_stats.num_draws++;
vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
}
break;
case GPUDevice::DrawBarrier::One:
{
DebugAssert(m_num_current_render_targets == 1);
s_stats.num_barriers++;
s_stats.num_draws++;
const VkImageMemoryBarrier barrier =
GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0]));
vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr,
1, &barrier);
vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
}
break;
case GPUDevice::DrawBarrier::Full:
{
DebugAssert(m_num_current_render_targets == 1);
const VkImageMemoryBarrier barrier =
GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0]));
const u32 indices_per_primitive = m_current_pipeline->GetVerticesPerPrimitive();
const u32 end_batch = base_index + index_count;
for (; base_index < end_batch; base_index += indices_per_primitive)
{
s_stats.num_barriers++;
s_stats.num_draws++;
vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr,
1, &barrier);
vkCmdDrawIndexed(GetCurrentCommandBuffer(), indices_per_primitive, 1, base_index, base_vertex, 0);
}
}
break;
}
}
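// Illustrative caller-side sketch (the texture/pipeline names are assumed, not taken from the
// renderer): bind a target with a colour feedback loop, then draw with per-primitive barriers
// so each primitive can read the colours written by the previous one.
static void ExampleFeedbackLoopDraw(GPUDevice* dev, GPUTexture* rt, GPUTexture* ds,
                                    GPUPipeline* pipeline, u32 index_count)
{
  GPUTexture* rts[1] = {rt};
  dev->SetRenderTargets(rts, 1, ds, GPUPipeline::ColorFeedbackLoop);
  dev->SetPipeline(pipeline);
  // DrawBarrier::None behaves like DrawIndexed(); One issues a single barrier before the
  // draw; Full re-issues the barrier between every primitive in the batch.
  dev->DrawIndexedWithBarrier(index_count, 0, 0, GPUDevice::DrawBarrier::Full);
}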

View File

@ -44,7 +44,6 @@ public:
{ {
bool vk_ext_memory_budget : 1; bool vk_ext_memory_budget : 1;
bool vk_ext_rasterization_order_attachment_access : 1; bool vk_ext_rasterization_order_attachment_access : 1;
bool vk_ext_attachment_feedback_loop_layout : 1;
bool vk_ext_full_screen_exclusive : 1; bool vk_ext_full_screen_exclusive : 1;
bool vk_khr_get_memory_requirements2 : 1; bool vk_khr_get_memory_requirements2 : 1;
bool vk_khr_bind_memory2 : 1; bool vk_khr_bind_memory2 : 1;
@ -52,6 +51,7 @@ public:
bool vk_khr_dedicated_allocation : 1; bool vk_khr_dedicated_allocation : 1;
bool vk_khr_driver_properties : 1; bool vk_khr_driver_properties : 1;
bool vk_khr_dynamic_rendering : 1; bool vk_khr_dynamic_rendering : 1;
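// Allows the bound colour attachment to be read as an input attachment inside a dynamic
// rendering pass (replaces the removed EXT attachment feedback loop layout path).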
bool vk_khr_dynamic_rendering_local_read : 1;
bool vk_khr_push_descriptor : 1; bool vk_khr_push_descriptor : 1;
bool vk_ext_external_memory_host : 1; bool vk_ext_external_memory_host : 1;
}; };
@ -114,7 +114,8 @@ public:
void PushUniformBuffer(const void* data, u32 data_size) override; void PushUniformBuffer(const void* data, u32 data_size) override;
void* MapUniformBuffer(u32 size) override; void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override; void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds) override; void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop = GPUPipeline::NoRenderPassFlags) override;
void SetPipeline(GPUPipeline* pipeline) override; void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -122,6 +123,7 @@ public:
void SetScissor(s32 x, s32 y, s32 width, s32 height) override; void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
void Draw(u32 vertex_count, u32 base_vertex) override; void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
bool SetGPUTimingEnabled(bool enabled) override; bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override; float GetAndResetAccumulatedGPUTime() override;
@ -144,13 +146,6 @@ public:
/// Returns true if Vulkan is suitable as a default for the devices in the system. /// Returns true if Vulkan is suitable as a default for the devices in the system.
static bool IsSuitableDefaultRenderer(); static bool IsSuitableDefaultRenderer();
// The interaction between raster order attachment access and fbfetch is unclear.
ALWAYS_INLINE bool UseFeedbackLoopLayout() const
{
return (m_optional_extensions.vk_ext_attachment_feedback_loop_layout &&
!m_optional_extensions.vk_ext_rasterization_order_attachment_access);
}
// Helpers for getting constants // Helpers for getting constants
ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const ALWAYS_INLINE u32 GetBufferCopyOffsetAlignment() const
{ {
@ -165,8 +160,8 @@ public:
// Creates a simple render pass. // Creates a simple render pass.
VkRenderPass GetRenderPass(const GPUPipeline::GraphicsConfig& config); VkRenderPass GetRenderPass(const GPUPipeline::GraphicsConfig& config);
VkRenderPass GetRenderPass(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, bool color_feedback_loop = false, VkRenderPass GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds,
bool depth_sampling = false); GPUPipeline::RenderPassFlag render_pass_flags);
VkRenderPass GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op); VkRenderPass GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op);
// Gets a non-clearing version of the specified render pass. Slow, don't call in hot path. // Gets a non-clearing version of the specified render pass. Slow, don't call in hot path.
@ -239,9 +234,10 @@ private:
DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1), DIRTY_FLAG_PIPELINE_LAYOUT = (1 << 1),
DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 2), DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 2),
DIRTY_FLAG_TEXTURES_OR_SAMPLERS = (1 << 3), DIRTY_FLAG_TEXTURES_OR_SAMPLERS = (1 << 3),
DIRTY_FLAG_INPUT_ATTACHMENT = (1 << 4),
ALL_DIRTY_STATE = ALL_DIRTY_STATE = DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS |
DIRTY_FLAG_INITIAL | DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_TEXTURES_OR_SAMPLERS, DIRTY_FLAG_TEXTURES_OR_SAMPLERS | DIRTY_FLAG_INPUT_ATTACHMENT,
}; };
struct RenderPassCacheKey struct RenderPassCacheKey
@ -259,8 +255,7 @@ private:
u8 depth_store_op : 1; u8 depth_store_op : 1;
u8 stencil_load_op : 2; u8 stencil_load_op : 2;
u8 stencil_store_op : 1; u8 stencil_store_op : 1;
u8 depth_sampling : 1; u8 feedback_loop : 2;
u8 color_feedback_loop : 1;
u8 samples; u8 samples;
bool operator==(const RenderPassCacheKey& rhs) const; bool operator==(const RenderPassCacheKey& rhs) const;
@ -361,7 +356,7 @@ private:
void PreDrawCheck(); void PreDrawCheck();
template<GPUPipeline::Layout layout> template<GPUPipeline::Layout layout>
bool UpdateDescriptorSetsForLayout(bool new_layout, bool new_dynamic_offsets); bool UpdateDescriptorSetsForLayout(u32 dirty);
bool UpdateDescriptorSets(u32 dirty); bool UpdateDescriptorSets(u32 dirty);
// Ends a render pass if we're currently in one. // Ends a render pass if we're currently in one.
@ -375,6 +370,8 @@ private:
static VkFramebuffer CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags); static VkFramebuffer CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags);
static void DestroyFramebuffer(VkFramebuffer fbo); static void DestroyFramebuffer(VkFramebuffer fbo);
VkImageMemoryBarrier GetColorBufferBarrier(const VulkanTexture* rt) const;
void BeginCommandBuffer(u32 index); void BeginCommandBuffer(u32 index);
void WaitForCommandBufferCompletion(u32 index); void WaitForCommandBufferCompletion(u32 index);
@ -445,6 +442,7 @@ private:
VkDescriptorSetLayout m_single_texture_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_single_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE; VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
std::array<VkPipelineLayout, static_cast<u8>(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {}; std::array<VkPipelineLayout, static_cast<u8>(GPUPipeline::Layout::MaxCount)> m_pipeline_layouts = {};
VulkanStreamBuffer m_vertex_buffer; VulkanStreamBuffer m_vertex_buffer;
@ -460,9 +458,10 @@ private:
// Which bindings/state has to be updated before the next draw. // Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE; u32 m_dirty_flags = ALL_DIRTY_STATE;
u32 m_num_current_render_targets = 0; u8 m_num_current_render_targets = 0;
std::array<GPUTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {}; GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
GPUTexture* m_current_depth_target = nullptr; std::array<VulkanTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
VulkanTexture* m_current_depth_target = nullptr;
VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_current_framebuffer = VK_NULL_HANDLE;
VkRenderPass m_current_render_pass = VK_NULL_HANDLE; VkRenderPass m_current_render_pass = VK_NULL_HANDLE;

View File

@ -72,8 +72,10 @@ std::unique_ptr<GPUShader> VulkanDevice::CreateShaderFromSource(GPUShaderStage s
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
VulkanPipeline::VulkanPipeline(VkPipeline pipeline, Layout layout) VulkanPipeline::VulkanPipeline(VkPipeline pipeline, Layout layout, u8 vertices_per_primitive,
: GPUPipeline(), m_pipeline(pipeline), m_layout(layout) RenderPassFlag render_pass_flags)
: GPUPipeline(), m_pipeline(pipeline), m_layout(layout), m_vertices_per_primitive(vertices_per_primitive),
m_render_pass_flags(render_pass_flags)
{ {
} }
@ -89,12 +91,13 @@ void VulkanPipeline::SetDebugName(const std::string_view& name)
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config) std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config)
{ {
static constexpr std::array<VkPrimitiveTopology, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives = {{ static constexpr std::array<std::pair<VkPrimitiveTopology, u32>, static_cast<u32>(GPUPipeline::Primitive::MaxCount)>
VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // Points primitives = {{
VK_PRIMITIVE_TOPOLOGY_LINE_LIST, // Lines {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 1}, // Points
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, // Triangles {VK_PRIMITIVE_TOPOLOGY_LINE_LIST, 2}, // Lines
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // TriangleStrips {VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 3}, // Triangles
}}; {VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 3}, // TriangleStrips
}};
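// The second element of each pair is the vertex count per primitive; it is cached on the
// pipeline so DrawIndexedWithBarrier() can split a batch into individual primitives.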
static constexpr u32 MAX_COMPONENTS = 4; static constexpr u32 MAX_COMPONENTS = 4;
static constexpr const VkFormat format_mapping[static_cast<u8>( static constexpr const VkFormat format_mapping[static_cast<u8>(
@ -171,7 +174,8 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
} }
} }
gpb.SetPrimitiveTopology(primitives[static_cast<u8>(config.primitive)]); const auto [vk_topology, vertices_per_primitive] = primitives[static_cast<u8>(config.primitive)];
gpb.SetPrimitiveTopology(vk_topology);
// Line width? // Line width?
@ -206,7 +210,8 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<u8>(config.layout)]); gpb.SetPipelineLayout(m_pipeline_layouts[static_cast<u8>(config.layout)]);
if (m_optional_extensions.vk_khr_dynamic_rendering) if (m_optional_extensions.vk_khr_dynamic_rendering && (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
!(config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)))
{ {
gpb.SetDynamicRendering(); gpb.SetDynamicRendering();
@ -224,6 +229,13 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)], gpb.SetDynamicRenderingDepthAttachment(VulkanDevice::TEXTURE_FORMAT_MAPPING[static_cast<u8>(config.depth_format)],
VK_FORMAT_UNDEFINED); VK_FORMAT_UNDEFINED);
} }
if (config.render_pass_flags & GPUPipeline::ColorFeedbackLoop)
{
DebugAssert(m_optional_extensions.vk_khr_dynamic_rendering_local_read &&
config.color_formats[0] != GPUTexture::Format::Unknown);
gpb.AddDynamicRenderingInputAttachment(0);
}
} }
else else
{ {
@ -236,5 +248,6 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
if (!pipeline) if (!pipeline)
return {}; return {};
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout)); return std::unique_ptr<GPUPipeline>(
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
} }

View File

@ -32,12 +32,15 @@ public:
ALWAYS_INLINE VkPipeline GetPipeline() const { return m_pipeline; } ALWAYS_INLINE VkPipeline GetPipeline() const { return m_pipeline; }
ALWAYS_INLINE Layout GetLayout() const { return m_layout; } ALWAYS_INLINE Layout GetLayout() const { return m_layout; }
ALWAYS_INLINE u8 GetVerticesPerPrimitive() const { return m_vertices_per_primitive; }
void SetDebugName(const std::string_view& name) override; void SetDebugName(const std::string_view& name) override;
private: private:
VulkanPipeline(VkPipeline pipeline, Layout layout); VulkanPipeline(VkPipeline pipeline, Layout layout, u8 vertices_per_primitive, RenderPassFlag render_pass_flags);
VkPipeline m_pipeline; VkPipeline m_pipeline;
Layout m_layout; Layout m_layout;
u8 m_vertices_per_primitive;
RenderPassFlag m_render_pass_flags;
}; };

View File

@ -18,6 +18,7 @@ static constexpr const VkComponentMapping s_identity_swizzle{
static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout) static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout)
{ {
// TODO: Wrong for depth textures in feedback loop
static constexpr std::array<VkImageLayout, static_cast<u32>(VulkanTexture::Layout::Count)> s_vk_layout_mapping = {{ static constexpr std::array<VkImageLayout, static_cast<u32>(VulkanTexture::Layout::Count)> s_vk_layout_mapping = {{
VK_IMAGE_LAYOUT_UNDEFINED, // Undefined VK_IMAGE_LAYOUT_UNDEFINED, // Undefined
VK_IMAGE_LAYOUT_PREINITIALIZED, // Preinitialized VK_IMAGE_LAYOUT_PREINITIALIZED, // Preinitialized
@ -34,17 +35,12 @@ static VkImageLayout GetVkImageLayout(VulkanTexture::Layout layout)
VK_IMAGE_LAYOUT_GENERAL, // ComputeReadWriteImage VK_IMAGE_LAYOUT_GENERAL, // ComputeReadWriteImage
VK_IMAGE_LAYOUT_GENERAL, // General VK_IMAGE_LAYOUT_GENERAL, // General
}}; }};
return (layout == VulkanTexture::Layout::FeedbackLoop && VulkanDevice::GetInstance().UseFeedbackLoopLayout()) ? return (layout == VulkanTexture::Layout::FeedbackLoop &&
VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : VulkanDevice::GetInstance().GetOptionalExtensions().vk_khr_dynamic_rendering_local_read) ?
VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
s_vk_layout_mapping[static_cast<u32>(layout)]; s_vk_layout_mapping[static_cast<u32>(layout)];
} }
static VkAccessFlagBits GetFeedbackLoopInputAccessBits()
{
return VulkanDevice::GetInstance().UseFeedbackLoopLayout() ? VK_ACCESS_SHADER_READ_BIT :
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
}
VulkanTexture::VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, VulkanTexture::VulkanTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format) VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format)
: GPUTexture(static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels), : GPUTexture(static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels),
@ -111,8 +107,7 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
DebugAssert(levels == 1); DebugAssert(levels == 1);
ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
(dev.UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT : VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
} }
break; break;
@ -120,8 +115,7 @@ std::unique_ptr<VulkanTexture> VulkanTexture::Create(u32 width, u32 height, u32
{ {
DebugAssert(levels == 1); DebugAssert(levels == 1);
ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | ici.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
(dev.UseFeedbackLoopLayout() ? VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT : 0);
vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; vci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
} }
break; break;
@ -588,7 +582,7 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
case Layout::FeedbackLoop: case Layout::FeedbackLoop:
barrier.srcAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? barrier.srcAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
GetFeedbackLoopInputAccessBits()) : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) :
(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
srcStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? srcStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
@ -664,7 +658,7 @@ void VulkanTexture::TransitionSubresourcesToLayout(VkCommandBuffer command_buffe
case Layout::FeedbackLoop: case Layout::FeedbackLoop:
barrier.dstAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? barrier.dstAccessMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?
(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
GetFeedbackLoopInputAccessBits()) : VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) :
(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
dstStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? dstStageMask = (aspect == VK_IMAGE_ASPECT_COLOR_BIT) ?