GPUDevice: Fix/improve compute shader support

Add multi-texture compute pipeline layouts, with and without a UBO.
This commit is contained in:
Stenzek 2025-04-06 17:06:39 +10:00
parent 2a99529473
commit eca113cd76
No known key found for this signature in database
5 changed files with 96 additions and 34 deletions

View File

@ -696,7 +696,14 @@ GPUDevice::PresentResult D3D11Device::BeginPresent(GPUSwapChain* swap_chain, u32
}
m_context->ClearRenderTargetView(SC->GetRTV(), GSVector4::unorm8(clear_color).F32);
m_context->OMSetRenderTargets(1, SC->GetRTVArray(), nullptr);
// Ugh, have to clear out any UAV bindings...
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages && !m_current_compute_shader)
m_context->OMSetRenderTargetsAndUnorderedAccessViews(1, SC->GetRTVArray(), nullptr, 0, 0, nullptr, nullptr);
else
m_context->OMSetRenderTargets(1, SC->GetRTVArray(), nullptr);
if (m_current_compute_shader)
UnbindComputePipeline();
s_stats.num_render_passes++;
m_num_current_render_targets = 0;
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;

View File

@ -1764,7 +1764,19 @@ bool D3D12Device::CreateRootSignatures(Error* error)
}
{
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndUBO)];
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Compute Multi Texture + UBO Pipeline Layout");
}
{
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)];
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
@ -1772,7 +1784,7 @@ bool D3D12Device::CreateRootSignatures(Error* error)
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
D3D12::SetObjectName(rs.Get(), "Compute Multi Texture Pipeline Layout");
}
return true;
@ -2058,7 +2070,7 @@ bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
void D3D12Device::InvalidateCachedState()
{
DebugAssert(!m_in_render_pass);;
DebugAssert(!m_in_render_pass);
m_dirty_flags = ALL_DIRTY_STATE &
((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
}
@ -2405,7 +2417,7 @@ void D3D12Device::PreDispatchCheck()
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
}
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
@ -2459,7 +2471,7 @@ bool D3D12Device::IsUsingROVRootSignature() const
bool D3D12Device::IsUsingComputeRootSignature() const
{
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
return IsComputeLayout(m_current_pipeline_layout);
}
void D3D12Device::UpdateRootSignature()
@ -2481,10 +2493,17 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
{
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::MultiTextureAndUBO ||
layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO)
{
if (dirty & DIRTY_FLAG_CONSTANT_BUFFER)
cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
{
if constexpr (!IsComputeLayout(layout))
cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
else
cmdlist->SetComputeRootConstantBufferView(3, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
}
}
constexpr u32 num_textures = GetActiveTexturesForLayout(layout);
@ -2514,7 +2533,7 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
if constexpr (!IsComputeLayout(layout))
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
@ -2535,7 +2554,7 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
return false;
}
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
if constexpr (!IsComputeLayout(layout))
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
@ -2576,11 +2595,14 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
constexpr u32 rov_param =
(layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
2);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
IsComputeLayout(layout) ?
2 :
((layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ?
3 :
2));
if constexpr (!IsComputeLayout(layout))
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
@ -2608,8 +2630,11 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeMultiTextureAndUBO:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeMultiTextureAndUBO>(dirty);
case GPUPipeline::Layout::ComputeMultiTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeMultiTextureAndPushConstants>(dirty);
default:
UnreachableCode();

View File

@ -182,8 +182,11 @@ public:
// Multiple textures, 128 byte UBO via push constants.
MultiTextureAndPushConstants,
// 128 byte UBO via push constants, 1 texture, compute shader.
ComputeSingleTextureAndPushConstants,
// Multiple textures, 1 streamed UBO, compute shader.
ComputeMultiTextureAndUBO,
// 128 byte UBO via push constants, multiple textures, compute shader.
ComputeMultiTextureAndPushConstants,
MaxCount
};
@ -697,12 +700,19 @@ public:
0, // SingleTextureBufferAndPushConstants
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
1, // ComputeSingleTextureAndPushConstants
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndUBO
MAX_TEXTURE_SAMPLERS, // ComputeMultiTextureAndPushConstants
};
return counts[static_cast<u8>(layout)];
}
/// Reports whether a pipeline layout belongs to the compute shader family.
/// Depends on the declaration order of GPUPipeline::Layout: ComputeMultiTextureAndUBO is the first compute entry,
/// so every enumerator declared at or after it compares as a compute layout.
static constexpr bool IsComputeLayout(GPUPipeline::Layout layout)
{
  constexpr GPUPipeline::Layout first_compute_layout = GPUPipeline::Layout::ComputeMultiTextureAndUBO;
  return !(layout < first_compute_layout);
}
/// Returns the number of thread groups to dispatch for a given total count and local size.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
u32 local_size_y, u32 local_size_z)

View File

@ -1627,7 +1627,7 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function);
GPUPipeline::ComputeConfig config;
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
config.layout = GPUPipeline::Layout::ComputeMultiTextureAndPushConstants;
config.compute_shader = &temp_shader;
std::unique_ptr<GPUPipeline> pipeline = CreatePipeline(config, nullptr);

View File

@ -2966,15 +2966,25 @@ bool VulkanDevice::CreatePipelineLayouts()
}
}
{
VkPipelineLayout& pl = m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndUBO)];
plb.AddDescriptorSet(m_ubo_ds_layout);
plb.AddDescriptorSet(m_multi_texture_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Compute Multi Texture + UBO Pipeline Layout");
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)];
plb.AddDescriptorSet(m_multi_texture_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
Vulkan::SetObjectName(m_device, pl, "Compute Multi Texture Pipeline Layout");
}
return true;
@ -3517,7 +3527,10 @@ void VulkanDevice::SetPipeline(GPUPipeline* pipeline)
m_current_pipeline = static_cast<VulkanPipeline*>(pipeline);
vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
vkCmdBindPipeline(m_current_command_buffer,
IsComputeLayout(m_current_pipeline->GetLayout()) ? VK_PIPELINE_BIND_POINT_COMPUTE :
VK_PIPELINE_BIND_POINT_GRAPHICS,
m_current_pipeline->GetPipeline());
if (m_current_pipeline_layout != m_current_pipeline->GetLayout())
{
@ -3562,7 +3575,9 @@ VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline
VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
{
return m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
return m_pipeline_layouts[IsComputeLayout(m_current_pipeline_layout) ?
0 :
static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
[static_cast<size_t>(m_current_pipeline_layout)];
}
@ -3778,14 +3793,15 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
[[maybe_unused]] bool new_dynamic_offsets = false;
constexpr VkPipelineBindPoint vk_bind_point =
((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS :
VK_PIPELINE_BIND_POINT_COMPUTE);
(IsComputeLayout(layout) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0;
u32 num_ds = 0;
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::MultiTextureAndUBO ||
layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO)
{
new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0);
@ -3801,8 +3817,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
{
VulkanTexture* const tex =
m_current_textures[0] ? m_current_textures[0] : static_cast<VulkanTexture*>(m_empty_texture.get());
@ -3815,7 +3830,9 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
ds[num_ds++] = m_current_texture_buffer->GetDescriptorSet();
}
else if constexpr (layout == GPUPipeline::Layout::MultiTextureAndUBO ||
layout == GPUPipeline::Layout::MultiTextureAndPushConstants)
layout == GPUPipeline::Layout::MultiTextureAndPushConstants ||
layout == GPUPipeline::Layout::ComputeMultiTextureAndUBO ||
layout == GPUPipeline::Layout::ComputeMultiTextureAndPushConstants)
{
Vulkan::DescriptorSetUpdateBuilder dsub;
@ -3925,8 +3942,11 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeMultiTextureAndUBO:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeMultiTextureAndUBO>(dirty);
case GPUPipeline::Layout::ComputeMultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeMultiTextureAndPushConstants>(dirty);
default:
UnreachableCode();