GPUDevice: Add compute shader support

Stenzek 2024-11-20 20:44:37 +10:00
parent affbdfc350
commit e647192437
20 changed files with 791 additions and 197 deletions

View File

@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders =
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.explicit_present = false;
@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += data_size;
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
DebugAssert(res.index_aligned == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
}
void* D3D11Device::MapUniformBuffer(u32 size)
@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += size;
BindUniformBuffer(pos, req_size);
}
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
{
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = pos / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
const UINT first_constant = offset / 16u;
const UINT num_constants = size / 16u;
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
}
else
{
DebugAssert(pos == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
DebugAssert(offset == 0);
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
else
{
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
}
@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
if (!m_current_compute_shader)
{
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
}
else
{
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
}
}
else
{
@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
{
m_current_textures[slot] = T;
m_context->PSSetShaderResources(slot, 1, &T);
if (m_current_compute_shader)
m_context->CSSetShaderResources(slot, 1, &T);
}
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
m_context->PSSetSamplers(slot, 1, &S);
if (m_current_compute_shader)
m_context->CSSetSamplers(slot, 1, &S);
}
}
@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
if (m_current_textures[slot] != B)
{
m_current_textures[slot] = B;
// Compute doesn't support texture buffers, yet...
m_context->PSSetShaderResources(slot, 1, &B);
}
}
@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->Draw(vertex_count, base_vertex);
}
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->DrawIndexed(index_count, base_index, base_vertex);
}
@ -1129,3 +1151,15 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
void D3D11Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
DebugAssert(m_current_compute_shader);
s_stats.num_draws++;
const u32 groups_x = threads_x / group_size_x;
const u32 groups_y = threads_y / group_size_y;
const u32 groups_z = threads_z / group_size_z;
m_context->Dispatch(groups_x, groups_y, groups_z);
}
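A note on the division above: like the D3D12 implementation later in this commit, it truncates rather than rounds up, so callers must pass thread counts that are whole multiples of the group size, or round up first with the GetDispatchCount() helper added to gpu_device.h in this commit. A minimal caller-side sketch, assuming a hypothetical compute pipeline with an 8x8x1 group size (pipeline/texture names are illustrative, not part of this commit):

// Hedged usage sketch: compute_pipeline and input_texture are hypothetical.
g_gpu_device->SetPipeline(compute_pipeline.get());
g_gpu_device->SetTextureSampler(0, input_texture, g_gpu_device->GetNearestSampler());
// Round up to whole 8x8 groups first, since Dispatch() truncates.
const auto [gx, gy, gz] = GPUDevice::GetDispatchCount(width, height, 1, 8, 8, 1);
g_gpu_device->Dispatch(gx * 8, gy * 8, 1, 8, 8, 1);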

View File

@ -75,6 +75,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -98,6 +99,8 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -140,6 +143,8 @@ private:
bool CreateBuffers();
void DestroyBuffers();
void BindUniformBuffer(u32 offset, u32 size);
void UnbindComputePipeline();
bool IsRenderTargetBound(const D3D11Texture* tex) const;
@ -180,6 +185,7 @@ private:
ID3D11VertexShader* m_current_vertex_shader = nullptr;
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
ID3D11PixelShader* m_current_pixel_shader = nullptr;
ID3D11ComputeShader* m_current_compute_shader = nullptr;
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
ID3D11DepthStencilState* m_current_depth_state = nullptr;
ID3D11BlendState* m_current_blend_state = nullptr;

View File

@ -3,6 +3,7 @@
#include "d3d11_pipeline.h"
#include "d3d11_device.h"
#include "d3d11_texture.h"
#include "d3d_common.h"
#include "common/assert.h"
@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
{
}
@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
return h;
}
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
Error* error)
{
ComPtr<ID3D11BlendState> dbs;
@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
}
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
if (!config.compute_shader) [[unlikely]]
{
Error::SetStringView(error, "Missing compute shader.");
return {};
}
return std::unique_ptr<GPUPipeline>(
new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
static_cast<const D3D11Shader*>(config.compute_shader)->GetComputeShader(),
D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}
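All of the graphics state objects are intentionally null here: D3D11Pipeline::IsComputePipeline() (declared below in d3d11_pipeline.h) distinguishes the two pipeline kinds by checking m_vs == nullptr, which is also why a compute pipeline must never be bound when a draw is issued.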
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
{
if (m_current_pipeline == pipeline)
return;
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
m_current_pipeline = PL;
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
if (!PL->IsComputePipeline())
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (was_compute)
UnbindComputePipeline();
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
}
}
else
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
{
m_current_compute_shader = cs;
m_context->CSSetShader(cs, nullptr, 0);
}
if (!was_compute)
{
// need to bind all SRVs/samplers
u32 count;
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
{
if (!m_current_textures[count])
break;
}
if (count > 0)
{
m_context->CSSetShaderResources(0, count, m_current_textures.data());
m_context->CSSetSamplers(0, count, m_current_samplers.data());
}
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargets(0, nullptr, nullptr);
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
}
}
}
}
@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
if (m_current_pipeline != pl)
return;
if (pl->IsComputePipeline())
UnbindComputePipeline();
// Let the runtime deal with the dead objects...
m_current_pipeline = nullptr;
}
void D3D11Device::UnbindComputePipeline()
{
m_current_compute_shader = nullptr;
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
m_context->CSSetShader(nullptr, nullptr, 0);
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
}
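Nulling every CS slot here matters: D3D11 does not allow a resource to be bound as a compute SRV/UAV and as a render target at the same time, and the runtime silently unbinds (and warns about) conflicting views on the next draw, so stale compute bindings would otherwise cause hard-to-trace state bugs.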

View File

@ -51,13 +51,18 @@ public:
void SetDebugName(std::string_view name) override;
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
{
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
}
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
@ -66,7 +71,8 @@ public:
private:
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
u32 blend_factor);
ComPtr<ID3D11RasterizerState> m_rs;
ComPtr<ID3D11DepthStencilState> m_ds;
@ -74,7 +80,7 @@ private:
ComPtr<ID3D11InputLayout> m_il;
ComPtr<ID3D11VertexShader> m_vs;
ComPtr<ID3D11GeometryShader> m_gs;
ComPtr<ID3D11PixelShader> m_ps;
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
D3D11_PRIMITIVE_TOPOLOGY m_topology;
u32 m_vertex_stride;
u32 m_blend_factor;

View File

@ -115,6 +115,8 @@ public:
ComputePipelineBuilder();
~ComputePipelineBuilder() = default;
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
void Clear();
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);

View File

@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = false;
m_features.explicit_present = true;
@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
1, // SingleTextureBufferAndPushConstants
0, // MultiTextureAndUBO
2, // MultiTextureAndPushConstants
2, // ComputeSingleTextureAndPushConstants
};
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
const u32 push_param =
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
else
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
}
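For illustration, the data_size / 4u above counts 32-bit root constants: a 16-byte uniform block pushed while the compute root signature is bound becomes four constants via SetComputeRoot32BitConstants(). A hypothetical sketch (the struct and values are illustrative, not part of this commit):

// Hypothetical example; ResolveUniforms is illustrative only.
struct alignas(16) ResolveUniforms { u32 src_x, src_y, width, height; };
const ResolveUniforms uniforms = {0u, 0u, 640u, 480u};
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); // 16 / 4u == 4 root constants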
void* D3D12Device::MapUniformBuffer(u32 size)
@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
}
}
{
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
}
return true;
}
@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
if (m_current_depth_target)
@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
BeginRenderPass();
}
void D3D12Device::PreDispatchCheck()
{
if (InRenderPass())
EndRenderPass();
// Transition images.
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
{
// Still need to clear the RTs.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
SetInitialPipelineState();
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 dirty = std::exchange(m_dirty_flags, 0);
if (dirty != 0)
{
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
{
UpdateRootSignature();
if (!UpdateRootParameters(dirty))
{
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
{
if (!UpdateRootParameters(dirty))
{
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
}
}
bool D3D12Device::IsUsingROVRootSignature() const
{
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
}
bool D3D12Device::IsUsingComputeRootSignature() const
{
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
}
void D3D12Device::UpdateRootSignature()
{
GetCommandList()->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
{
cmdlist->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
}
else
{
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
}
}
template<GPUPipeline::Layout layout>
@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
}
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
return false;
}
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
}
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
2);
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
}
return true;
@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@ -2331,3 +2435,15 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
PreDispatchCheck();
s_stats.num_draws++;
const u32 groups_x = threads_x / group_size_x;
const u32 groups_y = threads_y / group_size_y;
const u32 groups_z = threads_z / group_size_z;
GetCommandList()->Dispatch(groups_x, groups_y, groups_z);
}

View File

@ -96,6 +96,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -119,6 +120,8 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -275,8 +278,10 @@ private:
ID3D12RootSignature* GetCurrentRootSignature() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
bool IsUsingROVRootSignature() const;
bool IsUsingComputeRootSignature() const;
void UpdateRootSignature();
template<GPUPipeline::Layout layout>
bool UpdateParametersForLayout(u32 dirty);

View File

@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
return SHA1Digest::DigestToString(digest);
}
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
{
SHA1Digest hash;
hash.Update(&config.layout, sizeof(config.layout));
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
u8 digest[SHA1Digest::DIGEST_SIZE];
hash.Final(digest);
return SHA1Digest::DigestToString(digest);
}
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
{
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
}
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
D3D12::ComputePipelineBuilder cpb;
cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
cpb.SetShader(static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeData(),
static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeSize());
ComPtr<ID3D12PipelineState> pipeline;
if (m_pipeline_library)
{
const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
HRESULT hr =
m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
if (FAILED(hr))
{
// E_INVALIDARG = not found.
if (hr != E_INVALIDARG)
ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
// Need to create it normally.
pipeline = cpb.Create(m_device.Get(), error, false);
// Store if it wasn't an OOM or something else.
if (pipeline && hr == E_INVALIDARG)
{
hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
if (FAILED(hr))
ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
}
}
}
else
{
pipeline = cpb.Create(m_device.Get(), error, false);
}
if (!pipeline)
return {};
return std::unique_ptr<GPUPipeline>(
new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}

View File

@ -51,6 +51,7 @@ public:
void SetDebugName(std::string_view name) override;
static std::string GetPipelineName(const GraphicsConfig& config);
static std::string GetPipelineName(const ComputeConfig& config);
private:
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,

View File

@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
// Need to know if there's UBOs for mapping.
const spvc_reflected_resource *ubos, *textures, *images;
size_t ubos_count, textures_count;
size_t ubos_count, textures_count, images_count;
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
&ubos_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
&textures, &textures_count)) != SPVC_SUCCESS)
&textures, &textures_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
&images, &images_count)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
return {};
@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
switch (target_language)
{
@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
}
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < images_count; i++)
{
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
.desc_set = IMAGE_DESCRIPTOR_SET,
.binding = i,
.cbv = {},
.uav = {.register_space = 0, .register_binding = i},
.srv = {},
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}
}
}
break;
#endif
@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
if (stage == GPUShaderStage::Fragment)
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
.msl_buffer = 0,
.msl_texture = 0,
.msl_sampler = 0};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
static_cast<int>(sres));
return {};
}
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
.desc_set = 1,
const spvc_msl_resource_binding rb = {.stage = execmodel,
.desc_set = TEXTURE_DESCRIPTOR_SET,
.binding = i,
.msl_buffer = i,
.msl_texture = i,
@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
}
}
if (!m_features.framebuffer_fetch)
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
{
const spvc_msl_resource_binding rb = {
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
return {};
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
const spvc_msl_resource_binding rb = {
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}

View File

@ -160,6 +160,9 @@ public:
// Multiple textures, 128 byte UBO via push constants.
MultiTextureAndPushConstants,
// 128 byte UBO via push constants, 1 texture, compute shader.
ComputeSingleTextureAndPushConstants,
MaxCount
};
@ -416,6 +419,12 @@ public:
u32 GetRenderTargetCount() const;
};
struct ComputeConfig
{
Layout layout;
GPUShader* compute_shader;
};
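A minimal end-to-end sketch of driving the new config struct, assuming the device's CreateShader() helper and HLSL source (identifiers are illustrative, not part of this commit):

// Hypothetical usage; cs_source and error handling are illustrative only.
Error error;
std::unique_ptr<GPUShader> cs =
g_gpu_device->CreateShader(GPUShaderStage::Compute, GPUShaderLanguage::HLSL, cs_source, &error);
GPUPipeline::ComputeConfig config;
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
config.compute_shader = cs.get();
std::unique_ptr<GPUPipeline> pipeline = g_gpu_device->CreatePipeline(config, &error);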
GPUPipeline();
virtual ~GPUPipeline();
@ -501,9 +510,10 @@ public:
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
};
enum class DrawBarrier : u32
@ -532,6 +542,7 @@ public:
bool texture_buffers_emulated_with_ssbo : 1;
bool feedback_loops : 1;
bool geometry_shaders : 1;
bool compute_shaders : 1;
bool partial_msaa_resolve : 1;
bool memory_import : 1;
bool explicit_present : 1;
@ -625,11 +636,20 @@ public:
0, // SingleTextureBufferAndPushConstants
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
1, // ComputeSingleTextureAndPushConstants
};
return counts[static_cast<u8>(layout)];
}
/// Returns the number of thread groups to dispatch for a given total count and local size.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
u32 local_size_y, u32 local_size_z)
{
return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y,
(count_z + (local_size_z - 1)) / local_size_z);
}
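Worked example of the round-up above: thread counts that are not multiples of the local size still get full coverage.

// (1000 + 7) / 8 == 125 and (500 + 7) / 8 == 63: one extra partial group per axis.
const auto [gx, gy, gz] = GPUDevice::GetDispatchCount(1000, 500, 1, 8, 8, 1);
// gx == 125, gy == 63, gz == 1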
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
@ -638,10 +658,6 @@ public:
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
@ -712,6 +728,8 @@ public:
Error* error = nullptr, const char* entry_point = "main");
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
Error* error = nullptr) = 0;
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
Error* error = nullptr) = 0;
/// Debug messaging.
virtual void PushDebugGroup(const char* name) = 0;
@ -753,6 +771,8 @@ public:
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) = 0;
/// Returns false if the window was completely occluded.
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;

View File

@ -78,7 +78,16 @@ class MetalPipeline final : public GPUPipeline
public:
~MetalPipeline() override;
ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; }
ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); }
ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); }
ALWAYS_INLINE id<MTLRenderPipelineState> GetRenderPipelineState() const
{
return (id<MTLRenderPipelineState>)m_pipeline;
}
ALWAYS_INLINE id<MTLComputePipelineState> GetComputePipelineState() const
{
return (id<MTLComputePipelineState>)m_pipeline;
}
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
@ -86,10 +95,9 @@ public:
void SetDebugName(std::string_view name) override;
private:
MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
MTLPrimitiveType primitive);
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive);
id<MTLRenderPipelineState> m_pipeline;
id m_pipeline;
id<MTLDepthStencilState> m_depth;
MTLCullMode m_cull_mode;
MTLPrimitiveType m_primitive;
@ -251,6 +259,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@ -265,7 +274,7 @@ public:
void* MapUniformBuffer(u32 size) override;
void UnmapUniformBuffer(u32 size) override;
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop) override;
GPUPipeline::RenderPassFlag flags) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
@ -274,6 +283,8 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@ -338,7 +349,6 @@ private:
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
std::string_view entry_point, Error* error);
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
ClearPipelineConfig GetCurrentClearPipelineConfig() const;
id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
@ -349,9 +359,12 @@ private:
void CleanupObjects();
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
ALWAYS_INLINE bool InComputePass() const { return (m_compute_encoder != nil); }
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
void BeginRenderPass();
void EndRenderPass();
void BeginComputePass();
void EndComputePass();
void EndInlineUploading();
void EndAnyEncoding();
@ -359,6 +372,8 @@ private:
void SetInitialEncoderState();
void SetViewportInRenderEncoder();
void SetScissorInRenderEncoder();
void CommitRenderTargetClears();
void BindRenderTargetsAsComputeImages();
void RenderBlankFrame(MetalSwapChain* swap_chain);
@ -384,7 +399,7 @@ private:
id<MTLLibrary> m_shaders = nil;
id<MTLBinaryArchive> m_pipeline_archive = nil;
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, std::unique_ptr<GPUPipeline>>>
m_resolve_pipelines;
std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
@ -394,9 +409,10 @@ private:
id<MTLCommandBuffer> m_render_cmdbuf = nil;
id<MTLRenderCommandEncoder> m_render_encoder = nil;
id<MTLComputeCommandEncoder> m_compute_encoder = nil;
u8 m_num_current_render_targets = 0;
GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
MetalTexture* m_current_depth_target = nullptr;

View File

@ -77,7 +77,8 @@ static void LogNSError(NSError* error, std::string_view message)
{
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, message);
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Code: {}", static_cast<u32>(error.code));
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}", [error.description UTF8String]);
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}",
[error.description UTF8String]);
}
static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt)
@ -503,28 +504,6 @@ id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSSt
return function;
}
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
{
MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new];
if (name != nil)
[desc setLabel:name];
[desc setComputeFunction:function];
NSError* err = nil;
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
options:MTLPipelineOptionNone
reflection:nil
error:&err];
[desc release];
if (pipeline == nil)
{
LogNSError(err, "Create compute pipeline failed:");
return nil;
}
return pipeline;
}
void MetalDevice::DestroyDevice()
{
WaitForPreviousCommandBuffers();
@ -564,11 +543,6 @@ void MetalDevice::DestroyDevice()
[it.second release];
}
m_depth_states.clear();
for (auto& it : m_resolve_pipelines)
{
if (it.second != nil)
[it.second release];
}
m_resolve_pipelines.clear();
for (auto& it : m_clear_pipelines)
{
@ -755,7 +729,7 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
return CreateShaderFromMSL(stage, source, entry_point, error);
}
MetalPipeline::MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
MTLPrimitiveType primitive)
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
{
@ -982,6 +956,29 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
}
}
std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
@autoreleasepool
{
MTLComputePipelineDescriptor* desc = [[MTLComputePipelineDescriptor new] autorelease];
[desc setComputeFunction:static_cast<MetalShader*>(config.compute_shader)->GetFunction()];
NSError* nserror = nil;
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
options:MTLPipelineOptionNone
reflection:nil
error:&nserror];
if (pipeline == nil)
{
LogNSError(nserror, "Failed to create compute pipeline state");
CocoaTools::NSErrorToErrorObject(error, "newComputePipelineStateWithDescriptor failed: ", nserror);
return {};
}
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, nil, MTLCullModeNone, MTLPrimitiveTypePoint));
}
}
MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
Format format)
: GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture)
@ -1559,14 +1556,14 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
const GPUTexture::Format src_format = dst->GetFormat();
const GPUTexture::Format dst_format = dst->GetFormat();
id<MTLComputePipelineState> resolve_pipeline = nil;
GPUPipeline* resolve_pipeline;
if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
[src_format, dst_format](const auto& it) {
return it.first.first == src_format && it.first.second == dst_format;
});
iter != m_resolve_pipelines.end())
{
resolve_pipeline = iter->second;
resolve_pipeline = iter->second.get();
}
else
{
@ -1579,32 +1576,41 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
if (function == nil)
Panic("Failed to get resolve kernel");
resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease];
if (resolve_pipeline != nil)
[resolve_pipeline retain];
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function);
GPUPipeline::ComputeConfig config;
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
config.compute_shader = &temp_shader;
std::unique_ptr<GPUPipeline> pipeline = CreatePipeline(config, nullptr);
if (!pipeline)
Panic("Failed to create resolve pipeline");
GL_OBJECT_NAME(pipeline, is_depth ? "Depth Resolve" : "Color Resolve");
resolve_pipeline =
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), std::move(pipeline)).second.get();
}
}
if (resolve_pipeline == nil)
Panic("Failed to get resolve pipeline");
if (InRenderPass())
EndRenderPass();
s_stats.num_copies++;
const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
const id<MTLComputePipelineState> mtl_pipeline =
static_cast<MetalPipeline*>(resolve_pipeline)->GetComputePipelineState();
const u32 threadgroupHeight = mtl_pipeline.maxTotalThreadsPerThreadgroup / mtl_pipeline.threadExecutionWidth;
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(mtl_pipeline.threadExecutionWidth, threadgroupHeight, 1);
const MTLSize threadgroupsInGrid =
MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
(src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
[computeEncoder setComputePipelineState:resolve_pipeline];
[computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
[computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
[computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
[computeEncoder endEncoding];
// Set up manually to not disturb state.
BeginComputePass();
[m_compute_encoder setComputePipelineState:mtl_pipeline];
[m_compute_encoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
[m_compute_encoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
[m_compute_encoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
EndComputePass();
}
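As a concrete example of the sizing math above (typical values, not guaranteed by Metal): with maxTotalThreadsPerThreadgroup == 1024 and threadExecutionWidth == 32, the intrinsic threadgroup is 32x32x1, so a 1024x512 resolve dispatches a 32x16x1 grid of threadgroups.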
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
@ -1645,7 +1651,7 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
if (m_current_pipeline)
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
if (m_current_cull_mode != MTLCullModeNone)
[m_render_encoder setCullMode:m_current_cull_mode];
if (depth != m_current_depth_state)
@ -1674,6 +1680,8 @@ void MetalDevice::CommitClear(MetalTexture* tex)
// TODO: We could combine it with the current render pass.
if (InRenderPass())
EndRenderPass();
else if (InComputePass())
EndComputePass();
@autoreleasepool
{
@ -1896,11 +1904,13 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
}
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
GPUPipeline::RenderPassFlag feedback_loop)
GPUPipeline::RenderPassFlag flags)
{
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
(!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) !=
(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))));
((flags & GPUPipeline::BindRenderTargetsAsImages) !=
(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) ||
(!m_features.framebuffer_fetch && ((flags & GPUPipeline::ColorFeedbackLoop) !=
(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))));
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
bool needs_rt_clear = false;
@ -1915,12 +1925,19 @@ void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
m_current_render_targets[i] = nullptr;
m_num_current_render_targets = static_cast<u8>(num_rts);
m_current_feedback_loop = feedback_loop;
m_current_render_pass_flags = flags;
if (changed || needs_rt_clear || needs_ds_clear)
{
if (InRenderPass())
{
EndRenderPass();
}
else if (InComputePass() && (flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
{
CommitRenderTargetClears();
BindRenderTargetsAsComputeImages();
}
}
}
@ -1931,26 +1948,34 @@ void MetalDevice::SetPipeline(GPUPipeline* pipeline)
return;
m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
if (InRenderPass())
if (!m_current_pipeline->IsComputePipeline())
{
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
if (InRenderPass())
{
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
if (m_current_depth_state != m_current_pipeline->GetDepthState())
{
m_current_depth_state = m_current_pipeline->GetDepthState();
[m_render_encoder setDepthStencilState:m_current_depth_state];
if (m_current_depth_state != m_current_pipeline->GetDepthState())
{
m_current_depth_state = m_current_pipeline->GetDepthState();
[m_render_encoder setDepthStencilState:m_current_depth_state];
}
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
{
m_current_cull_mode = m_current_pipeline->GetCullMode();
[m_render_encoder setCullMode:m_current_cull_mode];
}
}
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
else
{
// Still need to set depth state before the draw begins.
m_current_depth_state = m_current_pipeline->GetDepthState();
m_current_cull_mode = m_current_pipeline->GetCullMode();
[m_render_encoder setCullMode:m_current_cull_mode];
}
}
else
{
// Still need to set depth state before the draw begins.
m_current_depth_state = m_current_pipeline->GetDepthState();
m_current_cull_mode = m_current_pipeline->GetCullMode();
if (InComputePass())
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
}
}
@ -1979,6 +2004,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
m_current_textures[slot] = T;
if (InRenderPass())
[m_render_encoder setFragmentTexture:T atIndex:slot];
else if (InComputePass())
[m_compute_encoder setTexture:T atIndex:slot];
}
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
@ -1987,6 +2014,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
m_current_samplers[slot] = S;
if (InRenderPass())
[m_render_encoder setFragmentSamplerState:S atIndex:slot];
else if (InComputePass())
[m_compute_encoder setSamplerState:S atIndex:slot];
}
}
@ -2011,6 +2040,8 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
m_current_textures[i] = nil;
if (InRenderPass())
[m_render_encoder setFragmentTexture:nil atIndex:i];
else if (InComputePass())
[m_compute_encoder setTexture:nil atIndex:i];
}
}
@ -2070,7 +2101,7 @@ void MetalDevice::SetScissor(const GSVector4i rc)
void MetalDevice::BeginRenderPass()
{
DebugAssert(m_render_encoder == nil);
DebugAssert(m_render_encoder == nil && !InComputePass());
// Inline writes :(
if (m_inline_upload_encoder != nil)
@ -2180,12 +2211,57 @@ void MetalDevice::BeginRenderPass()
void MetalDevice::EndRenderPass()
{
DebugAssert(InRenderPass() && !IsInlineUploading());
DebugAssert(InRenderPass() && !IsInlineUploading() && !InComputePass());
[m_render_encoder endEncoding];
[m_render_encoder release];
m_render_encoder = nil;
}
void MetalDevice::BeginComputePass()
{
DebugAssert(!InRenderPass() && !IsInlineUploading() && !InComputePass());
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
CommitRenderTargetClears();
m_compute_encoder = [[m_render_cmdbuf computeCommandEncoder] retain];
[m_compute_encoder setTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
[m_compute_encoder setSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
BindRenderTargetsAsComputeImages();
if (m_current_pipeline && m_current_pipeline->IsComputePipeline())
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
}
void MetalDevice::CommitRenderTargetClears()
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
MetalTexture* rt = m_current_render_targets[i];
if (rt->GetState() == GPUTexture::State::Invalidated)
rt->SetState(GPUTexture::State::Dirty);
else if (rt->GetState() == GPUTexture::State::Cleared)
CommitClear(rt);
}
}
void MetalDevice::BindRenderTargetsAsComputeImages()
{
for (u32 i = 0; i < m_num_current_render_targets; i++)
[m_compute_encoder setTexture:m_current_render_targets[i]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS + i];
}
void MetalDevice::EndComputePass()
{
DebugAssert(InComputePass());
[m_compute_encoder endEncoding];
[m_compute_encoder release];
m_compute_encoder = nil;
}
void MetalDevice::EndInlineUploading()
{
DebugAssert(IsInlineUploading() && !InRenderPass());
@ -2198,6 +2274,8 @@ void MetalDevice::EndAnyEncoding()
{
if (InRenderPass())
EndRenderPass();
else if (InComputePass())
EndComputePass();
else if (IsInlineUploading())
EndInlineUploading();
}
@ -2213,14 +2291,14 @@ void MetalDevice::SetInitialEncoderState()
[m_render_encoder setCullMode:m_current_cull_mode];
if (m_current_depth_state != nil)
[m_render_encoder setDepthStencilState:m_current_depth_state];
if (m_current_pipeline != nil)
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
if (m_current_pipeline && m_current_pipeline->IsRenderPipeline())
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
if (m_current_ssbo)
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))
if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))
{
DebugAssert(m_current_render_targets[0]);
[m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
@ -2249,7 +2327,12 @@ void MetalDevice::SetScissorInRenderEncoder()
void MetalDevice::PreDrawCheck()
{
if (!InRenderPass())
{
if (InComputePass())
EndComputePass();
BeginRenderPass();
}
}
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
@ -2392,6 +2475,25 @@ void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
}
}
void MetalDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
if (!InComputePass())
{
if (InRenderPass())
EndRenderPass();
BeginComputePass();
}
DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline());
id<MTLComputePipelineState> pipeline = m_current_pipeline->GetComputePipelineState();
// TODO: We could remap to the optimal group size..
[m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)
threadsPerThreadgroup:MTLSizeMake(group_size_x, group_size_y, group_size_z)];
}
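As a rough sketch of the TODO above (not part of this commit): MTLComputePipelineState exposes threadExecutionWidth and maxTotalThreadsPerThreadgroup, from which a device-preferred threadgroup shape could be derived inside Dispatch(), provided the shader does not depend on its declared group dimensions:
// Hypothetical remap of the caller's group size to the pipeline's preferred
// shape; dispatchThreads still handles ragged edges at the grid boundary.
const NSUInteger tg_width = pipeline.threadExecutionWidth;
const NSUInteger tg_height = pipeline.maxTotalThreadsPerThreadgroup / tg_width;
[m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)
threadsPerThreadgroup:MTLSizeMake(tg_width, tg_height, 1)];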
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
{
@autoreleasepool
@@ -2450,7 +2552,7 @@ GPUDevice::PresentResult MetalDevice::BeginPresent(GPUSwapChain* swap_chain, u32
s_stats.num_render_passes++;
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
m_num_current_render_targets = 0;
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
m_current_depth_target = nullptr;
m_current_pipeline = nullptr;
m_current_depth_state = nil;

View File

@@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
}
}
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
ERROR_LOG("Compute shaders are not yet supported.");
return {};
}
void OpenGLDevice::PushDebugGroup(const char* name)
{
#ifdef _DEBUG
@@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
m_features.compute_shaders = false;
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
@@ -1078,6 +1085,12 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
Panic("Barriers are not supported");
}
void OpenGLDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
Panic("Compute shaders are not supported");
}
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
u32* map_base_vertex)
{

View File

@@ -77,6 +77,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -100,6 +101,8 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;

View File

@@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
m_smap_constants = {};
}
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
bool clear /*= true*/)
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
Error* error)
{
VkPipeline pipeline;
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
return VK_NULL_HANDLE;
}

View File

@@ -197,7 +197,7 @@ public:
void Clear();
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
void SetShader(VkShaderModule module, const char* entry_point);

View File

@@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
WARNING_LOG("Emulating texture buffers with SSBOs.");
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
@@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts()
}
{
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
@@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts()
if (m_optional_extensions.vk_khr_push_descriptor)
dslb.SetPushFlag();
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
@@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts()
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
}
if (m_features.raster_order_views)
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
}
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
for (u32 type = 0; type < 3; type++)
{
@@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
@@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
@@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
}
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
}
return true;
}
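For context, a frontend would drive the new compute layout through the generic GPUDevice API; a minimal hypothetical sketch (g_gpu_device, shader, and the 8x8 group size are illustrative assumptions, not code from this commit):
// Build a compute pipeline against ComputeSingleTextureAndPushConstants and
// dispatch one thread per texel of a width x height image.
GPUPipeline::ComputeConfig config;
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
config.compute_shader = shader.get(); // previously compiled compute-stage shader
Error error;
std::unique_ptr<GPUPipeline> pipeline = g_gpu_device->CreatePipeline(config, &error);
if (pipeline)
{
g_gpu_device->SetPipeline(pipeline.get());
g_gpu_device->Dispatch(width, height, 1, 8, 8, 1);
}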
@@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts()
l = VK_NULL_HANDLE;
}
};
destroy_dsl(m_rov_ds_layout);
destroy_dsl(m_image_ds_layout);
destroy_dsl(m_feedback_loop_ds_layout);
destroy_dsl(m_multi_texture_ds_layout);
destroy_dsl(m_single_texture_buffer_ds_layout);
@@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck()
}
}
void VulkanDevice::PreDispatchCheck()
{
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
}
// Binding as image, but we still need to clear it.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
VulkanTexture* rt = m_current_render_targets[i];
if (rt->GetState() == GPUTexture::State::Cleared)
rt->CommitClear(m_current_command_buffer);
rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
SetInitialPipelineState();
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
const u32 dirty = m_dirty_flags & update_mask;
m_dirty_flags = m_dirty_flags & ~update_mask;
if (dirty != 0)
{
if (!UpdateDescriptorSets(dirty))
{
SubmitCommandBuffer(false, "out of descriptor sets");
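// Presumably safe: submitting moves to a fresh command buffer with DIRTY_FLAG_INITIAL
// set, so the retry rebinds full state against a new descriptor pool.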
PreDispatchCheck();
return;
}
}
}
template<GPUPipeline::Layout layout>
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
[[maybe_unused]] bool new_dynamic_offsets = false;
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
constexpr VkPipelineBindPoint vk_bind_point =
((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS :
VK_PIPELINE_BIND_POINT_COMPUTE);
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0;
u32 num_ds = 0;
@@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
{
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
@@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
if (num_ds == 0)
return true;
}
@@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
if (ids == VK_NULL_HANDLE)
return false;
@@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
DebugAssert(num_ds > 0);
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
static_cast<u32>(new_dynamic_offsets),
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
return true;
@@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@@ -3911,3 +3972,15 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
DefaultCaseIsUnreachable();
}
}
void VulkanDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
PreDispatchCheck();
s_stats.num_draws++;
const u32 groups_x = threads_x / group_size_x;
const u32 groups_y = threads_y / group_size_y;
const u32 groups_z = threads_z / group_size_z;
vkCmdDispatch(GetCurrentCommandBuffer(), groups_x, groups_y, groups_z);
}
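Note the truncating division above: thread counts that are not exact multiples of the group size silently drop the remainder, so callers are presumably required to pass aligned counts. A defensive round-up sketch (not what this commit does; the shader would then need its own bounds check):
// Hypothetical ceil-division variant that covers ragged edges with a partial
// final group instead of discarding the remaining threads.
const u32 groups_x = (threads_x + group_size_x - 1) / group_size_x;
const u32 groups_y = (threads_y + group_size_y - 1) / group_size_y;
const u32 groups_z = (threads_z + group_size_z - 1) / group_size_z;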

View File

@@ -113,6 +113,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -136,6 +137,8 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@@ -373,6 +376,7 @@ private:
VkPipelineLayout GetCurrentVkPipelineLayout() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
template<GPUPipeline::Layout layout>
bool UpdateDescriptorSetsForLayout(u32 dirty);
@@ -435,7 +439,7 @@ private:
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
static_cast<size_t>(PipelineLayoutType::MaxCount)>
m_pipeline_layouts = {};

View File

@@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
return std::unique_ptr<GPUPipeline>(
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
}
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
Vulkan::ComputePipelineBuilder cpb;
cpb.SetShader(static_cast<const VulkanShader*>(config.compute_shader)->GetModule(), "main");
cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast<size_t>(config.layout)]);
const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error);
if (!pipeline)
return {};
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
}