GPUDevice: Add compute shader support
This commit is contained in:
parent
affbdfc350
commit
e647192437
|
@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
|
|||
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||
m_features.feedback_loops = false;
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||
m_features.compute_shaders =
|
||||
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
|
||||
m_features.partial_msaa_resolve = false;
|
||||
m_features.memory_import = false;
|
||||
m_features.explicit_present = false;
|
||||
|
@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
|
|||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
|
||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
||||
{
|
||||
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
|
||||
const UINT num_constants = req_size / 16u;
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
DebugAssert(res.index_aligned == 0);
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
|
||||
}
|
||||
|
||||
void* D3D11Device::MapUniformBuffer(u32 size)
|
||||
|
@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
|
|||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||
s_stats.buffer_streamed += size;
|
||||
|
||||
BindUniformBuffer(pos, req_size);
|
||||
}
|
||||
|
||||
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
|
||||
{
|
||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
||||
{
|
||||
const UINT first_constant = pos / 16u;
|
||||
const UINT num_constants = req_size / 16u;
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
const UINT first_constant = offset / 16u;
|
||||
const UINT num_constants = size / 16u;
|
||||
if (m_current_compute_shader)
|
||||
{
|
||||
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
DebugAssert(pos == 0);
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
DebugAssert(offset == 0);
|
||||
if (m_current_compute_shader)
|
||||
{
|
||||
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
|||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||
|
||||
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
||||
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
||||
m_num_current_render_targets, uavs.data(), nullptr);
|
||||
if (!m_current_compute_shader)
|
||||
{
|
||||
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
||||
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
||||
m_num_current_render_targets, uavs.data(), nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
|||
{
|
||||
m_current_textures[slot] = T;
|
||||
m_context->PSSetShaderResources(slot, 1, &T);
|
||||
if (m_current_compute_shader)
|
||||
m_context->CSSetShaderResources(slot, 1, &T);
|
||||
}
|
||||
if (m_current_samplers[slot] != S)
|
||||
{
|
||||
m_current_samplers[slot] = S;
|
||||
m_context->PSSetSamplers(slot, 1, &S);
|
||||
if (m_current_compute_shader)
|
||||
m_context->CSSetSamplers(slot, 1, &S);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
|
|||
if (m_current_textures[slot] != B)
|
||||
{
|
||||
m_current_textures[slot] = B;
|
||||
|
||||
// Compute doesn't support texture buffers, yet...
|
||||
m_context->PSSetShaderResources(slot, 1, &B);
|
||||
}
|
||||
}
|
||||
|
@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
|
|||
|
||||
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
|
||||
{
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||
s_stats.num_draws++;
|
||||
m_context->Draw(vertex_count, base_vertex);
|
||||
}
|
||||
|
||||
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
|
||||
{
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||
s_stats.num_draws++;
|
||||
m_context->DrawIndexed(index_count, base_index, base_vertex);
|
||||
}
|
||||
|
@ -1129,3 +1151,15 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
|||
{
|
||||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
void D3D11Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z)
|
||||
{
|
||||
DebugAssert(m_current_compute_shader);
|
||||
s_stats.num_draws++;
|
||||
|
||||
const u32 groups_x = threads_x / group_size_x;
|
||||
const u32 groups_y = threads_y / group_size_y;
|
||||
const u32 groups_z = threads_z / group_size_z;
|
||||
m_context->Dispatch(groups_x, groups_y, groups_z);
|
||||
}
|
||||
|
|
|
@ -75,6 +75,7 @@ public:
|
|||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
|
@ -98,6 +99,8 @@ public:
|
|||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
|
@ -140,6 +143,8 @@ private:
|
|||
|
||||
bool CreateBuffers();
|
||||
void DestroyBuffers();
|
||||
void BindUniformBuffer(u32 offset, u32 size);
|
||||
void UnbindComputePipeline();
|
||||
|
||||
bool IsRenderTargetBound(const D3D11Texture* tex) const;
|
||||
|
||||
|
@ -180,6 +185,7 @@ private:
|
|||
ID3D11VertexShader* m_current_vertex_shader = nullptr;
|
||||
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
|
||||
ID3D11PixelShader* m_current_pixel_shader = nullptr;
|
||||
ID3D11ComputeShader* m_current_compute_shader = nullptr;
|
||||
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
|
||||
ID3D11DepthStencilState* m_current_depth_state = nullptr;
|
||||
ID3D11BlendState* m_current_blend_state = nullptr;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "d3d11_pipeline.h"
|
||||
#include "d3d11_device.h"
|
||||
#include "d3d11_texture.h"
|
||||
#include "d3d_common.h"
|
||||
|
||||
#include "common/assert.h"
|
||||
|
@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
|
|||
|
||||
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
|
||||
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
|
||||
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
|
||||
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
|
||||
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
|
||||
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
|
||||
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
|
||||
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
|
||||
{
|
||||
}
|
||||
|
@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
|
|||
return h;
|
||||
}
|
||||
|
||||
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
|
||||
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
|
||||
Error* error)
|
||||
{
|
||||
ComPtr<ID3D11BlendState> dbs;
|
||||
|
||||
|
@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
|
|||
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
if (!config.compute_shader) [[unlikely]]
|
||||
{
|
||||
Error::SetStringView(error, "Missing compute shader.");
|
||||
return {};
|
||||
}
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(
|
||||
new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
static_cast<const D3D11Shader*>(config.compute_shader)->GetComputeShader(),
|
||||
D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
|
||||
}
|
||||
|
||||
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
|
||||
{
|
||||
if (m_current_pipeline == pipeline)
|
||||
return;
|
||||
|
||||
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
|
||||
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
|
||||
m_current_pipeline = PL;
|
||||
|
||||
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
||||
if (!PL->IsComputePipeline())
|
||||
{
|
||||
m_current_input_layout = il;
|
||||
m_context->IASetInputLayout(il);
|
||||
}
|
||||
if (was_compute)
|
||||
UnbindComputePipeline();
|
||||
|
||||
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
||||
{
|
||||
const UINT offset = 0;
|
||||
m_current_vertex_stride = PL->GetVertexStride();
|
||||
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
||||
}
|
||||
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
||||
{
|
||||
m_current_input_layout = il;
|
||||
m_context->IASetInputLayout(il);
|
||||
}
|
||||
|
||||
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
||||
{
|
||||
m_current_primitive_topology = topology;
|
||||
m_context->IASetPrimitiveTopology(topology);
|
||||
}
|
||||
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
||||
{
|
||||
const UINT offset = 0;
|
||||
m_current_vertex_stride = PL->GetVertexStride();
|
||||
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
||||
}
|
||||
|
||||
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
||||
{
|
||||
m_current_vertex_shader = vs;
|
||||
m_context->VSSetShader(vs, nullptr, 0);
|
||||
}
|
||||
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
||||
{
|
||||
m_current_primitive_topology = topology;
|
||||
m_context->IASetPrimitiveTopology(topology);
|
||||
}
|
||||
|
||||
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
||||
{
|
||||
m_current_geometry_shader = gs;
|
||||
m_context->GSSetShader(gs, nullptr, 0);
|
||||
}
|
||||
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
||||
{
|
||||
m_current_vertex_shader = vs;
|
||||
m_context->VSSetShader(vs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
||||
{
|
||||
m_current_pixel_shader = ps;
|
||||
m_context->PSSetShader(ps, nullptr, 0);
|
||||
}
|
||||
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
||||
{
|
||||
m_current_geometry_shader = gs;
|
||||
m_context->GSSetShader(gs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
||||
{
|
||||
m_current_rasterizer_state = rs;
|
||||
m_context->RSSetState(rs);
|
||||
}
|
||||
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
||||
{
|
||||
m_current_pixel_shader = ps;
|
||||
m_context->PSSetShader(ps, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||
{
|
||||
m_current_depth_state = ds;
|
||||
m_context->OMSetDepthStencilState(ds, 0);
|
||||
}
|
||||
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
||||
{
|
||||
m_current_rasterizer_state = rs;
|
||||
m_context->RSSetState(rs);
|
||||
}
|
||||
|
||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||
{
|
||||
m_current_depth_state = ds;
|
||||
m_context->OMSetDepthStencilState(ds, 0);
|
||||
}
|
||||
|
||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
||||
{
|
||||
m_current_blend_state = bs;
|
||||
m_current_blend_factor = PL->GetBlendFactor();
|
||||
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_current_blend_state = bs;
|
||||
m_current_blend_factor = PL->GetBlendFactor();
|
||||
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
||||
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
|
||||
{
|
||||
m_current_compute_shader = cs;
|
||||
m_context->CSSetShader(cs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (!was_compute)
|
||||
{
|
||||
// need to bind all SRVs/samplers
|
||||
u32 count;
|
||||
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
|
||||
{
|
||||
if (!m_current_textures[count])
|
||||
break;
|
||||
}
|
||||
if (count > 0)
|
||||
{
|
||||
m_context->CSSetShaderResources(0, count, m_current_textures.data());
|
||||
m_context->CSSetSamplers(0, count, m_current_samplers.data());
|
||||
}
|
||||
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
{
|
||||
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||
|
||||
m_context->OMSetRenderTargets(0, nullptr, nullptr);
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
|
|||
if (m_current_pipeline != pl)
|
||||
return;
|
||||
|
||||
if (pl->IsComputePipeline())
|
||||
UnbindComputePipeline();
|
||||
|
||||
// Let the runtime deal with the dead objects...
|
||||
m_current_pipeline = nullptr;
|
||||
}
|
||||
|
||||
void D3D11Device::UnbindComputePipeline()
|
||||
{
|
||||
m_current_compute_shader = nullptr;
|
||||
|
||||
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
|
||||
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
|
||||
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
|
||||
m_context->CSSetShader(nullptr, nullptr, 0);
|
||||
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
|
||||
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
|
||||
}
|
||||
|
|
|
@ -51,13 +51,18 @@ public:
|
|||
|
||||
void SetDebugName(std::string_view name) override;
|
||||
|
||||
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
|
||||
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
|
||||
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
|
||||
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
|
||||
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
|
||||
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
|
||||
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
|
||||
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
|
||||
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
|
||||
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
|
||||
{
|
||||
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
|
||||
}
|
||||
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
|
||||
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
|
||||
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
|
||||
|
@ -66,7 +71,8 @@ public:
|
|||
private:
|
||||
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
|
||||
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
|
||||
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
|
||||
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
|
||||
u32 blend_factor);
|
||||
|
||||
ComPtr<ID3D11RasterizerState> m_rs;
|
||||
ComPtr<ID3D11DepthStencilState> m_ds;
|
||||
|
@ -74,7 +80,7 @@ private:
|
|||
ComPtr<ID3D11InputLayout> m_il;
|
||||
ComPtr<ID3D11VertexShader> m_vs;
|
||||
ComPtr<ID3D11GeometryShader> m_gs;
|
||||
ComPtr<ID3D11PixelShader> m_ps;
|
||||
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
|
||||
D3D11_PRIMITIVE_TOPOLOGY m_topology;
|
||||
u32 m_vertex_stride;
|
||||
u32 m_blend_factor;
|
||||
|
|
|
@ -115,6 +115,8 @@ public:
|
|||
ComputePipelineBuilder();
|
||||
~ComputePipelineBuilder() = default;
|
||||
|
||||
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
|
||||
|
||||
void Clear();
|
||||
|
||||
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);
|
||||
|
|
|
@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
|
|||
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||
m_features.feedback_loops = false;
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||
m_features.partial_msaa_resolve = true;
|
||||
m_features.memory_import = false;
|
||||
m_features.explicit_present = true;
|
||||
|
@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
|||
1, // SingleTextureBufferAndPushConstants
|
||||
0, // MultiTextureAndUBO
|
||||
2, // MultiTextureAndPushConstants
|
||||
2, // ComputeSingleTextureAndPushConstants
|
||||
};
|
||||
|
||||
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
|
@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
|||
|
||||
const u32 push_param =
|
||||
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
|
||||
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
if (!IsUsingComputeRootSignature())
|
||||
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
else
|
||||
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
}
|
||||
|
||||
void* D3D12Device::MapUniformBuffer(u32 size)
|
||||
|
@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
|||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
if (!(rs = rsb.Create(error, true)))
|
||||
return false;
|
||||
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
|
|||
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||
rt->CommitClear(cmdlist);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
}
|
||||
}
|
||||
if (m_current_depth_target)
|
||||
|
@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
|
|||
BeginRenderPass();
|
||||
}
|
||||
|
||||
void D3D12Device::PreDispatchCheck()
|
||||
{
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
|
||||
// Transition images.
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
|
||||
// All textures should be in shader read only optimal already, but just in case..
|
||||
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||
for (u32 i = 0; i < num_textures; i++)
|
||||
{
|
||||
if (m_current_textures[i])
|
||||
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
}
|
||||
|
||||
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
|
||||
{
|
||||
// Still need to clear the RTs.
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
{
|
||||
D3D12Texture* const rt = m_current_render_targets[i];
|
||||
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||
rt->CommitClear(cmdlist);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a new command buffer, bind the pipeline and such.
|
||||
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||
SetInitialPipelineState();
|
||||
|
||||
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
|
||||
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||
const u32 dirty = std::exchange(m_dirty_flags, 0);
|
||||
if (dirty != 0)
|
||||
{
|
||||
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
|
||||
{
|
||||
UpdateRootSignature();
|
||||
if (!UpdateRootParameters(dirty))
|
||||
{
|
||||
SubmitCommandList(false, "out of descriptors");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
|
||||
{
|
||||
if (!UpdateRootParameters(dirty))
|
||||
{
|
||||
SubmitCommandList(false, "out of descriptors");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool D3D12Device::IsUsingROVRootSignature() const
|
||||
{
|
||||
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
|
||||
}
|
||||
|
||||
bool D3D12Device::IsUsingComputeRootSignature() const
|
||||
{
|
||||
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
|
||||
}
|
||||
|
||||
void D3D12Device::UpdateRootSignature()
|
||||
{
|
||||
GetCommandList()->SetGraphicsRootSignature(
|
||||
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
if (!IsUsingComputeRootSignature())
|
||||
{
|
||||
cmdlist->SetGraphicsRootSignature(
|
||||
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
}
|
||||
else
|
||||
{
|
||||
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
}
|
||||
}
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
|
@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
|||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
|
||||
|
@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
|||
return false;
|
||||
}
|
||||
|
||||
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
|
||||
|
@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
|||
1 :
|
||||
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
|
||||
2);
|
||||
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
|
|||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||
|
||||
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
}
|
||||
|
@ -2331,3 +2435,15 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
|||
{
|
||||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z)
|
||||
{
|
||||
PreDispatchCheck();
|
||||
s_stats.num_draws++;
|
||||
|
||||
const u32 groups_x = threads_x / group_size_x;
|
||||
const u32 groups_y = threads_y / group_size_y;
|
||||
const u32 groups_z = threads_z / group_size_z;
|
||||
GetCommandList()->Dispatch(groups_x, groups_y, groups_z);
|
||||
}
|
||||
|
|
|
@ -96,6 +96,7 @@ public:
|
|||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
|
@ -119,6 +120,8 @@ public:
|
|||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
|
@ -275,8 +278,10 @@ private:
|
|||
ID3D12RootSignature* GetCurrentRootSignature() const;
|
||||
void SetInitialPipelineState();
|
||||
void PreDrawCheck();
|
||||
void PreDispatchCheck();
|
||||
|
||||
bool IsUsingROVRootSignature() const;
|
||||
bool IsUsingComputeRootSignature() const;
|
||||
void UpdateRootSignature();
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool UpdateParametersForLayout(u32 dirty);
|
||||
|
|
|
@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
|
|||
return SHA1Digest::DigestToString(digest);
|
||||
}
|
||||
|
||||
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
|
||||
{
|
||||
SHA1Digest hash;
|
||||
hash.Update(&config.layout, sizeof(config.layout));
|
||||
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
|
||||
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
|
||||
|
||||
u8 digest[SHA1Digest::DIGEST_SIZE];
|
||||
hash.Final(digest);
|
||||
return SHA1Digest::DigestToString(digest);
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
|
||||
{
|
||||
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
|
||||
|
@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
|||
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
|
||||
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
D3D12::ComputePipelineBuilder cpb;
|
||||
cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
|
||||
cpb.SetShader(static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeData(),
|
||||
static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeSize());
|
||||
|
||||
ComPtr<ID3D12PipelineState> pipeline;
|
||||
if (m_pipeline_library)
|
||||
{
|
||||
const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
|
||||
HRESULT hr =
|
||||
m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
|
||||
if (FAILED(hr))
|
||||
{
|
||||
// E_INVALIDARG = not found.
|
||||
if (hr != E_INVALIDARG)
|
||||
ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
|
||||
|
||||
// Need to create it normally.
|
||||
pipeline = cpb.Create(m_device.Get(), error, false);
|
||||
|
||||
// Store if it wasn't an OOM or something else.
|
||||
if (pipeline && hr == E_INVALIDARG)
|
||||
{
|
||||
hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
|
||||
if (FAILED(hr))
|
||||
ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pipeline = cpb.Create(m_device.Get(), error, false);
|
||||
}
|
||||
|
||||
if (!pipeline)
|
||||
return {};
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(
|
||||
new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ public:
|
|||
void SetDebugName(std::string_view name) override;
|
||||
|
||||
static std::string GetPipelineName(const GraphicsConfig& config);
|
||||
static std::string GetPipelineName(const ComputeConfig& config);
|
||||
|
||||
private:
|
||||
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,
|
||||
|
|
|
@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
|||
|
||||
// Need to know if there's UBOs for mapping.
|
||||
const spvc_reflected_resource *ubos, *textures;
|
||||
size_t ubos_count, textures_count;
|
||||
size_t ubos_count, textures_count, images_count;
|
||||
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
|
||||
&ubos_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
|
||||
&textures, &textures_count)) != SPVC_SUCCESS)
|
||||
&textures, &textures_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
|
||||
&textures, &images_count)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
|
@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
|||
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
|
||||
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
|
||||
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
|
||||
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
|
||||
|
||||
switch (target_language)
|
||||
{
|
||||
|
@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < images_count; i++)
|
||||
{
|
||||
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = IMAGE_DESCRIPTOR_SET,
|
||||
.binding = i,
|
||||
.cbv = {},
|
||||
.uav = {.register_space = 0, .register_binding = i},
|
||||
.srv = {},
|
||||
.sampler = {}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
|||
return {};
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Fragment)
|
||||
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
|
||||
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
|
||||
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
|
||||
.msl_buffer = 0,
|
||||
.msl_texture = 0,
|
||||
.msl_sampler = 0};
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
|
||||
.desc_set = 1,
|
||||
const spvc_msl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = TEXTURE_DESCRIPTOR_SET,
|
||||
.binding = i,
|
||||
.msl_buffer = i,
|
||||
.msl_texture = i,
|
||||
|
@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
|||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_features.framebuffer_fetch)
|
||||
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||
static_cast<int>(sres));
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -160,6 +160,9 @@ public:
|
|||
// Multiple textures, 128 byte UBO via push constants.
|
||||
MultiTextureAndPushConstants,
|
||||
|
||||
// 128 byte UBO via push constants, 1 texture, compute shader.
|
||||
ComputeSingleTextureAndPushConstants,
|
||||
|
||||
MaxCount
|
||||
};
|
||||
|
||||
|
@ -416,6 +419,12 @@ public:
|
|||
u32 GetRenderTargetCount() const;
|
||||
};
|
||||
|
||||
struct ComputeConfig
|
||||
{
|
||||
Layout layout;
|
||||
GPUShader* compute_shader;
|
||||
};
|
||||
|
||||
GPUPipeline();
|
||||
virtual ~GPUPipeline();
|
||||
|
||||
|
@ -501,9 +510,10 @@ public:
|
|||
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
|
||||
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
|
||||
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
|
||||
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
|
||||
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
|
||||
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
|
||||
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
|
||||
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
|
||||
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
|
||||
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
|
||||
};
|
||||
|
||||
enum class DrawBarrier : u32
|
||||
|
@ -532,6 +542,7 @@ public:
|
|||
bool texture_buffers_emulated_with_ssbo : 1;
|
||||
bool feedback_loops : 1;
|
||||
bool geometry_shaders : 1;
|
||||
bool compute_shaders : 1;
|
||||
bool partial_msaa_resolve : 1;
|
||||
bool memory_import : 1;
|
||||
bool explicit_present : 1;
|
||||
|
@ -625,11 +636,20 @@ public:
|
|||
0, // SingleTextureBufferAndPushConstants
|
||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
|
||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
|
||||
1, // ComputeSingleTextureAndPushConstants
|
||||
};
|
||||
|
||||
return counts[static_cast<u8>(layout)];
|
||||
}
|
||||
|
||||
/// Returns the number of thread groups to dispatch for a given total count and local size.
|
||||
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
|
||||
u32 local_size_y, u32 local_size_z)
|
||||
{
|
||||
return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y,
|
||||
(count_z + (local_size_z - 1)) / local_size_z);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
|
||||
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
|
||||
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
|
||||
|
@ -638,10 +658,6 @@ public:
|
|||
|
||||
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
|
||||
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
|
||||
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
|
||||
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
|
||||
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
|
||||
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
|
||||
|
||||
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
|
||||
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
|
||||
|
@ -712,6 +728,8 @@ public:
|
|||
Error* error = nullptr, const char* entry_point = "main");
|
||||
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
|
||||
Error* error = nullptr) = 0;
|
||||
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
|
||||
Error* error = nullptr) = 0;
|
||||
|
||||
/// Debug messaging.
|
||||
virtual void PushDebugGroup(const char* name) = 0;
|
||||
|
@ -753,6 +771,8 @@ public:
|
|||
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
|
||||
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
|
||||
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
|
||||
virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) = 0;
|
||||
|
||||
/// Returns false if the window was completely occluded.
|
||||
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
|
||||
|
|
|
@ -78,7 +78,16 @@ class MetalPipeline final : public GPUPipeline
|
|||
public:
|
||||
~MetalPipeline() override;
|
||||
|
||||
ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; }
|
||||
ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); }
|
||||
ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); }
|
||||
ALWAYS_INLINE id<MTLRenderPipelineState> GetRenderPipelineState() const
|
||||
{
|
||||
return (id<MTLRenderPipelineState>)m_pipeline;
|
||||
}
|
||||
ALWAYS_INLINE id<MTLComputePipelineState> GetComputePipelineState() const
|
||||
{
|
||||
return (id<MTLComputePipelineState>)m_pipeline;
|
||||
}
|
||||
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
|
||||
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
|
||||
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
|
||||
|
@ -86,10 +95,9 @@ public:
|
|||
void SetDebugName(std::string_view name) override;
|
||||
|
||||
private:
|
||||
MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
||||
MTLPrimitiveType primitive);
|
||||
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive);
|
||||
|
||||
id<MTLRenderPipelineState> m_pipeline;
|
||||
id m_pipeline;
|
||||
id<MTLDepthStencilState> m_depth;
|
||||
MTLCullMode m_cull_mode;
|
||||
MTLPrimitiveType m_primitive;
|
||||
|
@ -251,6 +259,7 @@ public:
|
|||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
|
@ -265,7 +274,7 @@ public:
|
|||
void* MapUniformBuffer(u32 size) override;
|
||||
void UnmapUniformBuffer(u32 size) override;
|
||||
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
||||
GPUPipeline::RenderPassFlag feedback_loop) override;
|
||||
GPUPipeline::RenderPassFlag flags) override;
|
||||
void SetPipeline(GPUPipeline* pipeline) override;
|
||||
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
|
||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||
|
@ -274,6 +283,8 @@ public:
|
|||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
|
@ -338,7 +349,6 @@ private:
|
|||
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
|
||||
std::string_view entry_point, Error* error);
|
||||
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
|
||||
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
|
||||
ClearPipelineConfig GetCurrentClearPipelineConfig() const;
|
||||
id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
|
||||
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
|
||||
|
@ -349,9 +359,12 @@ private:
|
|||
void CleanupObjects();
|
||||
|
||||
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
|
||||
ALWAYS_INLINE bool InComputePass() const { return (m_compute_encoder != nil); }
|
||||
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
|
||||
void BeginRenderPass();
|
||||
void EndRenderPass();
|
||||
void BeginComputePass();
|
||||
void EndComputePass();
|
||||
void EndInlineUploading();
|
||||
void EndAnyEncoding();
|
||||
|
||||
|
@ -359,6 +372,8 @@ private:
|
|||
void SetInitialEncoderState();
|
||||
void SetViewportInRenderEncoder();
|
||||
void SetScissorInRenderEncoder();
|
||||
void CommitRenderTargetClears();
|
||||
void BindRenderTargetsAsComputeImages();
|
||||
|
||||
void RenderBlankFrame(MetalSwapChain* swap_chain);
|
||||
|
||||
|
@ -384,7 +399,7 @@ private:
|
|||
|
||||
id<MTLLibrary> m_shaders = nil;
|
||||
id<MTLBinaryArchive> m_pipeline_archive = nil;
|
||||
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
|
||||
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, std::unique_ptr<GPUPipeline>>>
|
||||
m_resolve_pipelines;
|
||||
std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
|
||||
|
||||
|
@ -394,9 +409,10 @@ private:
|
|||
|
||||
id<MTLCommandBuffer> m_render_cmdbuf = nil;
|
||||
id<MTLRenderCommandEncoder> m_render_encoder = nil;
|
||||
id<MTLComputeCommandEncoder> m_compute_encoder = nil;
|
||||
|
||||
u8 m_num_current_render_targets = 0;
|
||||
GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
||||
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
|
||||
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
|
||||
MetalTexture* m_current_depth_target = nullptr;
|
||||
|
||||
|
|
|
@ -77,7 +77,8 @@ static void LogNSError(NSError* error, std::string_view message)
|
|||
{
|
||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, message);
|
||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Code: {}", static_cast<u32>(error.code));
|
||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}", [error.description UTF8String]);
|
||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}",
|
||||
[error.description UTF8String]);
|
||||
}
|
||||
|
||||
static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt)
|
||||
|
@ -503,28 +504,6 @@ id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSSt
|
|||
return function;
|
||||
}
|
||||
|
||||
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
|
||||
{
|
||||
MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new];
|
||||
if (name != nil)
|
||||
[desc setLabel:name];
|
||||
[desc setComputeFunction:function];
|
||||
|
||||
NSError* err = nil;
|
||||
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
|
||||
options:MTLPipelineOptionNone
|
||||
reflection:nil
|
||||
error:&err];
|
||||
[desc release];
|
||||
if (pipeline == nil)
|
||||
{
|
||||
LogNSError(err, "Create compute pipeline failed:");
|
||||
return nil;
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void MetalDevice::DestroyDevice()
|
||||
{
|
||||
WaitForPreviousCommandBuffers();
|
||||
|
@ -564,11 +543,6 @@ void MetalDevice::DestroyDevice()
|
|||
[it.second release];
|
||||
}
|
||||
m_depth_states.clear();
|
||||
for (auto& it : m_resolve_pipelines)
|
||||
{
|
||||
if (it.second != nil)
|
||||
[it.second release];
|
||||
}
|
||||
m_resolve_pipelines.clear();
|
||||
for (auto& it : m_clear_pipelines)
|
||||
{
|
||||
|
@ -755,7 +729,7 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
|
|||
return CreateShaderFromMSL(stage, source, entry_point, error);
|
||||
}
|
||||
|
||||
MetalPipeline::MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
||||
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
||||
MTLPrimitiveType primitive)
|
||||
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
|
||||
{
|
||||
|
@ -982,6 +956,29 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
|
|||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
@autoreleasepool
|
||||
{
|
||||
MTLComputePipelineDescriptor* desc = [[MTLComputePipelineDescriptor new] autorelease];
|
||||
[desc setComputeFunction:static_cast<MetalShader*>(config.compute_shader)->GetFunction()];
|
||||
|
||||
NSError* nserror = nil;
|
||||
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
|
||||
options:MTLPipelineOptionNone
|
||||
reflection:nil
|
||||
error:&nserror];
|
||||
if (pipeline == nil)
|
||||
{
|
||||
LogNSError(nserror, "Failed to create compute pipeline state");
|
||||
CocoaTools::NSErrorToErrorObject(error, "newComputePipelineStateWithDescriptor failed: ", nserror);
|
||||
return {};
|
||||
}
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, nil, MTLCullModeNone, MTLPrimitiveTypePoint));
|
||||
}
|
||||
}
|
||||
|
||||
MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
|
||||
Format format)
|
||||
: GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture)
|
||||
|
@ -1559,14 +1556,14 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
|
|||
|
||||
const GPUTexture::Format src_format = dst->GetFormat();
|
||||
const GPUTexture::Format dst_format = dst->GetFormat();
|
||||
id<MTLComputePipelineState> resolve_pipeline = nil;
|
||||
GPUPipeline* resolve_pipeline;
|
||||
if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
|
||||
[src_format, dst_format](const auto& it) {
|
||||
return it.first.first == src_format && it.first.second == dst_format;
|
||||
});
|
||||
iter != m_resolve_pipelines.end())
|
||||
{
|
||||
resolve_pipeline = iter->second;
|
||||
resolve_pipeline = iter->second.get();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1579,32 +1576,41 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
|
|||
if (function == nil)
|
||||
Panic("Failed to get resolve kernel");
|
||||
|
||||
resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease];
|
||||
if (resolve_pipeline != nil)
|
||||
[resolve_pipeline retain];
|
||||
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
|
||||
MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function);
|
||||
GPUPipeline::ComputeConfig config;
|
||||
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
|
||||
config.compute_shader = &temp_shader;
|
||||
|
||||
std::unique_ptr<GPUPipeline> pipeline = CreatePipeline(config, nullptr);
|
||||
if (!pipeline)
|
||||
Panic("Failed to create resolve pipeline");
|
||||
|
||||
GL_OBJECT_NAME(pipeline, is_depth ? "Depth Resolve" : "Color Resolve");
|
||||
resolve_pipeline =
|
||||
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), std::move(pipeline)).second.get();
|
||||
}
|
||||
}
|
||||
if (resolve_pipeline == nil)
|
||||
Panic("Failed to get resolve pipeline");
|
||||
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
|
||||
s_stats.num_copies++;
|
||||
|
||||
const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
|
||||
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
|
||||
const id<MTLComputePipelineState> mtl_pipeline =
|
||||
static_cast<MetalPipeline*>(resolve_pipeline)->GetComputePipelineState();
|
||||
const u32 threadgroupHeight = mtl_pipeline.maxTotalThreadsPerThreadgroup / mtl_pipeline.threadExecutionWidth;
|
||||
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(mtl_pipeline.threadExecutionWidth, threadgroupHeight, 1);
|
||||
const MTLSize threadgroupsInGrid =
|
||||
MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
|
||||
(src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
|
||||
|
||||
id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
|
||||
[computeEncoder setComputePipelineState:resolve_pipeline];
|
||||
[computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
|
||||
[computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
|
||||
[computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
|
||||
[computeEncoder endEncoding];
|
||||
// Set up manually to not disturb state.
|
||||
BeginComputePass();
|
||||
[m_compute_encoder setComputePipelineState:mtl_pipeline];
|
||||
[m_compute_encoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
|
||||
[m_compute_encoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
|
||||
[m_compute_encoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
|
||||
EndComputePass();
|
||||
}
|
||||
|
||||
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
|
||||
|
@ -1645,7 +1651,7 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
|
|||
|
||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
||||
if (m_current_pipeline)
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||
if (m_current_cull_mode != MTLCullModeNone)
|
||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||
if (depth != m_current_depth_state)
|
||||
|
@ -1674,6 +1680,8 @@ void MetalDevice::CommitClear(MetalTexture* tex)
|
|||
// TODO: We could combine it with the current render pass.
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
else if (InComputePass())
|
||||
EndComputePass();
|
||||
|
||||
@autoreleasepool
|
||||
{
|
||||
|
@ -1896,11 +1904,13 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
|
|||
}
|
||||
|
||||
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
||||
GPUPipeline::RenderPassFlag feedback_loop)
|
||||
GPUPipeline::RenderPassFlag flags)
|
||||
{
|
||||
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
|
||||
(!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) !=
|
||||
(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))));
|
||||
((flags & GPUPipeline::BindRenderTargetsAsImages) !=
|
||||
(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) ||
|
||||
(!m_features.framebuffer_fetch && ((flags & GPUPipeline::ColorFeedbackLoop) !=
|
||||
(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))));
|
||||
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
|
||||
bool needs_rt_clear = false;
|
||||
|
||||
|
@ -1915,12 +1925,19 @@ void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
|||
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
|
||||
m_current_render_targets[i] = nullptr;
|
||||
m_num_current_render_targets = static_cast<u8>(num_rts);
|
||||
m_current_feedback_loop = feedback_loop;
|
||||
m_current_render_pass_flags = flags;
|
||||
|
||||
if (changed || needs_rt_clear || needs_ds_clear)
|
||||
{
|
||||
if (InRenderPass())
|
||||
{
|
||||
EndRenderPass();
|
||||
}
|
||||
else if (InComputePass() && (flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||
{
|
||||
CommitRenderTargetClears();
|
||||
BindRenderTargetsAsComputeImages();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1931,26 +1948,34 @@ void MetalDevice::SetPipeline(GPUPipeline* pipeline)
|
|||
return;
|
||||
|
||||
m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
|
||||
if (InRenderPass())
|
||||
if (!m_current_pipeline->IsComputePipeline())
|
||||
{
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
||||
if (InRenderPass())
|
||||
{
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||
|
||||
if (m_current_depth_state != m_current_pipeline->GetDepthState())
|
||||
{
|
||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||
if (m_current_depth_state != m_current_pipeline->GetDepthState())
|
||||
{
|
||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||
}
|
||||
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
|
||||
{
|
||||
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||
}
|
||||
}
|
||||
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
|
||||
else
|
||||
{
|
||||
// Still need to set depth state before the draw begins.
|
||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Still need to set depth state before the draw begins.
|
||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
||||
if (InComputePass())
|
||||
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1979,6 +2004,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
|||
m_current_textures[slot] = T;
|
||||
if (InRenderPass())
|
||||
[m_render_encoder setFragmentTexture:T atIndex:slot];
|
||||
else if (InComputePass())
|
||||
[m_compute_encoder setTexture:T atIndex:slot];
|
||||
}
|
||||
|
||||
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
|
||||
|
@ -1987,6 +2014,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
|||
m_current_samplers[slot] = S;
|
||||
if (InRenderPass())
|
||||
[m_render_encoder setFragmentSamplerState:S atIndex:slot];
|
||||
else if (InComputePass())
|
||||
[m_compute_encoder setTexture:T atIndex:slot];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2011,6 +2040,8 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
|
|||
m_current_textures[i] = nil;
|
||||
if (InRenderPass())
|
||||
[m_render_encoder setFragmentTexture:nil atIndex:i];
|
||||
else if (InComputePass())
|
||||
[m_compute_encoder setTexture:nil atIndex:0];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2070,7 +2101,7 @@ void MetalDevice::SetScissor(const GSVector4i rc)
|
|||
|
||||
void MetalDevice::BeginRenderPass()
|
||||
{
|
||||
DebugAssert(m_render_encoder == nil);
|
||||
DebugAssert(m_render_encoder == nil && !InComputePass());
|
||||
|
||||
// Inline writes :(
|
||||
if (m_inline_upload_encoder != nil)
|
||||
|
@ -2180,12 +2211,57 @@ void MetalDevice::BeginRenderPass()
|
|||
|
||||
void MetalDevice::EndRenderPass()
|
||||
{
|
||||
DebugAssert(InRenderPass() && !IsInlineUploading());
|
||||
DebugAssert(InRenderPass() && !IsInlineUploading() && !InComputePass());
|
||||
[m_render_encoder endEncoding];
|
||||
[m_render_encoder release];
|
||||
m_render_encoder = nil;
|
||||
}
|
||||
|
||||
void MetalDevice::BeginComputePass()
|
||||
{
|
||||
DebugAssert(!InRenderPass() && !IsInlineUploading() && !InComputePass());
|
||||
|
||||
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||
CommitRenderTargetClears();
|
||||
|
||||
m_compute_encoder = [[m_render_cmdbuf computeCommandEncoder] retain];
|
||||
[m_compute_encoder setTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
[m_compute_encoder setSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
|
||||
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||
BindRenderTargetsAsComputeImages();
|
||||
|
||||
if (m_current_pipeline && m_current_pipeline->IsComputePipeline())
|
||||
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
|
||||
}
|
||||
|
||||
void MetalDevice::CommitRenderTargetClears()
|
||||
{
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
{
|
||||
MetalTexture* rt = m_current_render_targets[i];
|
||||
if (rt->GetState() == GPUTexture::State::Invalidated)
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
else if (rt->GetState() == GPUTexture::State::Cleared)
|
||||
CommitClear(rt);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::BindRenderTargetsAsComputeImages()
|
||||
{
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
[m_compute_encoder setTexture:m_current_render_targets[i]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS + i];
|
||||
}
|
||||
|
||||
void MetalDevice::EndComputePass()
|
||||
{
|
||||
DebugAssert(InComputePass());
|
||||
|
||||
[m_compute_encoder endEncoding];
|
||||
[m_compute_encoder release];
|
||||
m_compute_encoder = nil;
|
||||
}
|
||||
|
||||
void MetalDevice::EndInlineUploading()
|
||||
{
|
||||
DebugAssert(IsInlineUploading() && !InRenderPass());
|
||||
|
@ -2198,6 +2274,8 @@ void MetalDevice::EndAnyEncoding()
|
|||
{
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
else if (InComputePass())
|
||||
EndComputePass();
|
||||
else if (IsInlineUploading())
|
||||
EndInlineUploading();
|
||||
}
|
||||
|
@ -2213,14 +2291,14 @@ void MetalDevice::SetInitialEncoderState()
|
|||
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||
if (m_current_depth_state != nil)
|
||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||
if (m_current_pipeline != nil)
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
||||
if (m_current_pipeline && m_current_pipeline->IsRenderPipeline())
|
||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||
if (m_current_ssbo)
|
||||
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
|
||||
|
||||
if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))
|
||||
if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))
|
||||
{
|
||||
DebugAssert(m_current_render_targets[0]);
|
||||
[m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
|
||||
|
@ -2249,7 +2327,12 @@ void MetalDevice::SetScissorInRenderEncoder()
|
|||
void MetalDevice::PreDrawCheck()
|
||||
{
|
||||
if (!InRenderPass())
|
||||
{
|
||||
if (InComputePass())
|
||||
EndComputePass();
|
||||
|
||||
BeginRenderPass();
|
||||
}
|
||||
}
|
||||
|
||||
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
|
||||
|
@ -2392,6 +2475,25 @@ void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
|||
}
|
||||
}
|
||||
|
||||
void MetalDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z)
|
||||
{
|
||||
if (!InComputePass())
|
||||
{
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
|
||||
BeginComputePass();
|
||||
}
|
||||
|
||||
DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline());
|
||||
id<MTLComputePipelineState> pipeline = m_current_pipeline->GetComputePipelineState();
|
||||
|
||||
// TODO: We could remap to the optimal group size..
|
||||
[m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)
|
||||
threadsPerThreadgroup:MTLSizeMake(group_size_x, group_size_y, group_size_z)];
|
||||
}
|
||||
|
||||
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
|
||||
{
|
||||
@autoreleasepool
|
||||
|
@ -2450,7 +2552,7 @@ GPUDevice::PresentResult MetalDevice::BeginPresent(GPUSwapChain* swap_chain, u32
|
|||
s_stats.num_render_passes++;
|
||||
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
|
||||
m_num_current_render_targets = 0;
|
||||
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
||||
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
|
||||
m_current_depth_target = nullptr;
|
||||
m_current_pipeline = nullptr;
|
||||
m_current_depth_state = nil;
|
||||
|
|
|
@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
|
|||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
ERROR_LOG("Compute shaders are not yet supported.");
|
||||
return {};
|
||||
}
|
||||
|
||||
void OpenGLDevice::PushDebugGroup(const char* name)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
|
@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
|
|||
|
||||
m_features.geometry_shaders =
|
||||
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
|
||||
m_features.compute_shaders = false;
|
||||
|
||||
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
|
||||
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
|
||||
|
@ -1078,6 +1085,12 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
|||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
void OpenGLDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z)
|
||||
{
|
||||
Panic("Compute shaders are not supported");
|
||||
}
|
||||
|
||||
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
|
||||
u32* map_base_vertex)
|
||||
{
|
||||
|
|
|
@ -77,6 +77,7 @@ public:
|
|||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
|
@ -100,6 +101,8 @@ public:
|
|||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) override;
|
||||
|
||||
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
|
||||
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;
|
||||
|
|
|
@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
|
|||
m_smap_constants = {};
|
||||
}
|
||||
|
||||
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
|
||||
bool clear /*= true*/)
|
||||
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
|
||||
Error* error)
|
||||
{
|
||||
VkPipeline pipeline;
|
||||
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
|
||||
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ public:
|
|||
|
||||
void Clear();
|
||||
|
||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
|
||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
|
||||
|
||||
void SetShader(VkShaderModule module, const char* entry_point);
|
||||
|
||||
|
|
|
@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
|
|||
WARNING_LOG("Emulating texture buffers with SSBOs.");
|
||||
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
|
||||
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||
|
||||
m_features.partial_msaa_resolve = true;
|
||||
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
|
||||
|
@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
}
|
||||
|
||||
{
|
||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
|
||||
|
@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (m_optional_extensions.vk_khr_push_descriptor)
|
||||
dslb.SetPushFlag();
|
||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
|
||||
|
@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
|
||||
}
|
||||
|
||||
if (m_features.raster_order_views)
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
}
|
||||
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
|
||||
|
||||
for (u32 type = 0; type < 3; type++)
|
||||
{
|
||||
|
@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
|
||||
|
@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
|
||||
|
@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts()
|
|||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
VkPipelineLayout& pl =
|
||||
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||
plb.AddDescriptorSet(m_single_texture_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts()
|
|||
l = VK_NULL_HANDLE;
|
||||
}
|
||||
};
|
||||
destroy_dsl(m_rov_ds_layout);
|
||||
destroy_dsl(m_image_ds_layout);
|
||||
destroy_dsl(m_feedback_loop_ds_layout);
|
||||
destroy_dsl(m_multi_texture_ds_layout);
|
||||
destroy_dsl(m_single_texture_buffer_ds_layout);
|
||||
|
@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck()
|
|||
}
|
||||
}
|
||||
|
||||
void VulkanDevice::PreDispatchCheck()
|
||||
{
|
||||
// All textures should be in shader read only optimal already, but just in case..
|
||||
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||
for (u32 i = 0; i < num_textures; i++)
|
||||
{
|
||||
if (m_current_textures[i])
|
||||
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
|
||||
}
|
||||
|
||||
// Binding as image, but we still need to clear it.
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
{
|
||||
VulkanTexture* rt = m_current_render_targets[i];
|
||||
if (rt->GetState() == GPUTexture::State::Cleared)
|
||||
rt->CommitClear(m_current_command_buffer);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
|
||||
rt->SetUseFenceCounter(GetCurrentFenceCounter());
|
||||
}
|
||||
|
||||
// If this is a new command buffer, bind the pipeline and such.
|
||||
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||
SetInitialPipelineState();
|
||||
|
||||
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
|
||||
const u32 dirty = m_dirty_flags & update_mask;
|
||||
m_dirty_flags = m_dirty_flags & ~update_mask;
|
||||
|
||||
if (dirty != 0)
|
||||
{
|
||||
if (!UpdateDescriptorSets(dirty))
|
||||
{
|
||||
SubmitCommandBuffer(false, "out of descriptor sets");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
{
|
||||
[[maybe_unused]] bool new_dynamic_offsets = false;
|
||||
|
||||
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
||||
constexpr VkPipelineBindPoint vk_bind_point =
|
||||
((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS :
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
||||
std::array<VkDescriptorSet, 3> ds;
|
||||
u32 first_ds = 0;
|
||||
u32 num_ds = 0;
|
||||
|
@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
|||
}
|
||||
|
||||
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
|
||||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
|
||||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
|
||||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
{
|
||||
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
|
||||
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
|
||||
|
@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
|||
}
|
||||
|
||||
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
|
||||
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
|
||||
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
|
||||
if (num_ds == 0)
|
||||
return true;
|
||||
}
|
||||
|
@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
|||
{
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
{
|
||||
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
|
||||
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
|
||||
if (ids == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
|
@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
|||
}
|
||||
|
||||
DebugAssert(num_ds > 0);
|
||||
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
|
||||
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
|
||||
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
|
||||
static_cast<u32>(new_dynamic_offsets),
|
||||
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
|
||||
|
||||
return true;
|
||||
|
@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
|
|||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||
|
||||
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
}
|
||||
|
@ -3911,3 +3972,15 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
|||
DefaultCaseIsUnreachable();
|
||||
}
|
||||
}
|
||||
|
||||
void VulkanDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z)
|
||||
{
|
||||
PreDispatchCheck();
|
||||
s_stats.num_draws++;
|
||||
|
||||
const u32 groups_x = threads_x / group_size_x;
|
||||
const u32 groups_y = threads_y / group_size_y;
|
||||
const u32 groups_z = threads_z / group_size_z;
|
||||
vkCmdDispatch(GetCurrentCommandBuffer(), groups_x, groups_y, groups_z);
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ public:
|
|||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
|
@ -136,6 +137,8 @@ public:
|
|||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||
u32 group_size_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
|
@ -373,6 +376,7 @@ private:
|
|||
VkPipelineLayout GetCurrentVkPipelineLayout() const;
|
||||
void SetInitialPipelineState();
|
||||
void PreDrawCheck();
|
||||
void PreDispatchCheck();
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool UpdateDescriptorSetsForLayout(u32 dirty);
|
||||
|
@ -435,7 +439,7 @@ private:
|
|||
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
|
||||
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
|
||||
static_cast<size_t>(PipelineLayoutType::MaxCount)>
|
||||
m_pipeline_layouts = {};
|
||||
|
|
|
@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
|||
return std::unique_ptr<GPUPipeline>(
|
||||
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
Vulkan::ComputePipelineBuilder cpb;
|
||||
cpb.SetShader(static_cast<const VulkanShader*>(config.compute_shader)->GetModule(), "main");
|
||||
cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast<size_t>(config.layout)]);
|
||||
|
||||
const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error);
|
||||
if (!pipeline)
|
||||
return {};
|
||||
|
||||
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue