GPUDevice: Add compute shader support
This commit is contained in:
parent
affbdfc350
commit
e647192437
|
@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
|
||||||
m_features.texture_buffers_emulated_with_ssbo = false;
|
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||||
m_features.feedback_loops = false;
|
m_features.feedback_loops = false;
|
||||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||||
|
m_features.compute_shaders =
|
||||||
|
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
|
||||||
m_features.partial_msaa_resolve = false;
|
m_features.partial_msaa_resolve = false;
|
||||||
m_features.memory_import = false;
|
m_features.memory_import = false;
|
||||||
m_features.explicit_present = false;
|
m_features.explicit_present = false;
|
||||||
|
@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||||
s_stats.buffer_streamed += data_size;
|
s_stats.buffer_streamed += data_size;
|
||||||
|
|
||||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
|
||||||
{
|
|
||||||
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
|
|
||||||
const UINT num_constants = req_size / 16u;
|
|
||||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
|
||||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
DebugAssert(res.index_aligned == 0);
|
|
||||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
|
||||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void* D3D11Device::MapUniformBuffer(u32 size)
|
void* D3D11Device::MapUniformBuffer(u32 size)
|
||||||
|
@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
|
||||||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||||
s_stats.buffer_streamed += size;
|
s_stats.buffer_streamed += size;
|
||||||
|
|
||||||
|
BindUniformBuffer(pos, req_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
|
||||||
|
{
|
||||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
||||||
{
|
{
|
||||||
const UINT first_constant = pos / 16u;
|
const UINT first_constant = offset / 16u;
|
||||||
const UINT num_constants = req_size / 16u;
|
const UINT num_constants = size / 16u;
|
||||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
if (m_current_compute_shader)
|
||||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
{
|
||||||
|
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||||
|
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DebugAssert(pos == 0);
|
DebugAssert(offset == 0);
|
||||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
if (m_current_compute_shader)
|
||||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
{
|
||||||
|
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||||
|
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
||||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||||
|
|
||||||
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
if (!m_current_compute_shader)
|
||||||
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
{
|
||||||
m_num_current_render_targets, uavs.data(), nullptr);
|
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
||||||
|
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
||||||
|
m_num_current_render_targets, uavs.data(), nullptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||||
{
|
{
|
||||||
m_current_textures[slot] = T;
|
m_current_textures[slot] = T;
|
||||||
m_context->PSSetShaderResources(slot, 1, &T);
|
m_context->PSSetShaderResources(slot, 1, &T);
|
||||||
|
if (m_current_compute_shader)
|
||||||
|
m_context->CSSetShaderResources(slot, 1, &T);
|
||||||
}
|
}
|
||||||
if (m_current_samplers[slot] != S)
|
if (m_current_samplers[slot] != S)
|
||||||
{
|
{
|
||||||
m_current_samplers[slot] = S;
|
m_current_samplers[slot] = S;
|
||||||
m_context->PSSetSamplers(slot, 1, &S);
|
m_context->PSSetSamplers(slot, 1, &S);
|
||||||
|
if (m_current_compute_shader)
|
||||||
|
m_context->CSSetSamplers(slot, 1, &S);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
|
||||||
if (m_current_textures[slot] != B)
|
if (m_current_textures[slot] != B)
|
||||||
{
|
{
|
||||||
m_current_textures[slot] = B;
|
m_current_textures[slot] = B;
|
||||||
|
|
||||||
|
// Compute doesn't support texture buffers, yet...
|
||||||
m_context->PSSetShaderResources(slot, 1, &B);
|
m_context->PSSetShaderResources(slot, 1, &B);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
|
||||||
|
|
||||||
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
|
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
|
||||||
{
|
{
|
||||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||||
s_stats.num_draws++;
|
s_stats.num_draws++;
|
||||||
m_context->Draw(vertex_count, base_vertex);
|
m_context->Draw(vertex_count, base_vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
|
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
|
||||||
{
|
{
|
||||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||||
s_stats.num_draws++;
|
s_stats.num_draws++;
|
||||||
m_context->DrawIndexed(index_count, base_index, base_vertex);
|
m_context->DrawIndexed(index_count, base_index, base_vertex);
|
||||||
}
|
}
|
||||||
|
@ -1129,3 +1151,15 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
||||||
{
|
{
|
||||||
Panic("Barriers are not supported");
|
Panic("Barriers are not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the currently-bound compute shader over a grid covering at least
// threads_x * threads_y * threads_z threads.
//
// threads_*    : total number of threads requested along each axis.
// group_size_* : number of threads per thread group along each axis; must be non-zero.
//
// The number of thread groups is rounded UP on each axis, so a thread count that is not a
// multiple of the group size still gets a (partially used) trailing group instead of being
// silently truncated. When the thread count is an exact multiple the result is unchanged.
void D3D11Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                           u32 group_size_z)
{
  DebugAssert(m_current_compute_shader);
  DebugAssert(group_size_x > 0 && group_size_y > 0 && group_size_z > 0);
  s_stats.num_draws++;

  // Ceiling division written as quotient + remainder test so it cannot overflow for large thread counts.
  const auto group_count = [](u32 threads, u32 group_size) -> u32 {
    return (threads / group_size) + static_cast<u32>((threads % group_size) != 0);
  };

  m_context->Dispatch(group_count(threads_x, group_size_x), group_count(threads_y, group_size_y),
                      group_count(threads_z, group_size_z));
}
|
||||||
|
|
|
@ -75,6 +75,7 @@ public:
|
||||||
std::string_view source, const char* entry_point,
|
std::string_view source, const char* entry_point,
|
||||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||||
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||||
|
|
||||||
void PushDebugGroup(const char* name) override;
|
void PushDebugGroup(const char* name) override;
|
||||||
void PopDebugGroup() override;
|
void PopDebugGroup() override;
|
||||||
|
@ -98,6 +99,8 @@ public:
|
||||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||||
|
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) override;
|
||||||
|
|
||||||
bool SetGPUTimingEnabled(bool enabled) override;
|
bool SetGPUTimingEnabled(bool enabled) override;
|
||||||
float GetAndResetAccumulatedGPUTime() override;
|
float GetAndResetAccumulatedGPUTime() override;
|
||||||
|
@ -140,6 +143,8 @@ private:
|
||||||
|
|
||||||
bool CreateBuffers();
|
bool CreateBuffers();
|
||||||
void DestroyBuffers();
|
void DestroyBuffers();
|
||||||
|
void BindUniformBuffer(u32 offset, u32 size);
|
||||||
|
void UnbindComputePipeline();
|
||||||
|
|
||||||
bool IsRenderTargetBound(const D3D11Texture* tex) const;
|
bool IsRenderTargetBound(const D3D11Texture* tex) const;
|
||||||
|
|
||||||
|
@ -180,6 +185,7 @@ private:
|
||||||
ID3D11VertexShader* m_current_vertex_shader = nullptr;
|
ID3D11VertexShader* m_current_vertex_shader = nullptr;
|
||||||
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
|
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
|
||||||
ID3D11PixelShader* m_current_pixel_shader = nullptr;
|
ID3D11PixelShader* m_current_pixel_shader = nullptr;
|
||||||
|
ID3D11ComputeShader* m_current_compute_shader = nullptr;
|
||||||
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
|
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
|
||||||
ID3D11DepthStencilState* m_current_depth_state = nullptr;
|
ID3D11DepthStencilState* m_current_depth_state = nullptr;
|
||||||
ID3D11BlendState* m_current_blend_state = nullptr;
|
ID3D11BlendState* m_current_blend_state = nullptr;
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include "d3d11_pipeline.h"
|
#include "d3d11_pipeline.h"
|
||||||
#include "d3d11_device.h"
|
#include "d3d11_device.h"
|
||||||
|
#include "d3d11_texture.h"
|
||||||
#include "d3d_common.h"
|
#include "d3d_common.h"
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
|
||||||
|
|
||||||
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
|
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
|
||||||
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
|
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
|
||||||
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
|
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
|
||||||
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
|
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
|
||||||
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
|
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
|
||||||
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
|
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
|
||||||
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
|
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
|
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
|
||||||
|
Error* error)
|
||||||
{
|
{
|
||||||
ComPtr<ID3D11BlendState> dbs;
|
ComPtr<ID3D11BlendState> dbs;
|
||||||
|
|
||||||
|
@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
|
||||||
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
|
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a compute pipeline from config.
//
// Returns an empty pointer, and stores a message in error, when config has no compute shader.
// Otherwise returns a D3D11Pipeline whose only populated member is the shared pixel/compute
// shader slot; all rasterizer/depth/blend/input-layout/vertex/geometry state is left null.
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  const D3D11Shader* const shader = static_cast<const D3D11Shader*>(config.compute_shader);
  if (shader) [[likely]]
  {
    // Topology, vertex stride and blend factor carry no meaning for compute, so they are zeroed.
    D3D11Pipeline* const compute_pipeline =
      new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, shader->GetComputeShader(),
                        D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0);
    return std::unique_ptr<GPUPipeline>(compute_pipeline);
  }

  Error::SetStringView(error, "Missing compute shader.");
  return {};
}
|
||||||
|
|
||||||
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
|
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
|
||||||
{
|
{
|
||||||
if (m_current_pipeline == pipeline)
|
if (m_current_pipeline == pipeline)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
|
||||||
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
|
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
|
||||||
m_current_pipeline = PL;
|
m_current_pipeline = PL;
|
||||||
|
|
||||||
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
if (!PL->IsComputePipeline())
|
||||||
{
|
{
|
||||||
m_current_input_layout = il;
|
if (was_compute)
|
||||||
m_context->IASetInputLayout(il);
|
UnbindComputePipeline();
|
||||||
}
|
|
||||||
|
|
||||||
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
||||||
{
|
{
|
||||||
const UINT offset = 0;
|
m_current_input_layout = il;
|
||||||
m_current_vertex_stride = PL->GetVertexStride();
|
m_context->IASetInputLayout(il);
|
||||||
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
||||||
{
|
{
|
||||||
m_current_primitive_topology = topology;
|
const UINT offset = 0;
|
||||||
m_context->IASetPrimitiveTopology(topology);
|
m_current_vertex_stride = PL->GetVertexStride();
|
||||||
}
|
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
||||||
|
}
|
||||||
|
|
||||||
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
||||||
{
|
{
|
||||||
m_current_vertex_shader = vs;
|
m_current_primitive_topology = topology;
|
||||||
m_context->VSSetShader(vs, nullptr, 0);
|
m_context->IASetPrimitiveTopology(topology);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
||||||
{
|
{
|
||||||
m_current_geometry_shader = gs;
|
m_current_vertex_shader = vs;
|
||||||
m_context->GSSetShader(gs, nullptr, 0);
|
m_context->VSSetShader(vs, nullptr, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
||||||
{
|
{
|
||||||
m_current_pixel_shader = ps;
|
m_current_geometry_shader = gs;
|
||||||
m_context->PSSetShader(ps, nullptr, 0);
|
m_context->GSSetShader(gs, nullptr, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
||||||
{
|
{
|
||||||
m_current_rasterizer_state = rs;
|
m_current_pixel_shader = ps;
|
||||||
m_context->RSSetState(rs);
|
m_context->PSSetShader(ps, nullptr, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
||||||
{
|
{
|
||||||
m_current_depth_state = ds;
|
m_current_rasterizer_state = rs;
|
||||||
m_context->OMSetDepthStencilState(ds, 0);
|
m_context->RSSetState(rs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||||
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
{
|
||||||
|
m_current_depth_state = ds;
|
||||||
|
m_context->OMSetDepthStencilState(ds, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||||
|
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
||||||
|
{
|
||||||
|
m_current_blend_state = bs;
|
||||||
|
m_current_blend_factor = PL->GetBlendFactor();
|
||||||
|
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
m_current_blend_state = bs;
|
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
|
||||||
m_current_blend_factor = PL->GetBlendFactor();
|
{
|
||||||
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
m_current_compute_shader = cs;
|
||||||
|
m_context->CSSetShader(cs, nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!was_compute)
|
||||||
|
{
|
||||||
|
// need to bind all SRVs/samplers
|
||||||
|
u32 count;
|
||||||
|
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
|
||||||
|
{
|
||||||
|
if (!m_current_textures[count])
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (count > 0)
|
||||||
|
{
|
||||||
|
m_context->CSSetShaderResources(0, count, m_current_textures.data());
|
||||||
|
m_context->CSSetSamplers(0, count, m_current_samplers.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||||
|
{
|
||||||
|
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
|
||||||
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
|
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||||
|
|
||||||
|
m_context->OMSetRenderTargets(0, nullptr, nullptr);
|
||||||
|
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
|
||||||
if (m_current_pipeline != pl)
|
if (m_current_pipeline != pl)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (pl->IsComputePipeline())
|
||||||
|
UnbindComputePipeline();
|
||||||
|
|
||||||
// Let the runtime deal with the dead objects...
|
// Let the runtime deal with the dead objects...
|
||||||
m_current_pipeline = nullptr;
|
m_current_pipeline = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void D3D11Device::UnbindComputePipeline()
|
||||||
|
{
|
||||||
|
m_current_compute_shader = nullptr;
|
||||||
|
|
||||||
|
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
|
||||||
|
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
|
||||||
|
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
|
||||||
|
m_context->CSSetShader(nullptr, nullptr, 0);
|
||||||
|
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
|
||||||
|
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
|
||||||
|
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||||
|
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
|
||||||
|
}
|
||||||
|
|
|
@ -51,13 +51,18 @@ public:
|
||||||
|
|
||||||
void SetDebugName(std::string_view name) override;
|
void SetDebugName(std::string_view name) override;
|
||||||
|
|
||||||
|
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
|
||||||
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
|
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
|
||||||
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
|
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
|
||||||
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
|
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
|
||||||
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
|
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
|
||||||
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
|
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
|
||||||
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
|
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
|
||||||
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
|
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
|
||||||
|
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
|
||||||
|
{
|
||||||
|
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
|
||||||
|
}
|
||||||
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
|
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
|
||||||
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
|
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
|
||||||
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
|
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
|
||||||
|
@ -66,7 +71,8 @@ public:
|
||||||
private:
|
private:
|
||||||
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
|
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
|
||||||
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
|
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
|
||||||
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
|
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
|
||||||
|
u32 blend_factor);
|
||||||
|
|
||||||
ComPtr<ID3D11RasterizerState> m_rs;
|
ComPtr<ID3D11RasterizerState> m_rs;
|
||||||
ComPtr<ID3D11DepthStencilState> m_ds;
|
ComPtr<ID3D11DepthStencilState> m_ds;
|
||||||
|
@ -74,7 +80,7 @@ private:
|
||||||
ComPtr<ID3D11InputLayout> m_il;
|
ComPtr<ID3D11InputLayout> m_il;
|
||||||
ComPtr<ID3D11VertexShader> m_vs;
|
ComPtr<ID3D11VertexShader> m_vs;
|
||||||
ComPtr<ID3D11GeometryShader> m_gs;
|
ComPtr<ID3D11GeometryShader> m_gs;
|
||||||
ComPtr<ID3D11PixelShader> m_ps;
|
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
|
||||||
D3D11_PRIMITIVE_TOPOLOGY m_topology;
|
D3D11_PRIMITIVE_TOPOLOGY m_topology;
|
||||||
u32 m_vertex_stride;
|
u32 m_vertex_stride;
|
||||||
u32 m_blend_factor;
|
u32 m_blend_factor;
|
||||||
|
|
|
@ -115,6 +115,8 @@ public:
|
||||||
ComputePipelineBuilder();
|
ComputePipelineBuilder();
|
||||||
~ComputePipelineBuilder() = default;
|
~ComputePipelineBuilder() = default;
|
||||||
|
|
||||||
|
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);
|
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);
|
||||||
|
|
|
@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
|
||||||
m_features.texture_buffers_emulated_with_ssbo = false;
|
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||||
m_features.feedback_loops = false;
|
m_features.feedback_loops = false;
|
||||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||||
|
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||||
m_features.partial_msaa_resolve = true;
|
m_features.partial_msaa_resolve = true;
|
||||||
m_features.memory_import = false;
|
m_features.memory_import = false;
|
||||||
m_features.explicit_present = true;
|
m_features.explicit_present = true;
|
||||||
|
@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||||
1, // SingleTextureBufferAndPushConstants
|
1, // SingleTextureBufferAndPushConstants
|
||||||
0, // MultiTextureAndUBO
|
0, // MultiTextureAndUBO
|
||||||
2, // MultiTextureAndPushConstants
|
2, // MultiTextureAndPushConstants
|
||||||
|
2, // ComputeSingleTextureAndPushConstants
|
||||||
};
|
};
|
||||||
|
|
||||||
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
|
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||||
|
@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||||
|
|
||||||
const u32 push_param =
|
const u32 push_param =
|
||||||
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
|
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
|
||||||
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||||
|
if (!IsUsingComputeRootSignature())
|
||||||
|
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||||
|
else
|
||||||
|
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void* D3D12Device::MapUniformBuffer(u32 size)
|
void* D3D12Device::MapUniformBuffer(u32 size)
|
||||||
|
@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||||
|
|
||||||
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||||
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||||
|
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
|
||||||
|
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||||
|
if (!(rs = rsb.Create(error, true)))
|
||||||
|
return false;
|
||||||
|
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
|
||||||
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
rt->SetUseFenceValue(GetCurrentFenceValue());
|
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||||
rt->CommitClear(cmdlist);
|
rt->CommitClear(cmdlist);
|
||||||
|
rt->SetState(GPUTexture::State::Dirty);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (m_current_depth_target)
|
if (m_current_depth_target)
|
||||||
|
@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
|
||||||
BeginRenderPass();
|
BeginRenderPass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void D3D12Device::PreDispatchCheck()
|
||||||
|
{
|
||||||
|
if (InRenderPass())
|
||||||
|
EndRenderPass();
|
||||||
|
|
||||||
|
// Transition images.
|
||||||
|
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||||
|
|
||||||
|
// All textures should be in shader read only optimal already, but just in case..
|
||||||
|
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||||
|
for (u32 i = 0; i < num_textures; i++)
|
||||||
|
{
|
||||||
|
if (m_current_textures[i])
|
||||||
|
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
|
||||||
|
{
|
||||||
|
// Still need to clear the RTs.
|
||||||
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
|
{
|
||||||
|
D3D12Texture* const rt = m_current_render_targets[i];
|
||||||
|
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
|
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||||
|
rt->CommitClear(cmdlist);
|
||||||
|
rt->SetState(GPUTexture::State::Dirty);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a new command buffer, bind the pipeline and such.
|
||||||
|
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||||
|
SetInitialPipelineState();
|
||||||
|
|
||||||
|
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
|
||||||
|
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||||
|
const u32 dirty = std::exchange(m_dirty_flags, 0);
|
||||||
|
if (dirty != 0)
|
||||||
|
{
|
||||||
|
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
|
||||||
|
{
|
||||||
|
UpdateRootSignature();
|
||||||
|
if (!UpdateRootParameters(dirty))
|
||||||
|
{
|
||||||
|
SubmitCommandList(false, "out of descriptors");
|
||||||
|
PreDispatchCheck();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
|
||||||
|
{
|
||||||
|
if (!UpdateRootParameters(dirty))
|
||||||
|
{
|
||||||
|
SubmitCommandList(false, "out of descriptors");
|
||||||
|
PreDispatchCheck();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool D3D12Device::IsUsingROVRootSignature() const
|
bool D3D12Device::IsUsingROVRootSignature() const
|
||||||
{
|
{
|
||||||
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
|
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool D3D12Device::IsUsingComputeRootSignature() const
|
||||||
|
{
|
||||||
|
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
|
||||||
|
}
|
||||||
|
|
||||||
void D3D12Device::UpdateRootSignature()
|
void D3D12Device::UpdateRootSignature()
|
||||||
{
|
{
|
||||||
GetCommandList()->SetGraphicsRootSignature(
|
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||||
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
if (!IsUsingComputeRootSignature())
|
||||||
|
{
|
||||||
|
cmdlist->SetGraphicsRootSignature(
|
||||||
|
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<GPUPipeline::Layout layout>
|
template<GPUPipeline::Layout layout>
|
||||||
|
@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||||
|
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
||||||
|
else
|
||||||
|
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
|
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
|
||||||
|
@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||||
|
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
||||||
|
else
|
||||||
|
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
|
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
|
||||||
|
@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||||
1 :
|
1 :
|
||||||
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
|
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
|
||||||
2);
|
2);
|
||||||
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||||
|
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
||||||
|
else
|
||||||
|
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
|
||||||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||||
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||||
|
|
||||||
|
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||||
|
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UnreachableCode();
|
UnreachableCode();
|
||||||
}
|
}
|
||||||
|
@ -2331,3 +2435,15 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
||||||
{
|
{
|
||||||
Panic("Barriers are not supported");
|
Panic("Barriers are not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Records a compute dispatch that covers at least threads_{x,y,z} invocations, using
// thread groups of group_size_{x,y,z}. Group sizes must be non-zero.
void D3D12Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                           u32 group_size_z)
{
  PreDispatchCheck();
  s_stats.num_draws++;

  // Round up rather than truncate: when the thread count is not a multiple of the group
  // size, the trailing partially-filled group must still be launched, otherwise the last
  // rows/columns are never processed. Written as quotient + remainder test so the
  // computation cannot overflow. Exact multiples yield the same count as plain division.
  const auto group_count = [](u32 threads, u32 group_size) -> u32 {
    return (threads / group_size) + static_cast<u32>((threads % group_size) != 0);
  };

  GetCommandList()->Dispatch(group_count(threads_x, group_size_x), group_count(threads_y, group_size_y),
                             group_count(threads_z, group_size_z));
}
|
||||||
|
|
|
@ -96,6 +96,7 @@ public:
|
||||||
std::string_view source, const char* entry_point,
|
std::string_view source, const char* entry_point,
|
||||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||||
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||||
|
|
||||||
void PushDebugGroup(const char* name) override;
|
void PushDebugGroup(const char* name) override;
|
||||||
void PopDebugGroup() override;
|
void PopDebugGroup() override;
|
||||||
|
@ -119,6 +120,8 @@ public:
|
||||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||||
|
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) override;
|
||||||
|
|
||||||
bool SetGPUTimingEnabled(bool enabled) override;
|
bool SetGPUTimingEnabled(bool enabled) override;
|
||||||
float GetAndResetAccumulatedGPUTime() override;
|
float GetAndResetAccumulatedGPUTime() override;
|
||||||
|
@ -275,8 +278,10 @@ private:
|
||||||
ID3D12RootSignature* GetCurrentRootSignature() const;
|
ID3D12RootSignature* GetCurrentRootSignature() const;
|
||||||
void SetInitialPipelineState();
|
void SetInitialPipelineState();
|
||||||
void PreDrawCheck();
|
void PreDrawCheck();
|
||||||
|
void PreDispatchCheck();
|
||||||
|
|
||||||
bool IsUsingROVRootSignature() const;
|
bool IsUsingROVRootSignature() const;
|
||||||
|
bool IsUsingComputeRootSignature() const;
|
||||||
void UpdateRootSignature();
|
void UpdateRootSignature();
|
||||||
template<GPUPipeline::Layout layout>
|
template<GPUPipeline::Layout layout>
|
||||||
bool UpdateParametersForLayout(u32 dirty);
|
bool UpdateParametersForLayout(u32 dirty);
|
||||||
|
|
|
@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
|
||||||
return SHA1Digest::DigestToString(digest);
|
return SHA1Digest::DigestToString(digest);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
|
||||||
|
{
|
||||||
|
SHA1Digest hash;
|
||||||
|
hash.Update(&config.layout, sizeof(config.layout));
|
||||||
|
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
|
||||||
|
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
|
||||||
|
|
||||||
|
u8 digest[SHA1Digest::DIGEST_SIZE];
|
||||||
|
hash.Final(digest);
|
||||||
|
return SHA1Digest::DigestToString(digest);
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
|
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
|
||||||
{
|
{
|
||||||
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
|
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
|
||||||
|
@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
||||||
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
|
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
|
||||||
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
|
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a compute pipeline for the given layout and shader. When a pipeline library is
// available it is consulted first, and a newly created pipeline is stored back into it on
// a cache miss. Returns an empty pointer if the pipeline could not be created.
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  const D3D12Shader* const shader = static_cast<const D3D12Shader*>(config.compute_shader);

  // Compute layouts only use the first root signature set.
  D3D12::ComputePipelineBuilder cpb;
  cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
  cpb.SetShader(shader->GetBytecodeData(), shader->GetBytecodeSize());

  ComPtr<ID3D12PipelineState> pipeline;
  if (!m_pipeline_library)
  {
    // No library: always compile from scratch.
    pipeline = cpb.Create(m_device.Get(), error, false);
  }
  else
  {
    const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
    HRESULT hr =
      m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
    if (FAILED(hr))
    {
      // E_INVALIDARG = not found.
      const bool not_in_library = (hr == E_INVALIDARG);
      if (!not_in_library)
        ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));

      // Need to create it normally.
      pipeline = cpb.Create(m_device.Get(), error, false);

      // Store if it wasn't an OOM or something else.
      if (pipeline && not_in_library)
      {
        hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
        if (FAILED(hr))
          ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
      }
    }
  }

  if (!pipeline)
    return {};

  // Topology is passed as undefined and the two trailing arguments as zero.
  return std::unique_ptr<GPUPipeline>(
    new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}
|
||||||
|
|
|
@ -51,6 +51,7 @@ public:
|
||||||
void SetDebugName(std::string_view name) override;
|
void SetDebugName(std::string_view name) override;
|
||||||
|
|
||||||
static std::string GetPipelineName(const GraphicsConfig& config);
|
static std::string GetPipelineName(const GraphicsConfig& config);
|
||||||
|
static std::string GetPipelineName(const ComputeConfig& config);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,
|
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,
|
||||||
|
|
|
@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||||
|
|
||||||
// Need to know if there's UBOs for mapping.
|
// Need to know if there's UBOs for mapping.
|
||||||
const spvc_reflected_resource *ubos, *textures;
|
const spvc_reflected_resource *ubos, *textures;
|
||||||
size_t ubos_count, textures_count;
|
size_t ubos_count, textures_count, images_count;
|
||||||
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
|
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
|
||||||
&ubos_count)) != SPVC_SUCCESS ||
|
&ubos_count)) != SPVC_SUCCESS ||
|
||||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
|
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
|
||||||
&textures, &textures_count)) != SPVC_SUCCESS)
|
&textures, &textures_count)) != SPVC_SUCCESS ||
|
||||||
|
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
|
||||||
|
&textures, &images_count)) != SPVC_SUCCESS)
|
||||||
{
|
{
|
||||||
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
|
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
|
||||||
return {};
|
return {};
|
||||||
|
@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||||
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
|
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
|
||||||
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
|
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
|
||||||
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
|
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
|
||||||
|
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
|
||||||
|
|
||||||
switch (target_language)
|
switch (target_language)
|
||||||
{
|
{
|
||||||
|
@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (stage == GPUShaderStage::Compute)
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < images_count; i++)
|
||||||
|
{
|
||||||
|
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
|
||||||
|
.desc_set = IMAGE_DESCRIPTOR_SET,
|
||||||
|
.binding = i,
|
||||||
|
.cbv = {},
|
||||||
|
.uav = {.register_space = 0, .register_binding = i},
|
||||||
|
.srv = {},
|
||||||
|
.sampler = {}};
|
||||||
|
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||||
|
{
|
||||||
|
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stage == GPUShaderStage::Fragment)
|
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
|
||||||
|
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
|
||||||
|
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
|
||||||
|
.msl_buffer = 0,
|
||||||
|
.msl_texture = 0,
|
||||||
|
.msl_sampler = 0};
|
||||||
|
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
|
||||||
|
{
|
||||||
|
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
|
||||||
|
static_cast<int>(sres));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||||
{
|
{
|
||||||
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
|
const spvc_msl_resource_binding rb = {.stage = execmodel,
|
||||||
.desc_set = 1,
|
.desc_set = TEXTURE_DESCRIPTOR_SET,
|
||||||
.binding = i,
|
.binding = i,
|
||||||
.msl_buffer = i,
|
.msl_buffer = i,
|
||||||
.msl_texture = i,
|
.msl_texture = i,
|
||||||
|
@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!m_features.framebuffer_fetch)
|
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
|
||||||
|
{
|
||||||
|
const spvc_msl_resource_binding rb = {
|
||||||
|
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||||
|
|
||||||
|
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||||
|
{
|
||||||
|
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||||
|
static_cast<int>(sres));
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stage == GPUShaderStage::Compute)
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||||
{
|
{
|
||||||
const spvc_msl_resource_binding rb = {
|
const spvc_msl_resource_binding rb = {
|
||||||
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
|
||||||
|
|
||||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||||
{
|
{
|
||||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||||
static_cast<int>(sres));
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -160,6 +160,9 @@ public:
|
||||||
// Multiple textures, 128 byte UBO via push constants.
|
// Multiple textures, 128 byte UBO via push constants.
|
||||||
MultiTextureAndPushConstants,
|
MultiTextureAndPushConstants,
|
||||||
|
|
||||||
|
// 128 byte UBO via push constants, 1 texture, compute shader.
|
||||||
|
ComputeSingleTextureAndPushConstants,
|
||||||
|
|
||||||
MaxCount
|
MaxCount
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -416,6 +419,12 @@ public:
|
||||||
u32 GetRenderTargetCount() const;
|
u32 GetRenderTargetCount() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ComputeConfig
|
||||||
|
{
|
||||||
|
Layout layout;
|
||||||
|
GPUShader* compute_shader;
|
||||||
|
};
|
||||||
|
|
||||||
GPUPipeline();
|
GPUPipeline();
|
||||||
virtual ~GPUPipeline();
|
virtual ~GPUPipeline();
|
||||||
|
|
||||||
|
@ -501,9 +510,10 @@ public:
|
||||||
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
|
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
|
||||||
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
|
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
|
||||||
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
|
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
|
||||||
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
|
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
|
||||||
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
|
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
|
||||||
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
|
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
|
||||||
|
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class DrawBarrier : u32
|
enum class DrawBarrier : u32
|
||||||
|
@ -532,6 +542,7 @@ public:
|
||||||
bool texture_buffers_emulated_with_ssbo : 1;
|
bool texture_buffers_emulated_with_ssbo : 1;
|
||||||
bool feedback_loops : 1;
|
bool feedback_loops : 1;
|
||||||
bool geometry_shaders : 1;
|
bool geometry_shaders : 1;
|
||||||
|
bool compute_shaders : 1;
|
||||||
bool partial_msaa_resolve : 1;
|
bool partial_msaa_resolve : 1;
|
||||||
bool memory_import : 1;
|
bool memory_import : 1;
|
||||||
bool explicit_present : 1;
|
bool explicit_present : 1;
|
||||||
|
@ -625,11 +636,20 @@ public:
|
||||||
0, // SingleTextureBufferAndPushConstants
|
0, // SingleTextureBufferAndPushConstants
|
||||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
|
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
|
||||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
|
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
|
||||||
|
1, // ComputeSingleTextureAndPushConstants
|
||||||
};
|
};
|
||||||
|
|
||||||
return counts[static_cast<u8>(layout)];
|
return counts[static_cast<u8>(layout)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of thread groups to dispatch for a given total count and local size.
|
||||||
|
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
|
||||||
|
u32 local_size_y, u32 local_size_z)
|
||||||
|
{
|
||||||
|
return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y,
|
||||||
|
(count_z + (local_size_z - 1)) / local_size_z);
|
||||||
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
|
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
|
||||||
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
|
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
|
||||||
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
|
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
|
||||||
|
@ -638,10 +658,6 @@ public:
|
||||||
|
|
||||||
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
|
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
|
||||||
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
|
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
|
||||||
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
|
|
||||||
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
|
|
||||||
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
|
|
||||||
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
|
|
||||||
|
|
||||||
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
|
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
|
||||||
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
|
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
|
||||||
|
@ -712,6 +728,8 @@ public:
|
||||||
Error* error = nullptr, const char* entry_point = "main");
|
Error* error = nullptr, const char* entry_point = "main");
|
||||||
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
|
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
|
||||||
Error* error = nullptr) = 0;
|
Error* error = nullptr) = 0;
|
||||||
|
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
|
||||||
|
Error* error = nullptr) = 0;
|
||||||
|
|
||||||
/// Debug messaging.
|
/// Debug messaging.
|
||||||
virtual void PushDebugGroup(const char* name) = 0;
|
virtual void PushDebugGroup(const char* name) = 0;
|
||||||
|
@ -753,6 +771,8 @@ public:
|
||||||
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
|
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
|
||||||
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
|
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
|
||||||
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
|
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
|
||||||
|
virtual void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) = 0;
|
||||||
|
|
||||||
/// Returns false if the window was completely occluded.
|
/// Returns false if the window was completely occluded.
|
||||||
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
|
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
|
||||||
|
|
|
@ -78,7 +78,16 @@ class MetalPipeline final : public GPUPipeline
|
||||||
public:
|
public:
|
||||||
~MetalPipeline() override;
|
~MetalPipeline() override;
|
||||||
|
|
||||||
ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; }
|
ALWAYS_INLINE bool IsRenderPipeline() const { return (m_depth != nil); }
|
||||||
|
ALWAYS_INLINE bool IsComputePipeline() const { return (m_depth == nil); }
|
||||||
|
ALWAYS_INLINE id<MTLRenderPipelineState> GetRenderPipelineState() const
|
||||||
|
{
|
||||||
|
return (id<MTLRenderPipelineState>)m_pipeline;
|
||||||
|
}
|
||||||
|
ALWAYS_INLINE id<MTLComputePipelineState> GetComputePipelineState() const
|
||||||
|
{
|
||||||
|
return (id<MTLComputePipelineState>)m_pipeline;
|
||||||
|
}
|
||||||
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
|
ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
|
||||||
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
|
ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
|
||||||
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
|
ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
|
||||||
|
@ -86,10 +95,9 @@ public:
|
||||||
void SetDebugName(std::string_view name) override;
|
void SetDebugName(std::string_view name) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, MTLPrimitiveType primitive);
|
||||||
MTLPrimitiveType primitive);
|
|
||||||
|
|
||||||
id<MTLRenderPipelineState> m_pipeline;
|
id m_pipeline;
|
||||||
id<MTLDepthStencilState> m_depth;
|
id<MTLDepthStencilState> m_depth;
|
||||||
MTLCullMode m_cull_mode;
|
MTLCullMode m_cull_mode;
|
||||||
MTLPrimitiveType m_primitive;
|
MTLPrimitiveType m_primitive;
|
||||||
|
@ -251,6 +259,7 @@ public:
|
||||||
std::string_view source, const char* entry_point,
|
std::string_view source, const char* entry_point,
|
||||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||||
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||||
|
|
||||||
void PushDebugGroup(const char* name) override;
|
void PushDebugGroup(const char* name) override;
|
||||||
void PopDebugGroup() override;
|
void PopDebugGroup() override;
|
||||||
|
@ -265,7 +274,7 @@ public:
|
||||||
void* MapUniformBuffer(u32 size) override;
|
void* MapUniformBuffer(u32 size) override;
|
||||||
void UnmapUniformBuffer(u32 size) override;
|
void UnmapUniformBuffer(u32 size) override;
|
||||||
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
||||||
GPUPipeline::RenderPassFlag feedback_loop) override;
|
GPUPipeline::RenderPassFlag flags) override;
|
||||||
void SetPipeline(GPUPipeline* pipeline) override;
|
void SetPipeline(GPUPipeline* pipeline) override;
|
||||||
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
|
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
|
||||||
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
|
||||||
|
@ -274,6 +283,8 @@ public:
|
||||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||||
|
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) override;
|
||||||
|
|
||||||
bool SetGPUTimingEnabled(bool enabled) override;
|
bool SetGPUTimingEnabled(bool enabled) override;
|
||||||
float GetAndResetAccumulatedGPUTime() override;
|
float GetAndResetAccumulatedGPUTime() override;
|
||||||
|
@ -338,7 +349,6 @@ private:
|
||||||
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
|
std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
|
||||||
std::string_view entry_point, Error* error);
|
std::string_view entry_point, Error* error);
|
||||||
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
|
id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
|
||||||
id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
|
|
||||||
ClearPipelineConfig GetCurrentClearPipelineConfig() const;
|
ClearPipelineConfig GetCurrentClearPipelineConfig() const;
|
||||||
id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
|
id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
|
||||||
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
|
id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
|
||||||
|
@ -349,9 +359,12 @@ private:
|
||||||
void CleanupObjects();
|
void CleanupObjects();
|
||||||
|
|
||||||
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
|
ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
|
||||||
|
ALWAYS_INLINE bool InComputePass() const { return (m_compute_encoder != nil); }
|
||||||
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
|
ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
|
||||||
void BeginRenderPass();
|
void BeginRenderPass();
|
||||||
void EndRenderPass();
|
void EndRenderPass();
|
||||||
|
void BeginComputePass();
|
||||||
|
void EndComputePass();
|
||||||
void EndInlineUploading();
|
void EndInlineUploading();
|
||||||
void EndAnyEncoding();
|
void EndAnyEncoding();
|
||||||
|
|
||||||
|
@ -359,6 +372,8 @@ private:
|
||||||
void SetInitialEncoderState();
|
void SetInitialEncoderState();
|
||||||
void SetViewportInRenderEncoder();
|
void SetViewportInRenderEncoder();
|
||||||
void SetScissorInRenderEncoder();
|
void SetScissorInRenderEncoder();
|
||||||
|
void CommitRenderTargetClears();
|
||||||
|
void BindRenderTargetsAsComputeImages();
|
||||||
|
|
||||||
void RenderBlankFrame(MetalSwapChain* swap_chain);
|
void RenderBlankFrame(MetalSwapChain* swap_chain);
|
||||||
|
|
||||||
|
@ -384,7 +399,7 @@ private:
|
||||||
|
|
||||||
id<MTLLibrary> m_shaders = nil;
|
id<MTLLibrary> m_shaders = nil;
|
||||||
id<MTLBinaryArchive> m_pipeline_archive = nil;
|
id<MTLBinaryArchive> m_pipeline_archive = nil;
|
||||||
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
|
std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, std::unique_ptr<GPUPipeline>>>
|
||||||
m_resolve_pipelines;
|
m_resolve_pipelines;
|
||||||
std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
|
std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
|
||||||
|
|
||||||
|
@ -394,9 +409,10 @@ private:
|
||||||
|
|
||||||
id<MTLCommandBuffer> m_render_cmdbuf = nil;
|
id<MTLCommandBuffer> m_render_cmdbuf = nil;
|
||||||
id<MTLRenderCommandEncoder> m_render_encoder = nil;
|
id<MTLRenderCommandEncoder> m_render_encoder = nil;
|
||||||
|
id<MTLComputeCommandEncoder> m_compute_encoder = nil;
|
||||||
|
|
||||||
u8 m_num_current_render_targets = 0;
|
u8 m_num_current_render_targets = 0;
|
||||||
GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
GPUPipeline::RenderPassFlag m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
|
||||||
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
|
std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
|
||||||
MetalTexture* m_current_depth_target = nullptr;
|
MetalTexture* m_current_depth_target = nullptr;
|
||||||
|
|
||||||
|
|
|
@ -77,7 +77,8 @@ static void LogNSError(NSError* error, std::string_view message)
|
||||||
{
|
{
|
||||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, message);
|
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, message);
|
||||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Code: {}", static_cast<u32>(error.code));
|
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Code: {}", static_cast<u32>(error.code));
|
||||||
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}", [error.description UTF8String]);
|
Log::FastWrite(Log::Channel::GPUDevice, Log::Level::Error, " NSError Description: {}",
|
||||||
|
[error.description UTF8String]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt)
|
static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt)
|
||||||
|
@ -503,28 +504,6 @@ id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSSt
|
||||||
return function;
|
return function;
|
||||||
}
|
}
|
||||||
|
|
||||||
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
|
|
||||||
{
|
|
||||||
MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new];
|
|
||||||
if (name != nil)
|
|
||||||
[desc setLabel:name];
|
|
||||||
[desc setComputeFunction:function];
|
|
||||||
|
|
||||||
NSError* err = nil;
|
|
||||||
id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
|
|
||||||
options:MTLPipelineOptionNone
|
|
||||||
reflection:nil
|
|
||||||
error:&err];
|
|
||||||
[desc release];
|
|
||||||
if (pipeline == nil)
|
|
||||||
{
|
|
||||||
LogNSError(err, "Create compute pipeline failed:");
|
|
||||||
return nil;
|
|
||||||
}
|
|
||||||
|
|
||||||
return pipeline;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MetalDevice::DestroyDevice()
|
void MetalDevice::DestroyDevice()
|
||||||
{
|
{
|
||||||
WaitForPreviousCommandBuffers();
|
WaitForPreviousCommandBuffers();
|
||||||
|
@ -564,11 +543,6 @@ void MetalDevice::DestroyDevice()
|
||||||
[it.second release];
|
[it.second release];
|
||||||
}
|
}
|
||||||
m_depth_states.clear();
|
m_depth_states.clear();
|
||||||
for (auto& it : m_resolve_pipelines)
|
|
||||||
{
|
|
||||||
if (it.second != nil)
|
|
||||||
[it.second release];
|
|
||||||
}
|
|
||||||
m_resolve_pipelines.clear();
|
m_resolve_pipelines.clear();
|
||||||
for (auto& it : m_clear_pipelines)
|
for (auto& it : m_clear_pipelines)
|
||||||
{
|
{
|
||||||
|
@ -755,7 +729,7 @@ std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage st
|
||||||
return CreateShaderFromMSL(stage, source, entry_point, error);
|
return CreateShaderFromMSL(stage, source, entry_point, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
MetalPipeline::MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
MetalPipeline::MetalPipeline(id pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
|
||||||
MTLPrimitiveType primitive)
|
MTLPrimitiveType primitive)
|
||||||
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
|
: m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
|
||||||
{
|
{
|
||||||
|
@ -982,6 +956,29 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a compute pipeline from the compute shader referenced by `config`.
// Returns an empty pointer on failure, after logging the NSError and
// forwarding it into `error`.
std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  @autoreleasepool
  {
    MTLComputePipelineDescriptor* descriptor = [[[MTLComputePipelineDescriptor alloc] init] autorelease];
    descriptor.computeFunction = static_cast<MetalShader*>(config.compute_shader)->GetFunction();

    NSError* creation_error = nil;
    id<MTLComputePipelineState> state = [m_device newComputePipelineStateWithDescriptor:descriptor
                                                                                 options:MTLPipelineOptionNone
                                                                              reflection:nil
                                                                                   error:&creation_error];
    if (state != nil)
    {
      // No depth state is supplied; cull mode and primitive type are fixed placeholder values here.
      return std::unique_ptr<GPUPipeline>(new MetalPipeline(state, nil, MTLCullModeNone, MTLPrimitiveTypePoint));
    }

    LogNSError(creation_error, "Failed to create compute pipeline state");
    CocoaTools::NSErrorToErrorObject(error, "newComputePipelineStateWithDescriptor failed: ", creation_error);
    return {};
  }
}
|
||||||
|
|
||||||
MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
|
MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
|
||||||
Format format)
|
Format format)
|
||||||
: GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture)
|
: GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture)
|
||||||
|
@ -1559,14 +1556,14 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
|
||||||
|
|
||||||
const GPUTexture::Format src_format = dst->GetFormat();
|
const GPUTexture::Format src_format = dst->GetFormat();
|
||||||
const GPUTexture::Format dst_format = dst->GetFormat();
|
const GPUTexture::Format dst_format = dst->GetFormat();
|
||||||
id<MTLComputePipelineState> resolve_pipeline = nil;
|
GPUPipeline* resolve_pipeline;
|
||||||
if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
|
if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
|
||||||
[src_format, dst_format](const auto& it) {
|
[src_format, dst_format](const auto& it) {
|
||||||
return it.first.first == src_format && it.first.second == dst_format;
|
return it.first.first == src_format && it.first.second == dst_format;
|
||||||
});
|
});
|
||||||
iter != m_resolve_pipelines.end())
|
iter != m_resolve_pipelines.end())
|
||||||
{
|
{
|
||||||
resolve_pipeline = iter->second;
|
resolve_pipeline = iter->second.get();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1579,32 +1576,41 @@ void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3
|
||||||
if (function == nil)
|
if (function == nil)
|
||||||
Panic("Failed to get resolve kernel");
|
Panic("Failed to get resolve kernel");
|
||||||
|
|
||||||
resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease];
|
MetalShader temp_shader(GPUShaderStage::Compute, m_shaders, function);
|
||||||
if (resolve_pipeline != nil)
|
GPUPipeline::ComputeConfig config;
|
||||||
[resolve_pipeline retain];
|
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
|
||||||
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
|
config.compute_shader = &temp_shader;
|
||||||
|
|
||||||
|
std::unique_ptr<GPUPipeline> pipeline = CreatePipeline(config, nullptr);
|
||||||
|
if (!pipeline)
|
||||||
|
Panic("Failed to create resolve pipeline");
|
||||||
|
|
||||||
|
GL_OBJECT_NAME(pipeline, is_depth ? "Depth Resolve" : "Color Resolve");
|
||||||
|
resolve_pipeline =
|
||||||
|
m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), std::move(pipeline)).second.get();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (resolve_pipeline == nil)
|
|
||||||
Panic("Failed to get resolve pipeline");
|
|
||||||
|
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
|
||||||
s_stats.num_copies++;
|
s_stats.num_copies++;
|
||||||
|
|
||||||
const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
|
const id<MTLComputePipelineState> mtl_pipeline =
|
||||||
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
|
static_cast<MetalPipeline*>(resolve_pipeline)->GetComputePipelineState();
|
||||||
|
const u32 threadgroupHeight = mtl_pipeline.maxTotalThreadsPerThreadgroup / mtl_pipeline.threadExecutionWidth;
|
||||||
|
const MTLSize intrinsicThreadgroupSize = MTLSizeMake(mtl_pipeline.threadExecutionWidth, threadgroupHeight, 1);
|
||||||
const MTLSize threadgroupsInGrid =
|
const MTLSize threadgroupsInGrid =
|
||||||
MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
|
MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
|
||||||
(src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
|
(src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
|
||||||
|
|
||||||
id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
|
// Set up manually to not disturb state.
|
||||||
[computeEncoder setComputePipelineState:resolve_pipeline];
|
BeginComputePass();
|
||||||
[computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
|
[m_compute_encoder setComputePipelineState:mtl_pipeline];
|
||||||
[computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
|
[m_compute_encoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
|
||||||
[computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
|
[m_compute_encoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
|
||||||
[computeEncoder endEncoding];
|
[m_compute_encoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
|
||||||
|
EndComputePass();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
|
void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
|
||||||
|
@ -1645,7 +1651,7 @@ void MetalDevice::ClearDepth(GPUTexture* t, float d)
|
||||||
|
|
||||||
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
[m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
|
||||||
if (m_current_pipeline)
|
if (m_current_pipeline)
|
||||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||||
if (m_current_cull_mode != MTLCullModeNone)
|
if (m_current_cull_mode != MTLCullModeNone)
|
||||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||||
if (depth != m_current_depth_state)
|
if (depth != m_current_depth_state)
|
||||||
|
@ -1674,6 +1680,8 @@ void MetalDevice::CommitClear(MetalTexture* tex)
|
||||||
// TODO: We could combine it with the current render pass.
|
// TODO: We could combine it with the current render pass.
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
else if (InComputePass())
|
||||||
|
EndComputePass();
|
||||||
|
|
||||||
@autoreleasepool
|
@autoreleasepool
|
||||||
{
|
{
|
||||||
|
@ -1896,11 +1904,13 @@ void MetalDevice::UnmapUniformBuffer(u32 size)
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
|
||||||
GPUPipeline::RenderPassFlag feedback_loop)
|
GPUPipeline::RenderPassFlag flags)
|
||||||
{
|
{
|
||||||
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
|
bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
|
||||||
(!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) !=
|
((flags & GPUPipeline::BindRenderTargetsAsImages) !=
|
||||||
(m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))));
|
(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) ||
|
||||||
|
(!m_features.framebuffer_fetch && ((flags & GPUPipeline::ColorFeedbackLoop) !=
|
||||||
|
(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))));
|
||||||
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
|
bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
|
||||||
bool needs_rt_clear = false;
|
bool needs_rt_clear = false;
|
||||||
|
|
||||||
|
@ -1915,12 +1925,19 @@ void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
||||||
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
|
for (u32 i = num_rts; i < m_num_current_render_targets; i++)
|
||||||
m_current_render_targets[i] = nullptr;
|
m_current_render_targets[i] = nullptr;
|
||||||
m_num_current_render_targets = static_cast<u8>(num_rts);
|
m_num_current_render_targets = static_cast<u8>(num_rts);
|
||||||
m_current_feedback_loop = feedback_loop;
|
m_current_render_pass_flags = flags;
|
||||||
|
|
||||||
if (changed || needs_rt_clear || needs_ds_clear)
|
if (changed || needs_rt_clear || needs_ds_clear)
|
||||||
{
|
{
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
|
{
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
}
|
||||||
|
else if (InComputePass() && (flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||||
|
{
|
||||||
|
CommitRenderTargetClears();
|
||||||
|
BindRenderTargetsAsComputeImages();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1931,26 +1948,34 @@ void MetalDevice::SetPipeline(GPUPipeline* pipeline)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
|
m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
|
||||||
if (InRenderPass())
|
if (!m_current_pipeline->IsComputePipeline())
|
||||||
{
|
{
|
||||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
if (InRenderPass())
|
||||||
|
{
|
||||||
|
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||||
|
|
||||||
if (m_current_depth_state != m_current_pipeline->GetDepthState())
|
if (m_current_depth_state != m_current_pipeline->GetDepthState())
|
||||||
{
|
{
|
||||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||||
|
}
|
||||||
|
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
|
||||||
|
{
|
||||||
|
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
||||||
|
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (m_current_cull_mode != m_current_pipeline->GetCullMode())
|
else
|
||||||
{
|
{
|
||||||
|
// Still need to set depth state before the draw begins.
|
||||||
|
m_current_depth_state = m_current_pipeline->GetDepthState();
|
||||||
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
||||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Still need to set depth state before the draw begins.
|
if (InComputePass())
|
||||||
m_current_depth_state = m_current_pipeline->GetDepthState();
|
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
|
||||||
m_current_cull_mode = m_current_pipeline->GetCullMode();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1979,6 +2004,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||||
m_current_textures[slot] = T;
|
m_current_textures[slot] = T;
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
[m_render_encoder setFragmentTexture:T atIndex:slot];
|
[m_render_encoder setFragmentTexture:T atIndex:slot];
|
||||||
|
else if (InComputePass())
|
||||||
|
[m_compute_encoder setTexture:T atIndex:slot];
|
||||||
}
|
}
|
||||||
|
|
||||||
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
|
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
|
||||||
|
@ -1987,6 +2014,8 @@ void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||||
m_current_samplers[slot] = S;
|
m_current_samplers[slot] = S;
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
[m_render_encoder setFragmentSamplerState:S atIndex:slot];
|
[m_render_encoder setFragmentSamplerState:S atIndex:slot];
|
||||||
|
else if (InComputePass())
|
||||||
|
[m_compute_encoder setTexture:T atIndex:slot];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2011,6 +2040,8 @@ void MetalDevice::UnbindTexture(MetalTexture* tex)
|
||||||
m_current_textures[i] = nil;
|
m_current_textures[i] = nil;
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
[m_render_encoder setFragmentTexture:nil atIndex:i];
|
[m_render_encoder setFragmentTexture:nil atIndex:i];
|
||||||
|
else if (InComputePass())
|
||||||
|
[m_compute_encoder setTexture:nil atIndex:0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2070,7 +2101,7 @@ void MetalDevice::SetScissor(const GSVector4i rc)
|
||||||
|
|
||||||
void MetalDevice::BeginRenderPass()
|
void MetalDevice::BeginRenderPass()
|
||||||
{
|
{
|
||||||
DebugAssert(m_render_encoder == nil);
|
DebugAssert(m_render_encoder == nil && !InComputePass());
|
||||||
|
|
||||||
// Inline writes :(
|
// Inline writes :(
|
||||||
if (m_inline_upload_encoder != nil)
|
if (m_inline_upload_encoder != nil)
|
||||||
|
@ -2180,12 +2211,57 @@ void MetalDevice::BeginRenderPass()
|
||||||
|
|
||||||
void MetalDevice::EndRenderPass()
|
void MetalDevice::EndRenderPass()
|
||||||
{
|
{
|
||||||
DebugAssert(InRenderPass() && !IsInlineUploading());
|
DebugAssert(InRenderPass() && !IsInlineUploading() && !InComputePass());
|
||||||
[m_render_encoder endEncoding];
|
[m_render_encoder endEncoding];
|
||||||
[m_render_encoder release];
|
[m_render_encoder release];
|
||||||
m_render_encoder = nil;
|
m_render_encoder = nil;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MetalDevice::BeginComputePass()
|
||||||
|
{
|
||||||
|
DebugAssert(!InRenderPass() && !IsInlineUploading() && !InComputePass());
|
||||||
|
|
||||||
|
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||||
|
CommitRenderTargetClears();
|
||||||
|
|
||||||
|
m_compute_encoder = [[m_render_cmdbuf computeCommandEncoder] retain];
|
||||||
|
[m_compute_encoder setTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||||
|
[m_compute_encoder setSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||||
|
|
||||||
|
if ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != GPUPipeline::NoRenderPassFlags)
|
||||||
|
BindRenderTargetsAsComputeImages();
|
||||||
|
|
||||||
|
if (m_current_pipeline && m_current_pipeline->IsComputePipeline())
|
||||||
|
[m_compute_encoder setComputePipelineState:m_current_pipeline->GetComputePipelineState()];
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetalDevice::CommitRenderTargetClears()
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
|
{
|
||||||
|
MetalTexture* rt = m_current_render_targets[i];
|
||||||
|
if (rt->GetState() == GPUTexture::State::Invalidated)
|
||||||
|
rt->SetState(GPUTexture::State::Dirty);
|
||||||
|
else if (rt->GetState() == GPUTexture::State::Cleared)
|
||||||
|
CommitClear(rt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetalDevice::BindRenderTargetsAsComputeImages()
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
|
[m_compute_encoder setTexture:m_current_render_targets[i]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS + i];
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetalDevice::EndComputePass()
|
||||||
|
{
|
||||||
|
DebugAssert(InComputePass());
|
||||||
|
|
||||||
|
[m_compute_encoder endEncoding];
|
||||||
|
[m_compute_encoder release];
|
||||||
|
m_compute_encoder = nil;
|
||||||
|
}
|
||||||
|
|
||||||
void MetalDevice::EndInlineUploading()
|
void MetalDevice::EndInlineUploading()
|
||||||
{
|
{
|
||||||
DebugAssert(IsInlineUploading() && !InRenderPass());
|
DebugAssert(IsInlineUploading() && !InRenderPass());
|
||||||
|
@ -2198,6 +2274,8 @@ void MetalDevice::EndAnyEncoding()
|
||||||
{
|
{
|
||||||
if (InRenderPass())
|
if (InRenderPass())
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
else if (InComputePass())
|
||||||
|
EndComputePass();
|
||||||
else if (IsInlineUploading())
|
else if (IsInlineUploading())
|
||||||
EndInlineUploading();
|
EndInlineUploading();
|
||||||
}
|
}
|
||||||
|
@ -2213,14 +2291,14 @@ void MetalDevice::SetInitialEncoderState()
|
||||||
[m_render_encoder setCullMode:m_current_cull_mode];
|
[m_render_encoder setCullMode:m_current_cull_mode];
|
||||||
if (m_current_depth_state != nil)
|
if (m_current_depth_state != nil)
|
||||||
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
[m_render_encoder setDepthStencilState:m_current_depth_state];
|
||||||
if (m_current_pipeline != nil)
|
if (m_current_pipeline && m_current_pipeline->IsRenderPipeline())
|
||||||
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
|
[m_render_encoder setRenderPipelineState:m_current_pipeline->GetRenderPipelineState()];
|
||||||
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
[m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||||
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
[m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
|
||||||
if (m_current_ssbo)
|
if (m_current_ssbo)
|
||||||
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
|
[m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];
|
||||||
|
|
||||||
if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))
|
if (!m_features.framebuffer_fetch && (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))
|
||||||
{
|
{
|
||||||
DebugAssert(m_current_render_targets[0]);
|
DebugAssert(m_current_render_targets[0]);
|
||||||
[m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
|
[m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
|
||||||
|
@ -2249,7 +2327,12 @@ void MetalDevice::SetScissorInRenderEncoder()
|
||||||
void MetalDevice::PreDrawCheck()
|
void MetalDevice::PreDrawCheck()
|
||||||
{
|
{
|
||||||
if (!InRenderPass())
|
if (!InRenderPass())
|
||||||
|
{
|
||||||
|
if (InComputePass())
|
||||||
|
EndComputePass();
|
||||||
|
|
||||||
BeginRenderPass();
|
BeginRenderPass();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
|
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
|
||||||
|
@ -2392,6 +2475,25 @@ void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Encodes a compute dispatch of threads_x * threads_y * threads_z threads,
// with the given threadgroup dimensions.
//
// If no compute pass is open, any active render pass is ended and a compute
// pass is started first. A compute pipeline must already be bound via
// SetPipeline(); it is applied to the encoder there or in BeginComputePass().
void MetalDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                           u32 group_size_z)
{
  if (!InComputePass())
  {
    if (InRenderPass())
      EndRenderPass();

    BeginComputePass();
  }

  DebugAssert(m_current_pipeline && m_current_pipeline->IsComputePipeline());

  // TODO: We could remap to the optimal group size..
  // NOTE(review): dispatchThreads takes a total thread count, not a threadgroup count; it relies on
  // non-uniform threadgroup size support - confirm the minimum GPU family this backend targets.
  [m_compute_encoder dispatchThreads:MTLSizeMake(threads_x, threads_y, threads_z)
               threadsPerThreadgroup:MTLSizeMake(group_size_x, group_size_y, group_size_z)];
}
|
||||||
|
|
||||||
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
|
id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
|
||||||
{
|
{
|
||||||
@autoreleasepool
|
@autoreleasepool
|
||||||
|
@ -2450,7 +2552,7 @@ GPUDevice::PresentResult MetalDevice::BeginPresent(GPUSwapChain* swap_chain, u32
|
||||||
s_stats.num_render_passes++;
|
s_stats.num_render_passes++;
|
||||||
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
|
std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
|
||||||
m_num_current_render_targets = 0;
|
m_num_current_render_targets = 0;
|
||||||
m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
|
m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
|
||||||
m_current_depth_target = nullptr;
|
m_current_depth_target = nullptr;
|
||||||
m_current_pipeline = nullptr;
|
m_current_pipeline = nullptr;
|
||||||
m_current_depth_state = nil;
|
m_current_depth_state = nil;
|
||||||
|
|
|
@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute pipelines are not implemented in the OpenGL backend: this logs an
// error and returns an empty pointer. `config` is not inspected and `error`
// is not written to.
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  ERROR_LOG("Compute shaders are not yet supported.");
  return nullptr;
}
|
||||||
|
|
||||||
void OpenGLDevice::PushDebugGroup(const char* name)
|
void OpenGLDevice::PushDebugGroup(const char* name)
|
||||||
{
|
{
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
|
@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
|
||||||
|
|
||||||
m_features.geometry_shaders =
|
m_features.geometry_shaders =
|
||||||
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
|
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
|
||||||
|
m_features.compute_shaders = false;
|
||||||
|
|
||||||
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
|
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
|
||||||
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
|
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
|
||||||
|
@ -1078,6 +1085,12 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
||||||
Panic("Barriers are not supported");
|
Panic("Barriers are not supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute dispatch is not implemented in the OpenGL backend; every call
// panics regardless of its arguments.
void OpenGLDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
                            u32 group_size_z)
{
  Panic("Compute shaders are not supported");
}
|
||||||
|
|
||||||
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
|
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
|
||||||
u32* map_base_vertex)
|
u32* map_base_vertex)
|
||||||
{
|
{
|
||||||
|
|
|
@ -77,6 +77,7 @@ public:
|
||||||
std::string_view source, const char* entry_point,
|
std::string_view source, const char* entry_point,
|
||||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||||
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||||
|
|
||||||
void PushDebugGroup(const char* name) override;
|
void PushDebugGroup(const char* name) override;
|
||||||
void PopDebugGroup() override;
|
void PopDebugGroup() override;
|
||||||
|
@ -100,6 +101,8 @@ public:
|
||||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||||
|
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) override;
|
||||||
|
|
||||||
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
|
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
|
||||||
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;
|
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;
|
||||||
|
|
|
@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
|
||||||
m_smap_constants = {};
|
m_smap_constants = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
|
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
|
||||||
bool clear /*= true*/)
|
Error* error)
|
||||||
{
|
{
|
||||||
VkPipeline pipeline;
|
VkPipeline pipeline;
|
||||||
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
|
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
|
||||||
if (res != VK_SUCCESS)
|
if (res != VK_SUCCESS)
|
||||||
{
|
{
|
||||||
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
|
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
|
||||||
|
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -197,7 +197,7 @@ public:
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
|
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
|
||||||
|
|
||||||
void SetShader(VkShaderModule module, const char* entry_point);
|
void SetShader(VkShaderModule module, const char* entry_point);
|
||||||
|
|
||||||
|
|
|
@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
|
||||||
WARNING_LOG("Emulating texture buffers with SSBOs.");
|
WARNING_LOG("Emulating texture buffers with SSBOs.");
|
||||||
|
|
||||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
|
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
|
||||||
|
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||||
|
|
||||||
m_features.partial_msaa_resolve = true;
|
m_features.partial_msaa_resolve = true;
|
||||||
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
|
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
|
||||||
|
@ -2802,7 +2803,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||||
|
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
|
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
|
||||||
|
@ -2822,7 +2824,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (m_optional_extensions.vk_khr_push_descriptor)
|
if (m_optional_extensions.vk_khr_push_descriptor)
|
||||||
dslb.SetPushFlag();
|
dslb.SetPushFlag();
|
||||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||||
|
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
|
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
|
||||||
|
@ -2837,14 +2840,13 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
|
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_features.raster_order_views)
|
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
|
||||||
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
|
||||||
return false;
|
|
||||||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
|
|
||||||
}
|
}
|
||||||
|
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
|
return false;
|
||||||
|
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
|
||||||
|
|
||||||
for (u32 type = 0; type < 3; type++)
|
for (u32 type = 0; type < 3; type++)
|
||||||
{
|
{
|
||||||
|
@ -2860,7 +2862,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (feedback_loop)
|
if (feedback_loop)
|
||||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||||
else if (rov)
|
else if (rov)
|
||||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
|
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
|
||||||
|
@ -2873,7 +2875,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (feedback_loop)
|
if (feedback_loop)
|
||||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||||
else if (rov)
|
else if (rov)
|
||||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
@ -2887,7 +2889,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (feedback_loop)
|
if (feedback_loop)
|
||||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||||
else if (rov)
|
else if (rov)
|
||||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
@ -2901,7 +2903,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (feedback_loop)
|
if (feedback_loop)
|
||||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||||
else if (rov)
|
else if (rov)
|
||||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
|
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
|
||||||
|
@ -2915,13 +2917,24 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||||
if (feedback_loop)
|
if (feedback_loop)
|
||||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||||
else if (rov)
|
else if (rov)
|
||||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
|
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
VkPipelineLayout& pl =
|
||||||
|
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||||
|
plb.AddDescriptorSet(m_single_texture_ds_layout);
|
||||||
|
plb.AddDescriptorSet(m_image_ds_layout);
|
||||||
|
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||||
|
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||||
|
return false;
|
||||||
|
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2942,7 +2955,7 @@ void VulkanDevice::DestroyPipelineLayouts()
|
||||||
l = VK_NULL_HANDLE;
|
l = VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
destroy_dsl(m_rov_ds_layout);
|
destroy_dsl(m_image_ds_layout);
|
||||||
destroy_dsl(m_feedback_loop_ds_layout);
|
destroy_dsl(m_feedback_loop_ds_layout);
|
||||||
destroy_dsl(m_multi_texture_ds_layout);
|
destroy_dsl(m_multi_texture_ds_layout);
|
||||||
destroy_dsl(m_single_texture_buffer_ds_layout);
|
destroy_dsl(m_single_texture_buffer_ds_layout);
|
||||||
|
@ -3674,12 +3687,56 @@ void VulkanDevice::PreDrawCheck()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VulkanDevice::PreDispatchCheck()
|
||||||
|
{
|
||||||
|
// All textures should be in shader read only optimal already, but just in case..
|
||||||
|
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||||
|
for (u32 i = 0; i < num_textures; i++)
|
||||||
|
{
|
||||||
|
if (m_current_textures[i])
|
||||||
|
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Binding as image, but we still need to clear it.
|
||||||
|
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||||
|
{
|
||||||
|
VulkanTexture* rt = m_current_render_targets[i];
|
||||||
|
if (rt->GetState() == GPUTexture::State::Cleared)
|
||||||
|
rt->CommitClear(m_current_command_buffer);
|
||||||
|
rt->SetState(GPUTexture::State::Dirty);
|
||||||
|
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
|
||||||
|
rt->SetUseFenceCounter(GetCurrentFenceCounter());
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a new command buffer, bind the pipeline and such.
|
||||||
|
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||||
|
SetInitialPipelineState();
|
||||||
|
|
||||||
|
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||||
|
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
|
||||||
|
const u32 dirty = m_dirty_flags & update_mask;
|
||||||
|
m_dirty_flags = m_dirty_flags & ~update_mask;
|
||||||
|
|
||||||
|
if (dirty != 0)
|
||||||
|
{
|
||||||
|
if (!UpdateDescriptorSets(dirty))
|
||||||
|
{
|
||||||
|
SubmitCommandBuffer(false, "out of descriptor sets");
|
||||||
|
PreDispatchCheck();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<GPUPipeline::Layout layout>
|
template<GPUPipeline::Layout layout>
|
||||||
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||||
{
|
{
|
||||||
[[maybe_unused]] bool new_dynamic_offsets = false;
|
[[maybe_unused]] bool new_dynamic_offsets = false;
|
||||||
|
|
||||||
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
constexpr VkPipelineBindPoint vk_bind_point =
|
||||||
|
((layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants) ? VK_PIPELINE_BIND_POINT_GRAPHICS :
|
||||||
|
VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||||
|
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
||||||
std::array<VkDescriptorSet, 3> ds;
|
std::array<VkDescriptorSet, 3> ds;
|
||||||
u32 first_ds = 0;
|
u32 first_ds = 0;
|
||||||
u32 num_ds = 0;
|
u32 num_ds = 0;
|
||||||
|
@ -3700,7 +3757,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
|
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
|
||||||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
|
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
|
||||||
|
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||||
{
|
{
|
||||||
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
|
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
|
||||||
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
|
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
|
||||||
|
@ -3727,7 +3785,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
|
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
|
||||||
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
|
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
|
||||||
if (num_ds == 0)
|
if (num_ds == 0)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -3757,7 +3815,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||||
{
|
{
|
||||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||||
{
|
{
|
||||||
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
|
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
|
||||||
if (ids == VK_NULL_HANDLE)
|
if (ids == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -3792,8 +3850,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||||
}
|
}
|
||||||
|
|
||||||
DebugAssert(num_ds > 0);
|
DebugAssert(num_ds > 0);
|
||||||
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
|
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
|
||||||
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
|
static_cast<u32>(new_dynamic_offsets),
|
||||||
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
|
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -3818,6 +3876,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
|
||||||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||||
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||||
|
|
||||||
|
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||||
|
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
UnreachableCode();
|
UnreachableCode();
|
||||||
}
|
}
|
||||||
|
@ -3911,3 +3972,15 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
||||||
DefaultCaseIsUnreachable();
|
DefaultCaseIsUnreachable();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VulkanDevice::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z)
|
||||||
|
{
|
||||||
|
PreDispatchCheck();
|
||||||
|
s_stats.num_draws++;
|
||||||
|
|
||||||
|
const u32 groups_x = threads_x / group_size_x;
|
||||||
|
const u32 groups_y = threads_y / group_size_y;
|
||||||
|
const u32 groups_z = threads_z / group_size_z;
|
||||||
|
vkCmdDispatch(GetCurrentCommandBuffer(), groups_x, groups_y, groups_z);
|
||||||
|
}
|
||||||
|
|
|
@ -113,6 +113,7 @@ public:
|
||||||
std::string_view source, const char* entry_point,
|
std::string_view source, const char* entry_point,
|
||||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||||
|
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||||
|
|
||||||
void PushDebugGroup(const char* name) override;
|
void PushDebugGroup(const char* name) override;
|
||||||
void PopDebugGroup() override;
|
void PopDebugGroup() override;
|
||||||
|
@ -136,6 +137,8 @@ public:
|
||||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||||
|
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
|
||||||
|
u32 group_size_z) override;
|
||||||
|
|
||||||
bool SetGPUTimingEnabled(bool enabled) override;
|
bool SetGPUTimingEnabled(bool enabled) override;
|
||||||
float GetAndResetAccumulatedGPUTime() override;
|
float GetAndResetAccumulatedGPUTime() override;
|
||||||
|
@ -373,6 +376,7 @@ private:
|
||||||
VkPipelineLayout GetCurrentVkPipelineLayout() const;
|
VkPipelineLayout GetCurrentVkPipelineLayout() const;
|
||||||
void SetInitialPipelineState();
|
void SetInitialPipelineState();
|
||||||
void PreDrawCheck();
|
void PreDrawCheck();
|
||||||
|
void PreDispatchCheck();
|
||||||
|
|
||||||
template<GPUPipeline::Layout layout>
|
template<GPUPipeline::Layout layout>
|
||||||
bool UpdateDescriptorSetsForLayout(u32 dirty);
|
bool UpdateDescriptorSetsForLayout(u32 dirty);
|
||||||
|
@ -435,7 +439,7 @@ private:
|
||||||
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
|
||||||
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
|
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
|
||||||
static_cast<size_t>(PipelineLayoutType::MaxCount)>
|
static_cast<size_t>(PipelineLayoutType::MaxCount)>
|
||||||
m_pipeline_layouts = {};
|
m_pipeline_layouts = {};
|
||||||
|
|
|
@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||||
return std::unique_ptr<GPUPipeline>(
|
return std::unique_ptr<GPUPipeline>(
|
||||||
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
|
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||||
|
{
|
||||||
|
Vulkan::ComputePipelineBuilder cpb;
|
||||||
|
cpb.SetShader(static_cast<const VulkanShader*>(config.compute_shader)->GetModule(), "main");
|
||||||
|
cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast<size_t>(config.layout)]);
|
||||||
|
|
||||||
|
const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error);
|
||||||
|
if (!pipeline)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue