[D3D12] Constant buffer binding

This commit is contained in:
Triang3l 2018-07-30 15:59:43 +03:00
parent 84e7ae16e7
commit e0eede73b9
9 changed files with 857 additions and 350 deletions

View File

@ -9,8 +9,11 @@
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/xenos.h"
@ -29,6 +32,303 @@ void D3D12CommandProcessor::ClearCaches() {
cache_clear_requested_ = true;
}
// Finds a cached root signature matching the texture and sampler counts of the
// given shader pair, or creates, caches and returns a new one.
//
// vertex_shader must be non-null and translated; pixel_shader may be null, and
// if it's not, it must be translated as well (both are asserted).
//
// Returns nullptr if serializing or creating the root signature fails (the
// failure is logged). The returned pointer is owned by root_signatures_.
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
  assert_true(vertex_shader->is_translated());
  assert_true(pixel_shader == nullptr || pixel_shader->is_translated());

  uint32_t pixel_textures =
      pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0;
  uint32_t pixel_samplers =
      pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0;
  uint32_t vertex_textures = vertex_shader->GetTextureSRVCount();
  uint32_t vertex_samplers = vertex_shader->GetSamplerCount();
  // Max 96 textures (if all kinds of tfetch instructions are used for all fetch
  // registers) and 32 samplers (one sampler per used fetch), but different
  // shader stages have different texture sets.
  // Texture counts (0...96) take 7 bits and sampler counts (0...32 inclusive)
  // take 6 bits - with only 5 bits for samplers, a count of 32 would spill
  // into the next field and make different layouts share the same key.
  uint32_t index = pixel_textures | (pixel_samplers << 7) |
                   (vertex_textures << 13) | (vertex_samplers << 20);

  // Try an existing root signature.
  auto it = root_signatures_.find(index);
  if (it != root_signatures_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  D3D12_ROOT_PARAMETER parameters[kRootParameter_Count_TwoStageTextures];
  D3D12_DESCRIPTOR_RANGE ranges[kRootParameter_Count_TwoStageTextures];
  desc.NumParameters = kRootParameter_Count_NoTextures;
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Every root parameter is a descriptor table with a single range that starts
  // at the beginning of the table. The range lives in the same slot of
  // `ranges` as the parameter in `parameters`, and both arrays stay alive
  // until the description is serialized.
  auto init_table = [&parameters, &ranges](
                        UINT parameter_index,
                        D3D12_DESCRIPTOR_RANGE_TYPE range_type,
                        UINT descriptor_count, UINT base_shader_register,
                        UINT register_space,
                        D3D12_SHADER_VISIBILITY visibility) {
    D3D12_DESCRIPTOR_RANGE& range = ranges[parameter_index];
    range.RangeType = range_type;
    range.NumDescriptors = descriptor_count;
    range.BaseShaderRegister = base_shader_register;
    range.RegisterSpace = register_space;
    range.OffsetInDescriptorsFromTableStart = 0;
    D3D12_ROOT_PARAMETER& parameter = parameters[parameter_index];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = visibility;
  };

  // Fetch constants (b10).
  init_table(kRootParameter_FetchConstants, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1,
             10, 0, D3D12_SHADER_VISIBILITY_VERTEX);
  // Vertex float constants (b2-b9).
  init_table(kRootParameter_VertexFloatConstants,
             D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 8, 2, 0,
             D3D12_SHADER_VISIBILITY_VERTEX);
  // Pixel float constants (b2-b9).
  init_table(kRootParameter_PixelFloatConstants,
             D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 8, 2, 0,
             D3D12_SHADER_VISIBILITY_PIXEL);
  // Common constants - system and loop/bool (b0-b1).
  init_table(kRootParameter_CommonConstants, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2,
             0, 0, D3D12_SHADER_VISIBILITY_ALL);
  // Shared memory (t0, space1).
  init_table(kRootParameter_SharedMemory, D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0,
             1, D3D12_SHADER_VISIBILITY_VERTEX);

  if (pixel_textures > 0 || vertex_textures > 0) {
    desc.NumParameters = kRootParameter_Count_OneStageTextures;

    // Pixel or vertex textures - the pixel shader takes these slots if it
    // samples anything, otherwise they belong to the vertex shader.
    if (pixel_textures > 0) {
      assert_true(pixel_samplers > 0);
      init_table(kRootParameter_PixelOrVertexTextures,
                 D3D12_DESCRIPTOR_RANGE_TYPE_SRV, pixel_textures, 0, 0,
                 D3D12_SHADER_VISIBILITY_PIXEL);
    } else {
      assert_true(vertex_samplers > 0);
      init_table(kRootParameter_PixelOrVertexTextures,
                 D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_textures, 0, 0,
                 D3D12_SHADER_VISIBILITY_VERTEX);
    }

    // Pixel or vertex samplers.
    if (pixel_samplers > 0) {
      init_table(kRootParameter_PixelOrVertexSamplers,
                 D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, pixel_samplers, 0, 0,
                 D3D12_SHADER_VISIBILITY_PIXEL);
    } else {
      init_table(kRootParameter_PixelOrVertexSamplers,
                 D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, vertex_samplers, 0, 0,
                 D3D12_SHADER_VISIBILITY_VERTEX);
    }

    if (pixel_textures > 0 && vertex_textures > 0) {
      // Both stages sample textures - the vertex shader gets its own tables.
      assert_true(vertex_samplers > 0);
      desc.NumParameters = UINT(kRootParameter_Count_TwoStageTextures);
      // Vertex textures.
      init_table(kRootParameter_VertexTextures, D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
                 vertex_textures, 0, 0, D3D12_SHADER_VISIBILITY_VERTEX);
      // Vertex samplers.
      init_table(kRootParameter_VertexSamplers,
                 D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, vertex_samplers, 0, 0,
                 D3D12_SHADER_VISIBILITY_VERTEX);
    }
  }

  ID3DBlob* blob = nullptr;
  ID3DBlob* error_blob = nullptr;
  if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &blob, &error_blob))) {
    XELOGE(
        "Failed to serialize a root signature with %u pixel textures, %u "
        "pixel samplers, %u vertex textures and %u vertex samplers",
        pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    if (error_blob != nullptr) {
      XELOGE("%s",
             reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
      error_blob->Release();
    }
    return nullptr;
  }
  if (error_blob != nullptr) {
    error_blob->Release();
  }

  auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
  ID3D12RootSignature* root_signature;
  if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(),
                                         blob->GetBufferSize(),
                                         IID_PPV_ARGS(&root_signature)))) {
    XELOGE(
        "Failed to create a root signature with %u pixel textures, %u pixel "
        "samplers, %u vertex textures and %u vertex samplers",
        pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    blob->Release();
    return nullptr;
  }
  blob->Release();

  root_signatures_.insert({index, root_signature});
  return root_signature;
}
uint64_t D3D12CommandProcessor::RequestViewDescriptors(
uint64_t previous_full_update, uint32_t count_for_partial_update,
uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
uint32_t descriptor_index;
uint64_t current_full_update =
view_heap_pool_->Request(previous_full_update, count_for_partial_update,
count_for_full_update, descriptor_index);
if (current_full_update == 0) {
// There was an error.
return 0;
}
ID3D12DescriptorHeap* heap = view_heap_pool_->GetLastRequestHeap();
if (current_view_heap_ != heap) {
// Bind the new descriptor heaps if needed.
current_view_heap_ = heap;
ID3D12DescriptorHeap* heaps[2];
uint32_t heap_count = 0;
heaps[heap_count++] = heap;
if (current_sampler_heap_ != nullptr) {
heaps[heap_count++] = current_sampler_heap_;
}
command_lists_[current_queue_frame_]->GetCommandList()->SetDescriptorHeaps(
heap_count, heaps);
}
uint32_t descriptor_offset =
descriptor_index *
GetD3D12Context()->GetD3D12Provider()->GetDescriptorSizeView();
cpu_handle_out.ptr =
view_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset;
gpu_handle_out.ptr =
view_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset;
return current_full_update;
}
// Requests sampler descriptors from the sampler heap pool for the current
// draw, rebinding the descriptor heaps on the draw command list if the pool
// has moved to a different heap.
//
// On success, writes the handles of the first requested descriptor to
// cpu_handle_out and gpu_handle_out and returns the full update index reported
// by the pool. Returns 0 (leaving the handles untouched) if the pool failed.
uint64_t D3D12CommandProcessor::RequestSamplerDescriptors(
    uint64_t previous_full_update, uint32_t count_for_partial_update,
    uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  uint32_t descriptor_index;
  uint64_t current_full_update = sampler_heap_pool_->Request(
      previous_full_update, count_for_partial_update, count_for_full_update,
      descriptor_index);
  if (current_full_update == 0) {
    // There was an error.
    return 0;
  }
  ID3D12DescriptorHeap* heap = sampler_heap_pool_->GetLastRequestHeap();
  if (current_sampler_heap_ != heap) {
    // Bind the new descriptor heaps if needed.
    current_sampler_heap_ = heap;
    ID3D12DescriptorHeap* heaps[2];
    uint32_t heap_count = 0;
    heaps[heap_count++] = heap;
    if (current_view_heap_ != nullptr) {
      heaps[heap_count++] = current_view_heap_;
    }
    command_lists_[current_queue_frame_]->GetCommandList()->SetDescriptorHeaps(
        heap_count, heaps);
  }
  uint32_t descriptor_offset =
      descriptor_index *
      GetD3D12Context()->GetD3D12Provider()->GetDescriptorSizeSampler();
  // The handles must be based on the sampler heap the descriptors were
  // requested from, not on the view heap.
  cpu_handle_out.ptr =
      sampler_heap_pool_->GetLastRequestHeapCPUStart().ptr + descriptor_offset;
  gpu_handle_out.ptr =
      sampler_heap_pool_->GetLastRequestHeapGPUStart().ptr + descriptor_offset;
  return current_full_update;
}
bool D3D12CommandProcessor::SetupContext() {
if (!CommandProcessor::SetupContext()) {
XELOGE("Failed to initialize base command processor context");
@ -51,13 +351,22 @@ bool D3D12CommandProcessor::SetupContext() {
}
}
constant_buffer_pool_ =
std::make_unique<ui::d3d12::UploadBufferPool>(context, 1024 * 1024);
view_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
context, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 32768);
// Can't create a shader-visible heap with more than 2048 samplers.
sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);
shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
if (!shared_memory_->Initialize()) {
XELOGE("Failed to initialize shared memory");
return false;
}
pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, context);
pipeline_cache_ =
std::make_unique<PipelineCache>(this, register_file_, context);
return true;
}
@ -66,7 +375,18 @@ void D3D12CommandProcessor::ShutdownContext() {
auto context = GetD3D12Context();
context->AwaitAllFramesCompletion();
sampler_heap_pool_.reset();
view_heap_pool_.reset();
constant_buffer_pool_.reset();
pipeline_cache_.reset();
// Root signatures are used by pipelines, thus freed after the pipelines.
for (auto it : root_signatures_) {
it.second->Release();
}
root_signatures_.clear();
shared_memory_.reset();
for (uint32_t i = 0; i < ui::d3d12::D3D12Context::kQueuedFrames; ++i) {
@ -77,6 +397,22 @@ void D3D12CommandProcessor::ShutdownContext() {
CommandProcessor::ShutdownContext();
}
// Forwards the register write to the base command processor, then marks the
// constant buffer binding backed by that register (if any) as out of date so
// it's uploaded again before it's next used.
void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
  CommandProcessor::WriteRegister(index, value);

  const auto written_within = [index](uint32_t first, uint32_t last) {
    return index >= first && index <= last;
  };

  if (written_within(XE_GPU_REG_SHADER_CONSTANT_000_X,
                     XE_GPU_REG_SHADER_CONSTANT_511_W)) {
    // Float constants - each binding covers 128 consecutive registers.
    const uint32_t float_register =
        index - XE_GPU_REG_SHADER_CONSTANT_000_X;
    cbuffer_bindings_float_[float_register >> 7].up_to_date = false;
    return;
  }
  if (written_within(XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031,
                     XE_GPU_REG_SHADER_CONSTANT_LOOP_31)) {
    // Bool and loop constants share one binding.
    cbuffer_bindings_bool_loop_.up_to_date = false;
    return;
  }
  if (written_within(XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0,
                     XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5)) {
    // Fetch constants.
    cbuffer_bindings_fetch_.up_to_date = false;
  }
}
void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
uint32_t frontbuffer_width,
uint32_t frontbuffer_height) {
@ -87,7 +423,20 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
if (cache_clear_requested_) {
cache_clear_requested_ = false;
GetD3D12Context()->AwaitAllFramesCompletion();
sampler_heap_pool_->ClearCache();
view_heap_pool_->ClearCache();
constant_buffer_pool_->ClearCache();
pipeline_cache_->ClearCache();
for (auto it : root_signatures_) {
it.second->Release();
}
root_signatures_.clear();
// TODO(Triang3l): Shared memory cache clear.
// shared_memory_->ClearCache();
}
}
@ -102,6 +451,7 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info) {
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
@ -145,8 +495,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
return true;
}
bool full_update = BeginFrame();
bool new_frame = BeginFrame();
ID3D12GraphicsCommandList* command_list =
command_lists_[current_queue_frame_]->GetCommandList();
// Get the pipeline and translate the shaders so used textures are known.
ID3D12PipelineState* pipeline;
ID3D12RootSignature* root_signature;
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
@ -158,6 +511,18 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
return false;
}
// Bind the pipeline.
if (current_pipeline_ != pipeline) {
current_pipeline_ = pipeline;
command_list->SetPipelineState(pipeline);
}
// Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
root_signature)) {
return false;
}
// Shared memory test.
if (index_buffer_info != nullptr && index_buffer_info->guest_base != 0) {
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
@ -181,6 +546,21 @@ bool D3D12CommandProcessor::BeginFrame() {
context->BeginSwap();
current_queue_frame_ = context->GetCurrentQueueFrame();
// Reset bindings, particularly because the buffers backing them are recycled.
current_pipeline_ = nullptr;
current_graphics_root_signature_ = nullptr;
current_graphics_root_up_to_date_ = 0;
current_view_heap_ = nullptr;
current_sampler_heap_ = nullptr;
cbuffer_bindings_system_.up_to_date = false;
for (uint32_t i = 0; i < xe::countof(cbuffer_bindings_float_); ++i) {
cbuffer_bindings_float_[i].up_to_date = false;
}
cbuffer_bindings_bool_loop_.up_to_date = false;
cbuffer_bindings_fetch_.up_to_date = false;
draw_view_full_update_ = 0;
draw_sampler_full_update_ = 0;
command_lists_setup_[current_queue_frame_]->BeginRecording();
command_lists_[current_queue_frame_]->BeginRecording();
@ -214,6 +594,237 @@ bool D3D12CommandProcessor::EndFrame() {
return true;
}
// Brings the constant buffer bindings of the draw command list up to date:
// binds the root signature if it has changed, uploads the constant buffers
// whose bindings are marked as out of date, writes the descriptors referencing
// them, and sets the root descriptor tables that are not up to date.
//
// command_list is the list the root signature and the tables are set on.
// vertex_shader and pixel_shader are not read by this function body.
// root_signature is the signature to bind.
//
// Returns false if a constant buffer or descriptor request fails, true
// otherwise.
//
// NOTE(review): if RequestViewDescriptors fails after the uploads above it
// have marked the bindings as up to date, the write_* flags are lost, so the
// next call may not rewrite the descriptors for the newly uploaded buffers -
// confirm whether this error path needs handling.
bool D3D12CommandProcessor::UpdateBindings(
ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) {
auto provider = GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto& regs = *register_file_;
// Bind the new root signature.
if (current_graphics_root_signature_ != root_signature) {
current_graphics_root_signature_ = root_signature;
// We don't know which root parameters are up to date anymore.
current_graphics_root_up_to_date_ = 0;
command_list->SetGraphicsRootSignature(root_signature);
}
// Begin updating descriptors.
// Each flag means the descriptors of the corresponding root table have to be
// written again because at least one buffer they reference was re-uploaded.
bool write_common_constant_views = false;
bool write_vertex_float_constant_views = false;
bool write_pixel_float_constant_views = false;
bool write_fetch_constant_view = false;
// Update constant buffers.
// TODO(Triang3l): Update the system constant buffer - will crash without it.
ID3D12Resource* constant_buffer;
uint32_t constant_buffer_offset;
// System constants - the requested size is sizeof(cbuffer_system_) aligned to
// 256 bytes.
if (!cbuffer_bindings_system_.up_to_date) {
uint8_t* system_constants = constant_buffer_pool_->RequestFull(
xe::align(uint32_t(sizeof(cbuffer_system_)), 256u), constant_buffer,
constant_buffer_offset);
if (system_constants == nullptr) {
return false;
}
std::memcpy(system_constants, &cbuffer_system_, sizeof(cbuffer_system_));
cbuffer_bindings_system_.buffer_address =
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
cbuffer_bindings_system_.up_to_date = true;
write_common_constant_views = true;
}
// Bool and loop constants - 40 consecutive registers starting from the first
// bool constant register are copied into a 256-byte allocation.
if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull(
256, constant_buffer, constant_buffer_offset);
if (bool_loop_constants == nullptr) {
return false;
}
std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
40 * sizeof(uint32_t));
cbuffer_bindings_bool_loop_.buffer_address =
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
cbuffer_bindings_bool_loop_.up_to_date = true;
write_common_constant_views = true;
}
// Float constants - 16 pages of 32 four-component constants (512 bytes) each.
// Pages 0-7 are referenced by the vertex table, pages 8-15 by the pixel table.
for (uint32_t i = 0; i < 16; ++i) {
ConstantBufferBinding& float_binding = cbuffer_bindings_float_[i];
if (float_binding.up_to_date) {
continue;
}
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
512, constant_buffer, constant_buffer_offset);
if (float_constants == nullptr) {
return false;
}
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32,
32 * 4 * sizeof(uint32_t));
float_binding.buffer_address =
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
float_binding.up_to_date = true;
if (i < 8) {
write_vertex_float_constant_views = true;
} else {
write_pixel_float_constant_views = true;
}
}
// Fetch constants - 32 fetch constants of 6 registers each (768 bytes).
if (!cbuffer_bindings_fetch_.up_to_date) {
uint8_t* fetch_constants = constant_buffer_pool_->RequestFull(
768, constant_buffer, constant_buffer_offset);
if (fetch_constants == nullptr) {
return false;
}
std::memcpy(fetch_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
32 * 6 * sizeof(uint32_t));
cbuffer_bindings_fetch_.buffer_address =
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
cbuffer_bindings_fetch_.up_to_date = true;
write_fetch_constant_view = true;
}
// Update the descriptors.
// Count the descriptors needed if only the tables flagged above are rewritten.
uint32_t view_count_partial_update = 0;
if (write_common_constant_views) {
// System and bool/loop constants.
view_count_partial_update += 2;
}
if (write_vertex_float_constant_views) {
// Vertex float constants.
view_count_partial_update += 8;
}
if (write_pixel_float_constant_views) {
// Pixel float constants.
view_count_partial_update += 8;
}
if (write_fetch_constant_view) {
// Fetch constants.
++view_count_partial_update;
}
// All the constants + shared memory.
// 2 common + 8 vertex float + 8 pixel float + 1 fetch + 1 shared memory.
uint32_t view_count_full_update = 20;
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle;
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
uint32_t view_handle_size = provider->GetDescriptorSizeView();
uint64_t view_full_update_index = RequestViewDescriptors(
draw_view_full_update_, view_count_partial_update, view_count_full_update,
view_cpu_handle, view_gpu_handle);
if (view_full_update_index == 0) {
return false;
}
// A different full update index than the one stored from the previous request
// means none of the previously written descriptors can be reused.
if (draw_view_full_update_ != view_full_update_index) {
// Need to update all descriptors.
draw_view_full_update_ = view_full_update_index;
write_common_constant_views = true;
write_vertex_float_constant_views = true;
write_pixel_float_constant_views = true;
write_fetch_constant_view = true;
// If updating fully, write the shared memory descriptor (t0, space1).
shared_memory_->CreateSRV(view_cpu_handle);
gpu_handle_shared_memory_ = view_gpu_handle;
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_SharedMemory);
}
// From here on, view_cpu_handle and view_gpu_handle are advanced together by
// one descriptor after each view is written, so the tables are laid out
// consecutively in the order they are written below. Each rewritten table
// also clears its root parameter bit so the table is set again.
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_desc;
if (write_common_constant_views) {
gpu_handle_common_constants_ = view_gpu_handle;
// System constants (b0).
constant_buffer_desc.BufferLocation =
cbuffer_bindings_system_.buffer_address;
constant_buffer_desc.SizeInBytes =
xe::align(uint32_t(sizeof(cbuffer_system_)), 256u);
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
// Bool/loop constants (b1).
constant_buffer_desc.BufferLocation =
cbuffer_bindings_bool_loop_.buffer_address;
constant_buffer_desc.SizeInBytes = 256;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_CommonConstants);
}
if (write_vertex_float_constant_views) {
gpu_handle_vertex_float_constants_ = view_gpu_handle;
// Vertex float constants (b2-b9).
for (uint32_t i = 0; i < 8; ++i) {
constant_buffer_desc.BufferLocation =
cbuffer_bindings_float_[i].buffer_address;
constant_buffer_desc.SizeInBytes = 512;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
}
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_VertexFloatConstants);
}
if (write_pixel_float_constant_views) {
gpu_handle_pixel_float_constants_ = view_gpu_handle;
// Pixel float constants (b2-b9).
for (uint32_t i = 0; i < 8; ++i) {
constant_buffer_desc.BufferLocation =
cbuffer_bindings_float_[8 + i].buffer_address;
constant_buffer_desc.SizeInBytes = 512;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
}
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_PixelFloatConstants);
}
if (write_fetch_constant_view) {
gpu_handle_fetch_constants_ = view_gpu_handle;
// Fetch constants (b10).
constant_buffer_desc.BufferLocation =
cbuffer_bindings_fetch_.buffer_address;
constant_buffer_desc.SizeInBytes = 768;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += view_handle_size;
view_gpu_handle.ptr += view_handle_size;
current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants);
}
// Update the root parameters.
// Only the tables whose bit is clear are set, and the bit is set afterwards.
if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_FetchConstants))) {
command_list->SetGraphicsRootDescriptorTable(kRootParameter_FetchConstants,
gpu_handle_fetch_constants_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_FetchConstants;
}
if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_VertexFloatConstants))) {
command_list->SetGraphicsRootDescriptorTable(
kRootParameter_VertexFloatConstants,
gpu_handle_vertex_float_constants_);
current_graphics_root_up_to_date_ |= 1u
<< kRootParameter_VertexFloatConstants;
}
if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_PixelFloatConstants))) {
command_list->SetGraphicsRootDescriptorTable(
kRootParameter_PixelFloatConstants, gpu_handle_pixel_float_constants_);
current_graphics_root_up_to_date_ |= 1u
<< kRootParameter_PixelFloatConstants;
}
if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_CommonConstants))) {
command_list->SetGraphicsRootDescriptorTable(kRootParameter_CommonConstants,
gpu_handle_common_constants_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_CommonConstants;
}
if (!(current_graphics_root_up_to_date_ &
(1u << kRootParameter_SharedMemory))) {
command_list->SetGraphicsRootDescriptorTable(kRootParameter_SharedMemory,
gpu_handle_shared_memory_);
current_graphics_root_up_to_date_ |= 1u << kRootParameter_SharedMemory;
}
return true;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
#include <memory>
#include <unordered_map>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
@ -20,6 +21,7 @@
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/d3d12/command_list.h"
#include "xenia/ui/d3d12/d3d12_context.h"
#include "xenia/ui/d3d12/pools.h"
namespace xe {
namespace gpu {
@ -38,10 +40,29 @@ class D3D12CommandProcessor : public CommandProcessor {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
}
// Finds or creates root signature for a pipeline.
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader);
// Request and automatically rebind descriptors on the draw command list.
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
uint64_t RequestViewDescriptors(uint64_t previous_full_update,
uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
uint64_t RequestSamplerDescriptors(
uint64_t previous_full_update, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
protected:
bool SetupContext() override;
void ShutdownContext() override;
void WriteRegister(uint32_t index, uint32_t value) override;
void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) override;
@ -54,11 +75,61 @@ class D3D12CommandProcessor : public CommandProcessor {
bool IssueCopy() override;
private:
// Indices of the root parameters in the root signatures used for draws. The
// kRootParameter_Count_* values are the numbers of root parameters in the
// signatures without textures, with textures in one shader stage, and with
// textures in both stages.
enum RootParameter : UINT {
// These are always present.
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains fetch constants with vertex addresses (b10).
kRootParameter_FetchConstants,
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices) - 8 pages of float constants
// (b2-b9).
kRootParameter_VertexFloatConstants,
// Less frequently changed (per-material) - 8 pages of float constants
// (b2-b9).
kRootParameter_PixelFloatConstants,
// Rarely changed - system constants like viewport and alpha testing (b0)
// and loop and bool constants (b1).
kRootParameter_CommonConstants,
// Never changed - shared memory byte address buffer (t0, space1).
kRootParameter_SharedMemory,
// Number of root parameters when no shader stage fetches textures.
kRootParameter_Count_NoTextures,
// These are there only if textures are fetched (they are changed pretty
// frequently, but for ease of maintenance they're at the end).
// If the pixel shader samples textures, these are for pixel textures
// (changed more frequently), otherwise, if the vertex shader samples
// textures, these are for vertex textures.
// Used textures of all types (t0+, space0).
kRootParameter_PixelOrVertexTextures = kRootParameter_Count_NoTextures,
// Used samplers (s0+).
kRootParameter_PixelOrVertexSamplers,
// Number of root parameters when only one shader stage fetches textures.
kRootParameter_Count_OneStageTextures,
// These are present only if both the pixel and the vertex shaders sample
// textures - in this case, these are for the vertex textures.
// Used textures of all types (t0+, space0).
kRootParameter_VertexTextures = kRootParameter_Count_OneStageTextures,
// Used samplers (s0+).
kRootParameter_VertexSamplers,
// Number of root parameters when both shader stages fetch textures.
kRootParameter_Count_TwoStageTextures,
};
// Returns true if a new frame was started.
bool BeginFrame();
// Returns true if an open frame was ended.
bool EndFrame();
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader,
ID3D12RootSignature* root_signature);
bool cache_clear_requested_ = false;
std::unique_ptr<ui::d3d12::CommandList>
@ -67,9 +138,58 @@ class D3D12CommandProcessor : public CommandProcessor {
command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
uint32_t current_queue_frame_ = UINT32_MAX;
// Currently bound graphics or compute pipeline.
ID3D12PipelineState* current_pipeline_;
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;
// Whether root parameters are up to date - reset if a new signature is bound.
uint32_t current_graphics_root_up_to_date_;
// Currently bound descriptor heaps - update by RequestViewDescriptors and
// RequestSamplerDescriptors.
ID3D12DescriptorHeap* current_view_heap_;
ID3D12DescriptorHeap* current_sampler_heap_;
// System shader constants.
struct SystemConstants {
float viewport_inv_scale_x;
float viewport_inv_scale_y;
uint32_t vertex_index_endian;
uint32_t textures_are_3d;
} cbuffer_system_;
// Constant buffer bindings.
struct ConstantBufferBinding {
D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
bool up_to_date;
};
ConstantBufferBinding cbuffer_bindings_system_;
ConstantBufferBinding cbuffer_bindings_float_[16];
ConstantBufferBinding cbuffer_bindings_bool_loop_;
ConstantBufferBinding cbuffer_bindings_fetch_;
// Pages with the descriptors currently used for handling Xenos draw calls.
uint64_t draw_view_full_update_;
uint64_t draw_sampler_full_update_;
// Latest descriptor handles used for handling Xenos draw calls.
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_common_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_vertex_float_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_pixel_float_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_;
};
} // namespace d3d12

View File

@ -11,10 +11,12 @@
#include <cinttypes>
#include <cmath>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/hlsl_shader_translator.h"
@ -22,10 +24,21 @@ namespace xe {
namespace gpu {
namespace d3d12 {
PipelineCache::PipelineCache(RegisterFile* register_file,
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
ui::d3d12::D3D12Context* context)
: register_file_(register_file), context_(context) {
: command_processor_(command_processor),
register_file_(register_file),
context_(context) {
shader_translator_.reset(new HlslShaderTranslator());
// Set pipeline state description values we never change.
// Zero out tessellation, stream output, blend state and formats for render
// targets 4+, node mask, cached PSO, flags and other things.
std::memset(&update_desc_, 0, sizeof(update_desc_));
update_desc_.BlendState.IndependentBlendEnable = TRUE;
update_desc_.SampleMask = UINT_MAX;
update_desc_.SampleDesc.Count = 1;
}
PipelineCache::~PipelineCache() { Shutdown(); }
@ -114,12 +127,6 @@ void PipelineCache::ClearCache() {
pipelines_.clear();
COUNT_profile_set("gpu/pipeline_cache/pipelines", 0);
// Destroy all root signatures.
for (auto it : root_signatures_) {
it.second->Release();
}
root_signatures_.clear();
// Destroy all shaders.
for (auto it : shader_map_) {
delete it.second;
@ -259,6 +266,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
return UpdateStatus::kError;
}
update_desc_.pRootSignature =
command_processor_->GetRootSignature(vertex_shader, pixel_shader);
if (update_desc_.pRootSignature == nullptr) {
return UpdateStatus::kError;
}
update_desc_.VS.pShaderBytecode = vertex_shader->GetDXBC();
update_desc_.VS.BytecodeLength = vertex_shader->GetDXBCSize();
if (pixel_shader != nullptr) {
@ -268,17 +280,9 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
update_desc_.PS.pShaderBytecode = nullptr;
update_desc_.PS.BytecodeLength = 0;
}
update_desc_.DS.pShaderBytecode = nullptr;
update_desc_.DS.BytecodeLength = 0;
update_desc_.HS.pShaderBytecode = nullptr;
update_desc_.HS.BytecodeLength = 0;
// TODO(Triang3l): Geometry shaders.
update_desc_.GS.pShaderBytecode = nullptr;
update_desc_.GS.BytecodeLength = 0;
update_desc_.pRootSignature = GetRootSignature(vertex_shader, pixel_shader);
if (update_desc_.pRootSignature == nullptr) {
return UpdateStatus::kError;
}
update_desc_.PrimitiveTopologyType =
primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE
: D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
@ -329,8 +333,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
return UpdateStatus::kCompatible;
}
update_desc_.BlendState.AlphaToCoverageEnable = FALSE;
update_desc_.BlendState.IndependentBlendEnable = TRUE;
static const D3D12_BLEND kBlendFactorMap[] = {
/* 0 */ D3D12_BLEND_ZERO,
/* 1 */ D3D12_BLEND_ONE,
@ -384,11 +386,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState(
blend_desc.DestBlendAlpha = D3D12_BLEND_ZERO;
blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD;
}
blend_desc.LogicOpEnable = FALSE;
blend_desc.LogicOp = D3D12_LOGIC_OP_NOOP;
blend_desc.RenderTargetWriteMask = (color_mask >> (i * 4)) & 0xF;
}
update_desc_.SampleMask = UINT_MAX;
return UpdateStatus::kMismatch;
}
@ -516,11 +515,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
poly_offset_scale * (1.0f / 16.0f);
update_desc_.RasterizerState.DepthClipEnable =
!depth_clamp_enable ? TRUE : FALSE;
update_desc_.RasterizerState.MultisampleEnable = FALSE;
update_desc_.RasterizerState.AntialiasedLineEnable = FALSE;
update_desc_.RasterizerState.ForcedSampleCount = 0;
update_desc_.RasterizerState.ConservativeRaster =
D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
return UpdateStatus::kMismatch;
}
@ -629,21 +623,7 @@ PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
return it->second;
}
// Set the unused fields of the pipeline description.
update_desc_.StreamOutput.pSODeclaration = nullptr;
update_desc_.StreamOutput.NumEntries = 0;
update_desc_.StreamOutput.pBufferStrides = nullptr;
update_desc_.StreamOutput.NumStrides = 0;
update_desc_.StreamOutput.RasterizedStream = 0;
update_desc_.InputLayout.pInputElementDescs = nullptr;
update_desc_.InputLayout.NumElements = 0;
update_desc_.SampleDesc.Count = 1;
update_desc_.SampleDesc.Quality = 0;
update_desc_.NodeMask = 0;
// TODO(Triang3l): Cache create pipelines.
update_desc_.CachedPSO.pCachedBlob = nullptr;
update_desc_.CachedPSO.CachedBlobSizeInBytes = 0;
update_desc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
// TODO(Triang3l): Cache create pipelines using CachedPSO.
auto device = context_->GetD3D12Provider()->GetDevice();
ID3D12PipelineState* state;
@ -662,217 +642,6 @@ PipelineCache::Pipeline* PipelineCache::GetPipeline(uint64_t hash_key) {
return pipeline;
}
// Returns the root signature matching the texture and sampler counts of the
// given shader pair, creating and caching it in root_signatures_ on first use.
//
// vertex_shader is dereferenced unconditionally and must not be null.
// pixel_shader may be null, in which case it contributes no textures or
// samplers.
//
// The returned pointer is owned by root_signatures_ (released in ClearCache),
// so callers must not Release it. Returns nullptr if serialization or creation
// of the root signature fails - the failure is logged and nothing is cached.
ID3D12RootSignature* PipelineCache::GetRootSignature(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
  uint32_t pixel_textures =
      pixel_shader != nullptr ? pixel_shader->GetTextureSRVCount() : 0;
  uint32_t pixel_samplers =
      pixel_shader != nullptr ? pixel_shader->GetSamplerCount() : 0;
  uint32_t vertex_textures = vertex_shader->GetTextureSRVCount();
  uint32_t vertex_samplers = vertex_shader->GetSamplerCount();
  // Max 96 textures (if all kinds of tfetch instructions are used for all fetch
  // registers) and 32 samplers (one sampler per used fetch), but different
  // shader stages have different texture sets.
  // Texture counts (0-96) need 7 bits and sampler counts (0-32 inclusive) need
  // 6 bits, so the fields are laid out as 7 + 6 + 7 + 6 bits. With only 5 bits
  // for the sampler count, a count of 32 would spill into the lowest bit of the
  // next field and make different combinations share one cache key.
  uint32_t index = pixel_textures | (pixel_samplers << 7) |
                   (vertex_textures << 13) | (vertex_samplers << 20);

  // Try an existing root signature.
  auto it = root_signatures_.find(index);
  if (it != root_signatures_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  // The scoped enum has to be converted explicitly to be usable as an array
  // bound.
  D3D12_ROOT_PARAMETER
      parameters[size_t(RootParameter::kCountWithTwoStageTextures)];
  D3D12_DESCRIPTOR_RANGE
      ranges[size_t(RootParameter::kCountWithTwoStageTextures)];
  desc.NumParameters = UINT(RootParameter::kCountNoTextures);
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Every root parameter is a descriptor table with exactly one range that
  // starts at the beginning of the table, so they are all filled the same way.
  auto set_descriptor_table = [&parameters, &ranges](
                                  RootParameter id,
                                  D3D12_DESCRIPTOR_RANGE_TYPE range_type,
                                  UINT descriptor_count, UINT base_register,
                                  UINT register_space,
                                  D3D12_SHADER_VISIBILITY visibility) {
    auto& parameter = parameters[size_t(id)];
    auto& range = ranges[size_t(id)];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range;
    parameter.ShaderVisibility = visibility;
    range.RangeType = range_type;
    range.NumDescriptors = descriptor_count;
    range.BaseShaderRegister = base_register;
    range.RegisterSpace = register_space;
    range.OffsetInDescriptorsFromTableStart = 0;
  };

  // Vertex constants - float and fetch (9 CBVs starting at b2).
  set_descriptor_table(RootParameter::kVertexConstants,
                       D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 9, 2, 0,
                       D3D12_SHADER_VISIBILITY_VERTEX);
  // Pixel constants - float (8 CBVs starting at b2).
  set_descriptor_table(RootParameter::kPixelConstants,
                       D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 8, 2, 0,
                       D3D12_SHADER_VISIBILITY_PIXEL);
  // Common constants - system and loop/bool (b0 and b1).
  set_descriptor_table(RootParameter::kCommonConstants,
                       D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 0, 0,
                       D3D12_SHADER_VISIBILITY_ALL);
  // Virtual shared memory (t0, space1).
  set_descriptor_table(RootParameter::kVirtualMemory,
                       D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 1,
                       D3D12_SHADER_VISIBILITY_VERTEX);

  if (pixel_textures > 0 || vertex_textures > 0) {
    desc.NumParameters = UINT(RootParameter::kCountWithOneStageTextures);

    // Pixel or vertex textures - the pixel shader takes this slot if it
    // samples anything.
    if (pixel_textures > 0) {
      assert_true(pixel_samplers > 0);
      set_descriptor_table(RootParameter::kPixelOrVertexTextures,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SRV, pixel_textures, 0,
                           0, D3D12_SHADER_VISIBILITY_PIXEL);
    } else {
      assert_true(vertex_samplers > 0);
      set_descriptor_table(RootParameter::kPixelOrVertexTextures,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_textures, 0,
                           0, D3D12_SHADER_VISIBILITY_VERTEX);
    }

    // Pixel or vertex samplers.
    if (pixel_samplers > 0) {
      set_descriptor_table(RootParameter::kPixelOrVertexSamplers,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, pixel_samplers,
                           0, 0, D3D12_SHADER_VISIBILITY_PIXEL);
    } else {
      set_descriptor_table(RootParameter::kPixelOrVertexSamplers,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, vertex_samplers,
                           0, 0, D3D12_SHADER_VISIBILITY_VERTEX);
    }

    if (pixel_textures > 0 && vertex_textures > 0) {
      assert_true(vertex_samplers > 0);
      desc.NumParameters = UINT(RootParameter::kCountWithTwoStageTextures);
      // Vertex textures.
      set_descriptor_table(RootParameter::kVertexTextures,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_textures, 0,
                           0, D3D12_SHADER_VISIBILITY_VERTEX);
      // Vertex samplers.
      set_descriptor_table(RootParameter::kVertexSamplers,
                           D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, vertex_samplers,
                           0, 0, D3D12_SHADER_VISIBILITY_VERTEX);
    }
  }

  ID3DBlob* blob;
  ID3DBlob* error_blob = nullptr;
  if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &blob, &error_blob))) {
    XELOGE(
        "Failed to serialize a root signature with %u pixel textures, %u "
        "pixel samplers, %u vertex textures and %u vertex samplers",
        pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    if (error_blob != nullptr) {
      XELOGE("%s",
             reinterpret_cast<const char*>(error_blob->GetBufferPointer()));
      error_blob->Release();
    }
    return nullptr;
  }
  // An error blob may be returned even on success - it still has to be
  // released.
  if (error_blob != nullptr) {
    error_blob->Release();
  }

  auto device = context_->GetD3D12Provider()->GetDevice();
  ID3D12RootSignature* root_signature;
  if (FAILED(device->CreateRootSignature(0, blob->GetBufferPointer(),
                                         blob->GetBufferSize(),
                                         IID_PPV_ARGS(&root_signature)))) {
    XELOGE(
        "Failed to create a root signature with %u pixel textures, %u pixel "
        "samplers, %u vertex textures and %u vertex samplers",
        pixel_textures, pixel_samplers, vertex_textures, vertex_samplers);
    blob->Release();
    return nullptr;
  }
  blob->Release();

  root_signatures_.insert({index, root_signature});
  return root_signature;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -24,6 +24,8 @@ namespace xe {
namespace gpu {
namespace d3d12 {
class D3D12CommandProcessor;
class PipelineCache {
public:
enum class UpdateStatus {
@ -32,7 +34,8 @@ class PipelineCache {
kError,
};
PipelineCache(RegisterFile* register_file, ui::d3d12::D3D12Context* context);
PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, ui::d3d12::D3D12Context* context);
~PipelineCache();
void Shutdown();
@ -49,50 +52,6 @@ class PipelineCache {
void ClearCache();
// Indices of the root parameters in the root signatures created by
// GetRootSignature. The kCount* entries are the total number of root
// parameters in a signature with no textures, with textures in one shader
// stage, and with textures in both stages.
enum class RootParameter {
// These are always present.
// Most frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices, also vertex offsets for
// objects drawn in multiple parts).
// This contains 8 pages of float constants (b2-b9) and fetch constants
// (b10).
kVertexConstants,
// Less frequently changed (per-material) - 8 pages of float constants
// (b2-b9).
kPixelConstants,
// Rarely changed - system constants like viewport and alpha testing (b0)
// and loop and bool constants (b1).
kCommonConstants,
// Never changed - shared memory byte address buffer (t0, space1).
kVirtualMemory,
kCountNoTextures,
// These are there only if textures are fetched (they are changed pretty
// frequently, but for ease of maintenance they're at the end).
// If the pixel shader samples textures, these are for pixel textures
// (changed more frequently), otherwise, if the vertex shader samples
// textures, these are for vertex textures.
// Used textures of all types (t0+, space0).
kPixelOrVertexTextures = kCountNoTextures,
// Used samplers (s0+).
kPixelOrVertexSamplers,
kCountWithOneStageTextures,
// These are only present if both the pixel and the vertex shaders sample
// textures - in this case, they are for the vertex textures.
// Used textures of all types (t0+, space0).
kVertexTextures = kCountWithOneStageTextures,
// Used samplers (s0+).
kVertexSamplers,
kCountWithTwoStageTextures,
};
private:
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
@ -104,11 +63,11 @@ class PipelineCache {
PrimitiveType primitive_type,
IndexFormat index_format);
// pRootSignature, VS, PS, DS, HS, GS, PrimitiveTopologyType.
// pRootSignature, VS, PS, GS, PrimitiveTopologyType.
UpdateStatus UpdateShaderStages(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader,
PrimitiveType primitive_type);
// BlendState, SampleMask.
// BlendState.
UpdateStatus UpdateBlendState(D3D12Shader* pixel_shader);
// RasterizerState.
UpdateStatus UpdateRasterizerState(PrimitiveType primitive_type);
@ -119,19 +78,15 @@ class PipelineCache {
// NumRenderTargets, RTVFormats, DSVFormat.
UpdateStatus UpdateRenderTargetFormats();
RegisterFile* register_file_ = nullptr;
ui::d3d12::D3D12Context* context_ = nullptr;
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
ui::d3d12::D3D12Context* context_;
// Reusable shader translator.
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader);
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced
@ -139,12 +94,11 @@ class PipelineCache {
XXH64_state_t hash_state_;
struct Pipeline {
ID3D12PipelineState* state;
// From root_signatures_ - not owned.
// Root signature taken from the command processor.
ID3D12RootSignature* root_signature;
};
// All previously generated pipelines mapped by hash.
std::unordered_map<uint64_t, Pipeline*> pipelines_;
// Sets StreamOutput, InputLayout, SampleDesc, NodeMask, CachedPSO, Flags.
Pipeline* GetPipeline(uint64_t hash_key);
// Previously used pipeline. This matches our current state settings

View File

@ -62,6 +62,7 @@ bool SharedMemory::Initialize() {
Shutdown();
return false;
}
buffer_gpu_address_ = buffer_->GetGPUVirtualAddress();
std::memset(heaps_, 0, sizeof(heaps_));
heap_creation_failed_ = false;
@ -406,6 +407,32 @@ void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list);
}
// Writes a raw (byte address) shader resource view of the shared memory buffer
// to the descriptor at `handle`. The view starts at element 0 and spans
// kBufferSize bytes expressed as 32-bit elements.
void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
  srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
  // The format is R32_TYPELESS and the element count is in dwords because the
  // view is created with the RAW flag.
  srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;

  auto& buffer_view = srv_desc.Buffer;
  buffer_view.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  buffer_view.StructureByteStride = 0;
  buffer_view.FirstElement = 0;
  buffer_view.NumElements = kBufferSize >> 2;

  auto device = context_->GetD3D12Provider()->GetDevice();
  device->CreateShaderResourceView(buffer_, &srv_desc, handle);
}
// Writes a raw (byte address) unordered access view of the shared memory
// buffer to the descriptor at `handle`. The view starts at element 0, spans
// kBufferSize bytes expressed as 32-bit elements, and has no counter resource.
void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
  uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  // The format is R32_TYPELESS and the element count is in dwords because the
  // view is created with the RAW flag.
  uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;

  auto& buffer_view = uav_desc.Buffer;
  buffer_view.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
  buffer_view.CounterOffsetInBytes = 0;
  buffer_view.StructureByteStride = 0;
  buffer_view.FirstElement = 0;
  buffer_view.NumElements = kBufferSize >> 2;

  auto device = context_->GetD3D12Provider()->GetDevice();
  // No counter resource is passed for this view.
  device->CreateUnorderedAccessView(buffer_, nullptr, &uav_desc, handle);
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -33,6 +33,10 @@ class SharedMemory {
bool Initialize();
void Shutdown();
// Returns the GPU virtual address of the shared memory buffer. The value is
// cached by Initialize from the buffer resource and is initially 0.
D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddress() const {
return buffer_gpu_address_;
}
void BeginFrame();
// Returns true if anything has been written to command_list.
// The draw command list is needed for the transition.
@ -51,6 +55,9 @@ class SharedMemory {
// Makes the buffer usable for texture tiling after a resolve.
void UseForWriting(ID3D12GraphicsCommandList* command_list);
void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
private:
Memory* memory_;
@ -61,6 +68,7 @@ class SharedMemory {
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
static constexpr uint32_t kAddressMask = kBufferSize - 1;
ID3D12Resource* buffer_ = nullptr;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
// D3D resource tiles are 64 KB in size.

View File

@ -198,7 +198,7 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" uint2 xe_vertex_fetch[96];\n"
"};\n"
"\n"
"ByteAddressBuffer xe_virtual_memory : register(t0, space1);\n"
"ByteAddressBuffer xe_shared_memory : register(t0, space1);\n"
"\n"
"#define XE_BYTE_SWAP_OVERLOAD(XeByteSwapType) \\\n"
"XeByteSwapType XeByteSwap(XeByteSwapType v, uint endian) { \\\n"
@ -806,7 +806,7 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction(
bool conditional_emitted = BeginPredicatedInstruction(
instr.is_predicated, instr.predicate_condition);
// Load the element from the virtual memory as uints and swap.
// Load the element from the shared memory as uints and swap.
EmitLoadOperand(0, instr.operands[0]);
const char* load_swizzle;
const char* load_function_suffix;
@ -832,9 +832,8 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction(
load_function_suffix = "";
break;
}
EmitSourceDepth(
"xe_vertex_element%s = XeByteSwap(xe_virtual_memory.Load%s(\n",
load_swizzle, load_function_suffix);
EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n",
load_swizzle, load_function_suffix);
EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)",
instr.operands[1].storage_index);
if (instr.attributes.stride != 0) {

View File

@ -68,8 +68,8 @@ void UploadBufferPool::ClearCache() {
uint8_t* UploadBufferPool::RequestFull(uint32_t size,
ID3D12Resource*& buffer_out,
uint32_t& offset_out) {
assert_true(size != 0 && size <= page_size_);
if (size == 0 || size > page_size_) {
assert_true(size <= page_size_);
if (size > page_size_) {
return nullptr;
}
if (page_size_ - current_size_ < size || current_mapping_ == nullptr) {
@ -89,10 +89,6 @@ uint8_t* UploadBufferPool::RequestPartial(uint32_t size,
ID3D12Resource*& buffer_out,
uint32_t& offset_out,
uint32_t& size_out) {
assert_true(size != 0);
if (size == 0) {
return nullptr;
}
if (current_size_ == page_size_ || current_mapping_ == nullptr) {
// Start a new page if can't fit any bytes or don't have an open page.
if (!BeginNextPage()) {
@ -216,7 +212,7 @@ void DescriptorHeapPool::BeginFrame() {
void DescriptorHeapPool::EndFrame() { EndPage(); }
void DescriptorHeapPool::ClearCache() {
assert(current_size_ == 0);
assert_true(current_size_ == 0);
while (unsent_ != nullptr) {
auto next = unsent_->next;
unsent_->heap->Release();
@ -232,36 +228,39 @@ void DescriptorHeapPool::ClearCache() {
sent_last_ = nullptr;
}
// Returns the number of the page a request for `count` descriptors would be
// placed on: the current page if it has enough free space left, otherwise the
// page after it. Does not allocate or modify anything.
uint64_t DescriptorHeapPool::GetPageForRequest(uint32_t count) const {
  const uint64_t current = current_page_;
  const bool fits_on_current = !(page_size_ - current_size_ < count);
  return fits_on_current ? current : current + 1;
}
bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) {
assert_true(count != 0 && count <= page_size_);
if (count == 0 || count > page_size_) {
return false;
uint64_t DescriptorHeapPool::Request(uint64_t previous_full_update,
uint32_t count_for_partial_update,
uint32_t count_for_full_update,
uint32_t& index_out) {
assert_true(count_for_partial_update <= count_for_full_update);
assert_true(count_for_full_update <= page_size_);
if (count_for_partial_update > count_for_full_update ||
count_for_full_update > page_size_) {
return 0;
}
if (page_creation_failed_) {
// Don't increment the page index every call if there was a failure as well.
return false;
// Don't touch the page index every call if there was a failure as well.
return 0;
}
// Go to the next page if there's not enough free space on the current one.
// If the last full update happened on the current page, a partial update is
// possible.
uint32_t count = previous_full_update == current_page_
? count_for_partial_update
: count_for_full_update;
// Go to the next page if there's not enough free space on the current one,
// or because the previous page may be outdated. In this case, a full update
// is necessary.
if (page_size_ - current_size_ < count) {
EndPage();
++current_page_;
count = count_for_full_update;
}
// Create the page if needed (may be the first call for the page).
if (unsent_ == nullptr) {
if (page_creation_failed_) {
return false;
}
auto device = context_->GetD3D12Provider()->GetDevice();
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
heap_desc.Type = type_;
@ -273,7 +272,7 @@ bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) {
XELOGE("Failed to create a heap for %u shader-visible descriptors",
page_size_);
page_creation_failed_ = true;
return false;
return 0;
}
unsent_ = new DescriptorHeap;
unsent_->heap = heap;
@ -289,7 +288,7 @@ bool DescriptorHeapPool::Request(uint32_t count, uint32_t& index_out) {
}
index_out = current_size_;
current_size_ += count;
return true;
return current_page_;
}
void DescriptorHeapPool::EndPage() {

View File

@ -71,13 +71,34 @@ class DescriptorHeapPool {
void EndFrame();
void ClearCache();
// To check if a rebind will be required, and thus may possibly need to write
// all the descriptors needed for a draw call rather than only the modified
// ones. The page number can never be 0 if a frame has started, and it's
// changed every frame, so it's safe to use 0 to indicate that the descriptors
// for some data have never been written.
uint64_t GetPageForRequest(uint32_t count) const;
bool Request(uint32_t count, uint32_t& index_out);
// Because all descriptors for a single draw call must be in the same heap,
// sometimes all descriptors, rather than only the modified portion of them,
// need to be written.
//
// This may happen if there's not enough free space even for a partial update
// in the current heap, or if the heap which contains the unchanged part of
// the descriptors is outdated.
//
// If something uses this pool to do partial updates, it must let this
// function determine whether a partial update is possible. For this purpose,
// this function returns a full update number - and it must be called with its
// previous return value for the set of descriptors it's updating.
//
// If this function returns a value that is the same as previous_full_update,
// a partial update needs to be done - and space for count_for_partial_update
// is allocated.
//
// If it's different, all descriptors must be written again - and space for
// count_for_full_update is allocated.
//
// If 0 is returned, there was an error.
//
// This MUST be called even if there's nothing to write in a partial update
// (with count_for_partial_update being 0), because a full update may still be
// required.
uint64_t Request(uint64_t previous_full_update,
uint32_t count_for_partial_update,
uint32_t count_for_full_update, uint32_t& index_out);
// The current heap, for binding and actually writing - may be called only
// after a successful request because before a request, the heap may not exist
@ -89,7 +110,6 @@ class DescriptorHeapPool {
D3D12_GPU_DESCRIPTOR_HANDLE GetLastRequestHeapGPUStart() const {
return current_heap_gpu_start_;
}
uint64_t GetLastRequestPageNumber() const { return current_page_; }
private:
D3D12Context* context_;