/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
#include "third_party/xxhash/xxhash.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <utility>
|
|
|
|
#include "xenia/base/assert.h"
|
|
#include "xenia/base/cvar.h"
|
|
#include "xenia/base/logging.h"
|
|
#include "xenia/base/math.h"
|
|
#include "xenia/base/profiling.h"
|
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
|
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
|
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
|
#include "xenia/gpu/gpu_flags.h"
|
|
#include "xenia/gpu/xenos.h"
|
|
#include "xenia/ui/d3d12/d3d12_util.h"
|
|
|
|
// Configuration variables (cvars) controlling the Direct3D 12 backend.
// These are read at setup time; changing them mid-run is not supported here.

DEFINE_bool(d3d12_bindless, true,
            "Use bindless resources where available - may improve performance, "
            "but may make debugging more complicated.",
            "D3D12");
DEFINE_bool(d3d12_edram_rov, true,
            "Use rasterizer-ordered views for render target emulation where "
            "available.",
            "D3D12");
// Some games (such as Banjo-Kazooie) are not aware of the half-pixel offset and
// may be blurry or have texture sampling artifacts, in this case the user may
// disable half-pixel offset by setting this to false.
DEFINE_bool(d3d12_half_pixel_offset, true,
            "Enable half-pixel vertex offset (D3D9 PA_SU_VTX_CNTL PIX_CENTER).",
            "D3D12");
DEFINE_bool(d3d12_readback_memexport, false,
            "Read data written by memory export in shaders on the CPU. This "
            "may be needed in some games (but many only access exported data "
            "on the GPU, and this flag isn't needed to handle such behavior), "
            "but causes mid-frame synchronization, so it has a huge "
            "performance impact.",
            "D3D12");
DEFINE_bool(d3d12_readback_resolve, false,
            "Read render-to-texture results on the CPU. This may be needed in "
            "some games, for instance, for screenshots in saved games, but "
            "causes mid-frame synchronization, so it has a huge performance "
            "impact.",
            "D3D12");
DEFINE_bool(d3d12_ssaa_custom_sample_positions, false,
            "Enable custom SSAA sample positions for the RTV/DSV rendering "
            "path where available instead of centers (experimental, not very "
            "high-quality).",
            "D3D12");
DEFINE_bool(d3d12_submit_on_primary_buffer_end, true,
            "Submit the command list when a PM4 primary buffer ends if it's "
            "possible to submit immediately to try to reduce frame latency.",
            "D3D12");
namespace xe {
namespace gpu {
namespace d3d12 {

// Out-of-line definitions for static constexpr class members so they can be
// ODR-used (taking their address, binding to references) - required prior to
// C++17 inline variables.
constexpr uint32_t D3D12CommandProcessor::kQueueFrames;
constexpr uint32_t
    D3D12CommandProcessor::RootBindfulExtraParameterIndices::kUnavailable;
constexpr uint32_t D3D12CommandProcessor::kViewBindfulHeapSize;
constexpr uint32_t D3D12CommandProcessor::kViewBindlessHeapSize;
constexpr uint32_t D3D12CommandProcessor::kSamplerHeapSize;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureHeight;
constexpr uint32_t D3D12CommandProcessor::kScratchBufferSizeIncrement;
// The heavy initialization (device objects, heaps, caches) happens later in
// SetupContext(); the constructor only forwards to the base command processor.
D3D12CommandProcessor::D3D12CommandProcessor(
    D3D12GraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
    : CommandProcessor(graphics_system, kernel_state) {}

D3D12CommandProcessor::~D3D12CommandProcessor() = default;
void D3D12CommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();
  // Only raise a request flag here - the actual clearing is deferred to a
  // point in the submission cycle where no GPU work references the caches.
  cache_clear_requested_ = true;
}
// Initializes on-disk shader storage for the given title, forwarding to both
// the base class and the pipeline cache. `blocking` controls whether stored
// shaders are loaded synchronously.
void D3D12CommandProcessor::InitializeShaderStorage(
    const std::filesystem::path& storage_root, uint32_t title_id,
    bool blocking) {
  CommandProcessor::InitializeShaderStorage(storage_root, title_id, blocking);
  pipeline_cache_->InitializeShaderStorage(storage_root, title_id, blocking);
}
void D3D12CommandProcessor::RequestFrameTrace(
|
|
const std::filesystem::path& root_path) {
|
|
// Capture with PIX if attached.
|
|
if (GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis() != nullptr) {
|
|
pix_capture_requested_.store(true, std::memory_order_relaxed);
|
|
return;
|
|
}
|
|
CommandProcessor::RequestFrameTrace(root_path);
|
|
}
|
|
|
|
// Notifies GPU-side caches that trace playback has written guest memory, so
// the affected ranges are re-uploaded/re-converted before the next use.
void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
                                                     uint32_t length) {
  shared_memory_->MemoryInvalidationCallback(base_ptr, length, true);
  primitive_converter_->MemoryInvalidationCallback(base_ptr, length, true);
}
// Restores EDRAM contents from a trace snapshot.
void D3D12CommandProcessor::RestoreEDRAMSnapshot(const void* snapshot) {
  // Starting a new frame because descriptors may be needed.
  BeginSubmission(true);
  render_target_cache_->RestoreEDRAMSnapshot(snapshot);
}
uint32_t D3D12CommandProcessor::GetCurrentColorMask(
|
|
const D3D12Shader* pixel_shader) const {
|
|
if (pixel_shader == nullptr) {
|
|
return 0;
|
|
}
|
|
auto& regs = *register_file_;
|
|
uint32_t color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32 & 0xFFFF;
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
if (!pixel_shader->writes_color_target(i)) {
|
|
color_mask &= ~(0xF << (i * 4));
|
|
}
|
|
}
|
|
return color_mask;
|
|
}
|
|
|
|
// Queues a resource state transition barrier; it is recorded into the command
// list later by SubmitBarriers(). Identical before/after states are a no-op.
void D3D12CommandProcessor::PushTransitionBarrier(
    ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
    D3D12_RESOURCE_STATES new_state, UINT subresource) {
  if (old_state == new_state) {
    return;
  }
  D3D12_RESOURCE_BARRIER barrier = {};
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  // D3D12_RESOURCE_TRANSITION_BARRIER field order:
  // pResource, Subresource, StateBefore, StateAfter.
  barrier.Transition = {resource, subresource, old_state, new_state};
  barriers_.push_back(barrier);
}
// Queues an aliasing barrier between two resources sharing heap memory; it is
// recorded into the command list later by SubmitBarriers().
void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource,
                                                ID3D12Resource* new_resource) {
  D3D12_RESOURCE_BARRIER barrier = {};
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  // D3D12_RESOURCE_ALIASING_BARRIER: pResourceBefore, pResourceAfter.
  barrier.Aliasing = {old_resource, new_resource};
  barriers_.push_back(barrier);
}
// Queues a UAV barrier ensuring all preceding UAV accesses to the resource
// complete before subsequent ones; recorded later by SubmitBarriers().
void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) {
  D3D12_RESOURCE_BARRIER barrier = {};
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barrier.UAV = {resource};
  barriers_.push_back(barrier);
}
void D3D12CommandProcessor::SubmitBarriers() {
|
|
UINT barrier_count = UINT(barriers_.size());
|
|
if (barrier_count != 0) {
|
|
deferred_command_list_->D3DResourceBarrier(barrier_count, barriers_.data());
|
|
barriers_.clear();
|
|
}
|
|
}
|
|
|
|
// Returns the root signature to draw with the given vertex and (optionally
// null) pixel shaders.
// - Bindless path: one of two prebuilt root signatures, chosen only by
//   whether the guest vertex shader runs as a host domain shader.
// - Bindful path: root signatures are keyed by the texture/sampler binding
//   counts of both stages plus the vertex-stage visibility, cached in
//   root_signatures_bindful_, and created on demand.
// Returns nullptr if creating a new bindful root signature fails.
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
  assert_true(vertex_shader->is_translated());

  if (bindless_resources_used_) {
    // A non-kVertex host shader type means the guest vertex shader is run as
    // a domain shader (host tessellation).
    return vertex_shader->host_vertex_shader_type() !=
                   Shader::HostVertexShaderType::kVertex
               ? root_signature_bindless_ds_
               : root_signature_bindless_vs_;
  }

  assert_true(pixel_shader == nullptr || pixel_shader->is_translated());

  D3D12_SHADER_VISIBILITY vertex_visibility;
  if (vertex_shader->host_vertex_shader_type() !=
      Shader::HostVertexShaderType::kVertex) {
    vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN;
  } else {
    vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX;
  }

  // Binding counts determine which extra root parameters are present.
  uint32_t texture_count_vertex, sampler_count_vertex;
  vertex_shader->GetTextureBindings(texture_count_vertex);
  vertex_shader->GetSamplerBindings(sampler_count_vertex);
  uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
  if (pixel_shader != nullptr) {
    pixel_shader->GetTextureBindings(texture_count_pixel);
    pixel_shader->GetSamplerBindings(sampler_count_pixel);
  }

  // Pack all the counts (and the visibility bit) into one cache key.
  // Better put the pixel texture/sampler in the lower bits probably because it
  // changes often.
  uint32_t index = 0;
  uint32_t index_offset = 0;
  index |= texture_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxTextureBindingIndexBits;
  index |= sampler_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  index |= texture_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxTextureBindingIndexBits;
  index |= sampler_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN)
           << index_offset;
  ++index_offset;
  assert_true(index_offset <= 32);

  // Try an existing root signature.
  auto it = root_signatures_bindful_.find(index);
  if (it != root_signatures_bindful_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  D3D12_ROOT_PARAMETER parameters[kRootParameter_Bindful_Count_Max];
  desc.NumParameters = kRootParameter_Bindful_Count_Base;
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Base parameters.

  // Fetch constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FetchConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Vertex float constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsVertex];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = vertex_visibility;
  }

  // Pixel float constants.
  // Same cbuffer register as the vertex float constants - disambiguated by
  // shader visibility.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsPixel];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
  }

  // System constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_SystemConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Bool and loop constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_BoolLoopConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Shared memory and, if ROVs are used, EDRAM.
  // The ranges array must stay alive until CreateRootSignature below - it is
  // referenced, not copied, by the parameter.
  D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3];
  {
    auto& parameter = parameters[kRootParameter_Bindful_SharedMemoryAndEDRAM];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 2;
    parameter.DescriptorTable.pDescriptorRanges =
        shared_memory_and_edram_ranges;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    shared_memory_and_edram_ranges[0].RangeType =
        D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    shared_memory_and_edram_ranges[0].NumDescriptors = 1;
    shared_memory_and_edram_ranges[0].BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kSharedMemory);
    shared_memory_and_edram_ranges[0].RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
    shared_memory_and_edram_ranges[1].RangeType =
        D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    shared_memory_and_edram_ranges[1].NumDescriptors = 1;
    shared_memory_and_edram_ranges[1].BaseShaderRegister =
        UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
    shared_memory_and_edram_ranges[1].RegisterSpace = 0;
    shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
    if (edram_rov_used_) {
      // Third range: the EDRAM buffer UAV for the ROV output path.
      ++parameter.DescriptorTable.NumDescriptorRanges;
      shared_memory_and_edram_ranges[2].RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
      shared_memory_and_edram_ranges[2].NumDescriptors = 1;
      shared_memory_and_edram_ranges[2].BaseShaderRegister =
          UINT(DxbcShaderTranslator::UAVRegister::kEDRAM);
      shared_memory_and_edram_ranges[2].RegisterSpace = 0;
      shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2;
    }
  }

  // Extra parameters.
  // Appended only for binding kinds actually used - the order here must match
  // GetRootBindfulExtraParameterIndices.

  // Pixel textures.
  D3D12_DESCRIPTOR_RANGE range_textures_pixel;
  if (texture_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_textures_pixel;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range_textures_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range_textures_pixel.NumDescriptors = texture_count_pixel;
    range_textures_pixel.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart);
    range_textures_pixel.RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    range_textures_pixel.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Pixel samplers.
  D3D12_DESCRIPTOR_RANGE range_samplers_pixel;
  if (sampler_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_samplers_pixel;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range_samplers_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range_samplers_pixel.NumDescriptors = sampler_count_pixel;
    range_samplers_pixel.BaseShaderRegister = 0;
    range_samplers_pixel.RegisterSpace = 0;
    range_samplers_pixel.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex textures.
  D3D12_DESCRIPTOR_RANGE range_textures_vertex;
  if (texture_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_textures_vertex;
    parameter.ShaderVisibility = vertex_visibility;
    range_textures_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range_textures_vertex.NumDescriptors = texture_count_vertex;
    range_textures_vertex.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart);
    range_textures_vertex.RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    range_textures_vertex.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex samplers.
  D3D12_DESCRIPTOR_RANGE range_samplers_vertex;
  if (sampler_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_samplers_vertex;
    parameter.ShaderVisibility = vertex_visibility;
    range_samplers_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range_samplers_vertex.NumDescriptors = sampler_count_vertex;
    range_samplers_vertex.BaseShaderRegister = 0;
    range_samplers_vertex.RegisterSpace = 0;
    range_samplers_vertex.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  ID3D12RootSignature* root_signature = ui::d3d12::util::CreateRootSignature(
      GetD3D12Context()->GetD3D12Provider(), desc);
  if (root_signature == nullptr) {
    XELOGE(
        "Failed to create a root signature with {} pixel textures, {} pixel "
        "samplers, {} vertex textures and {} vertex samplers",
        texture_count_pixel, sampler_count_pixel, texture_count_vertex,
        sampler_count_vertex);
    return nullptr;
  }
  root_signatures_bindful_.insert({index, root_signature});
  return root_signature;
}
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
|
|
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
|
|
RootBindfulExtraParameterIndices& indices_out) {
|
|
uint32_t texture_count_pixel = 0, sampler_count_pixel = 0;
|
|
if (pixel_shader != nullptr) {
|
|
pixel_shader->GetTextureBindings(texture_count_pixel);
|
|
pixel_shader->GetSamplerBindings(sampler_count_pixel);
|
|
}
|
|
uint32_t texture_count_vertex, sampler_count_vertex;
|
|
vertex_shader->GetTextureBindings(texture_count_vertex);
|
|
vertex_shader->GetSamplerBindings(sampler_count_vertex);
|
|
|
|
uint32_t index = kRootParameter_Bindful_Count_Base;
|
|
if (texture_count_pixel != 0) {
|
|
indices_out.textures_pixel = index++;
|
|
} else {
|
|
indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable;
|
|
}
|
|
if (sampler_count_pixel != 0) {
|
|
indices_out.samplers_pixel = index++;
|
|
} else {
|
|
indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable;
|
|
}
|
|
if (texture_count_vertex != 0) {
|
|
indices_out.textures_vertex = index++;
|
|
} else {
|
|
indices_out.textures_vertex =
|
|
RootBindfulExtraParameterIndices::kUnavailable;
|
|
}
|
|
if (sampler_count_vertex != 0) {
|
|
indices_out.samplers_vertex = index++;
|
|
} else {
|
|
indices_out.samplers_vertex =
|
|
RootBindfulExtraParameterIndices::kUnavailable;
|
|
}
|
|
return index;
|
|
}
|
|
|
|
// Allocates view descriptors from the bindful heap pool - either
// `count_for_partial_update` descriptors continuing the heap identified by
// `previous_heap_index`, or `count_for_full_update` in a fresh heap. Rebinds
// descriptor heaps on the command list when the pool switches heaps. Returns
// the heap index to pass back as `previous_heap_index` next time, or
// DescriptorHeapPool::kHeapIndexInvalid on failure (handles are then not
// written).
uint64_t D3D12CommandProcessor::RequestViewBindfulDescriptors(
    uint64_t previous_heap_index, uint32_t count_for_partial_update,
    uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  assert_false(bindless_resources_used_);
  assert_true(submission_open_);
  uint32_t descriptor_index;
  uint64_t current_heap_index = view_bindful_heap_pool_->Request(
      frame_current_, previous_heap_index, count_for_partial_update,
      count_for_full_update, descriptor_index);
  if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
    // There was an error.
    return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
  }
  ID3D12DescriptorHeap* heap = view_bindful_heap_pool_->GetLastRequestHeap();
  if (view_bindful_heap_current_ != heap) {
    // The pool moved to a new heap - rebind both descriptor heaps on the
    // command list.
    view_bindful_heap_current_ = heap;
    deferred_command_list_->SetDescriptorHeaps(view_bindful_heap_current_,
                                               sampler_bindful_heap_current_);
  }
  auto provider = GetD3D12Context()->GetD3D12Provider();
  cpu_handle_out = provider->OffsetViewDescriptor(
      view_bindful_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index);
  gpu_handle_out = provider->OffsetViewDescriptor(
      view_bindful_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index);
  return current_heap_index;
}
uint32_t D3D12CommandProcessor::RequestPersistentViewBindlessDescriptor() {
|
|
assert_true(bindless_resources_used_);
|
|
if (!view_bindless_heap_free_.empty()) {
|
|
uint32_t descriptor_index = view_bindless_heap_free_.back();
|
|
view_bindless_heap_free_.pop_back();
|
|
return descriptor_index;
|
|
}
|
|
if (view_bindless_heap_allocated_ >= kViewBindlessHeapSize) {
|
|
return UINT32_MAX;
|
|
}
|
|
return view_bindless_heap_allocated_++;
|
|
}
|
|
|
|
// Returns a persistent bindless view descriptor slot to the free list
// immediately - the caller must guarantee no in-flight GPU work still reads
// the descriptor.
void D3D12CommandProcessor::ReleaseViewBindlessDescriptorImmediately(
    uint32_t descriptor_index) {
  assert_true(bindless_resources_used_);
  view_bindless_heap_free_.push_back(descriptor_index);
}
// Allocates `count` single-use view descriptors valid for the current
// submission, writing one CPU/GPU handle pair per descriptor to
// `handles_out`. On the bindless path the descriptors are individually
// recycled once the GPU completes the submission; on the bindful path a
// contiguous range is taken from the current heap. Returns false if the
// request cannot be satisfied.
bool D3D12CommandProcessor::RequestOneUseSingleViewDescriptors(
    uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out) {
  assert_true(submission_open_);
  if (!count) {
    return true;
  }
  assert_not_null(handles_out);
  auto provider = GetD3D12Context()->GetD3D12Provider();
  if (bindless_resources_used_) {
    // Request separate bindless descriptors that will be freed when this
    // submission is completed by the GPU.
    // Capacity check: unallocated tail plus recycled slots.
    if (count > kViewBindlessHeapSize - view_bindless_heap_allocated_ +
                    view_bindless_heap_free_.size()) {
      return false;
    }
    for (uint32_t i = 0; i < count; ++i) {
      uint32_t descriptor_index;
      if (!view_bindless_heap_free_.empty()) {
        descriptor_index = view_bindless_heap_free_.back();
        view_bindless_heap_free_.pop_back();
      } else {
        descriptor_index = view_bindless_heap_allocated_++;
      }
      // Remember the submission the descriptor belongs to so it can be
      // recycled once that submission is known to be complete.
      view_bindless_one_use_descriptors_.push_back(
          std::make_pair(descriptor_index, submission_current_));
      handles_out[i] =
          std::make_pair(provider->OffsetViewDescriptor(
                             view_bindless_heap_cpu_start_, descriptor_index),
                         provider->OffsetViewDescriptor(
                             view_bindless_heap_gpu_start_, descriptor_index));
    }
  } else {
    // Request a range within the current heap for bindful resources path.
    D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle_start;
    D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_start;
    if (RequestViewBindfulDescriptors(
            ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, count, count,
            cpu_handle_start, gpu_handle_start) ==
        ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
      return false;
    }
    for (uint32_t i = 0; i < count; ++i) {
      handles_out[i] =
          std::make_pair(provider->OffsetViewDescriptor(cpu_handle_start, i),
                         provider->OffsetViewDescriptor(gpu_handle_start, i));
    }
  }
  return true;
}
// Returns the CPU/GPU handle pair for a fixed system descriptor in the
// bindless view heap; system views occupy the heap's leading slots indexed by
// the SystemBindlessView enum value.
ui::d3d12::util::DescriptorCPUGPUHandlePair
D3D12CommandProcessor::GetSystemBindlessViewHandlePair(
    SystemBindlessView view) const {
  assert_true(bindless_resources_used_);
  auto provider = GetD3D12Context()->GetD3D12Provider();
  uint32_t descriptor_index = uint32_t(view);
  ui::d3d12::util::DescriptorCPUGPUHandlePair handles;
  handles.first = provider->OffsetViewDescriptor(view_bindless_heap_cpu_start_,
                                                 descriptor_index);
  handles.second = provider->OffsetViewDescriptor(
      view_bindless_heap_gpu_start_, descriptor_index);
  return handles;
}
// Sampler-heap counterpart of RequestViewBindfulDescriptors: allocates
// sampler descriptors from the bindful sampler heap pool, rebinding heaps on
// the command list if the pool switched to a new heap. Returns the heap index
// for the next `previous_heap_index`, or kHeapIndexInvalid on failure.
uint64_t D3D12CommandProcessor::RequestSamplerBindfulDescriptors(
    uint64_t previous_heap_index, uint32_t count_for_partial_update,
    uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  assert_false(bindless_resources_used_);
  assert_true(submission_open_);
  uint32_t descriptor_index;
  uint64_t current_heap_index = sampler_bindful_heap_pool_->Request(
      frame_current_, previous_heap_index, count_for_partial_update,
      count_for_full_update, descriptor_index);
  if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
    // There was an error.
    return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
  }
  ID3D12DescriptorHeap* heap = sampler_bindful_heap_pool_->GetLastRequestHeap();
  if (sampler_bindful_heap_current_ != heap) {
    // The pool moved to a new heap - rebind both descriptor heaps on the
    // command list.
    sampler_bindful_heap_current_ = heap;
    deferred_command_list_->SetDescriptorHeaps(view_bindful_heap_current_,
                                               sampler_bindful_heap_current_);
  }
  // Offset the handles manually by the device's sampler descriptor increment.
  uint32_t descriptor_offset =
      descriptor_index *
      GetD3D12Context()->GetD3D12Provider()->GetSamplerDescriptorSize();
  cpu_handle_out.ptr =
      sampler_bindful_heap_pool_->GetLastRequestHeapCPUStart().ptr +
      descriptor_offset;
  gpu_handle_out.ptr =
      sampler_bindful_heap_pool_->GetLastRequestHeapGPUStart().ptr +
      descriptor_offset;
  return current_heap_index;
}
// Checks out the shared scratch GPU buffer, at least `size` bytes, in the
// requested resource state (a transition barrier is queued if needed). Only
// one checkout may be active at a time; release with ReleaseScratchGPUBuffer
// before the next request. The buffer grows in kScratchBufferSizeIncrement
// steps and never shrinks; a replaced buffer is kept alive until the current
// submission completes. Returns nullptr on misuse or allocation failure.
ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
    uint32_t size, D3D12_RESOURCE_STATES state) {
  assert_true(submission_open_);
  assert_false(scratch_buffer_used_);
  if (!submission_open_ || scratch_buffer_used_ || size == 0) {
    return nullptr;
  }

  // Existing buffer is big enough - just transition it to the desired state.
  if (size <= scratch_buffer_size_) {
    PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state);
    scratch_buffer_state_ = state;
    scratch_buffer_used_ = true;
    return scratch_buffer_;
  }

  // Round up to reduce reallocation frequency.
  size = xe::align(size, kScratchBufferSizeIncrement);

  auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
  D3D12_RESOURCE_DESC buffer_desc;
  ui::d3d12::util::FillBufferResourceDesc(
      buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
  ID3D12Resource* buffer;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
          &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) {
    XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20);
    return nullptr;
  }
  if (scratch_buffer_ != nullptr) {
    // The old buffer may still be referenced by the current submission -
    // defer its release until that submission is complete.
    buffers_for_deletion_.push_back(
        std::make_pair(scratch_buffer_, submission_current_));
  }
  scratch_buffer_ = buffer;
  scratch_buffer_size_ = size;
  scratch_buffer_state_ = state;
  scratch_buffer_used_ = true;
  return scratch_buffer_;
}
// Ends the current scratch buffer checkout. `new_state` is the resource state
// the caller left the buffer in, recorded so the next checkout can transition
// from it correctly. `buffer` may differ from the current scratch buffer if
// the buffer was replaced since checkout - the state is only recorded when it
// matches.
void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
    ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) {
  assert_true(submission_open_);
  assert_true(scratch_buffer_used_);
  scratch_buffer_used_ = false;
  if (buffer == scratch_buffer_) {
    scratch_buffer_state_ = new_state;
  }
}
// Applies programmable sample positions for the requested guest MSAA mode on
// the RTV/DSV (non-ROV) path, when enabled by the
// d3d12_ssaa_custom_sample_positions cvar and supported by the device
// (command list 1 interface, programmable sample positions tier 2). Redundant
// calls with the same mode are skipped.
void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) {
  if (current_sample_positions_ == sample_positions) {
    return;
  }
  // Evaluating attributes by sample index - which is done for per-sample
  // depth - is undefined with programmable sample positions, so can't use them
  // for ROV output. There's hardly any difference between 2,6 (of 0 and 3 with
  // 4x MSAA) and 4,4 anyway.
  // https://docs.microsoft.com/en-us/windows/desktop/api/d3d12/nf-d3d12-id3d12graphicscommandlist1-setsamplepositions
  if (cvars::d3d12_ssaa_custom_sample_positions && !edram_rov_used_ &&
      command_list_1_) {
    auto provider = GetD3D12Context()->GetD3D12Provider();
    auto tier = provider->GetProgrammableSamplePositionsTier();
    if (tier >= D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_2) {
      // Depth buffer transitions are affected by sample positions.
      SubmitBarriers();
      // Standard sample positions in Direct3D 10.1, but adjusted to take the
      // fact that SSAA samples are already shifted by 1/4 of a pixel.
      // TODO(Triang3l): Find what sample positions are used by Xenos, though
      // they are not necessarily better. The purpose is just to make 2x SSAA
      // work a little bit better for tall stairs.
      // FIXME(Triang3l): This is currently even uglier than without custom
      // sample positions.
      if (sample_positions >= MsaaSamples::k2X) {
        // Sample 1 is lower-left on Xenos, but upper-right in Direct3D 12.
        D3D12_SAMPLE_POSITION d3d_sample_positions[4];
        if (sample_positions >= MsaaSamples::k4X) {
          // Upper-left.
          d3d_sample_positions[0].X = -2 + 4;
          d3d_sample_positions[0].Y = -6 + 4;
          // Upper-right.
          d3d_sample_positions[1].X = 6 - 4;
          d3d_sample_positions[1].Y = -2 + 4;
          // Lower-left.
          d3d_sample_positions[2].X = -6 + 4;
          d3d_sample_positions[2].Y = 2 - 4;
          // Lower-right.
          d3d_sample_positions[3].X = 2 - 4;
          d3d_sample_positions[3].Y = 6 - 4;
        } else {
          // Upper.
          d3d_sample_positions[0].X = -4;
          d3d_sample_positions[0].Y = -4 + 4;
          d3d_sample_positions[1].X = -4;
          d3d_sample_positions[1].Y = -4 + 4;
          // Lower.
          d3d_sample_positions[2].X = 4;
          d3d_sample_positions[2].Y = 4 - 4;
          d3d_sample_positions[3].X = 4;
          d3d_sample_positions[3].Y = 4 - 4;
        }
        deferred_command_list_->D3DSetSamplePositions(1, 4,
                                                      d3d_sample_positions);
      } else {
        // Reset to the default (center) positions.
        deferred_command_list_->D3DSetSamplePositions(0, 0, nullptr);
      }
    }
  }
  current_sample_positions_ = sample_positions;
}
// Binds an externally owned compute pipeline state, skipping redundant binds.
// Invalidates the cached-pipeline tracking since the bound PSO no longer
// comes from the pipeline cache.
void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) {
  if (pipeline == current_external_pipeline_) {
    return;
  }
  deferred_command_list_->D3DSetPipelineState(pipeline);
  current_cached_pipeline_ = nullptr;
  current_external_pipeline_ = pipeline;
}
// Thin forwarder: flushes pending render target work and unbinds the targets
// via the render target cache.
void D3D12CommandProcessor::FlushAndUnbindRenderTargets() {
  render_target_cache_->FlushAndUnbindRenderTargets();
}
// Binds an externally owned graphics pipeline and invalidates the internal
// state tracking it clobbers. The `changing_*` flags tell which additional
// fixed-function state the external pass will modify, so the next guest draw
// re-applies it.
void D3D12CommandProcessor::SetExternalGraphicsPipeline(
    ID3D12PipelineState* pipeline, bool changing_rts_and_sample_positions,
    bool changing_viewport, bool changing_blend_factor,
    bool changing_stencil_ref) {
  if (pipeline != current_external_pipeline_) {
    deferred_command_list_->D3DSetPipelineState(pipeline);
    current_cached_pipeline_ = nullptr;
    current_external_pipeline_ = pipeline;
  }
  // An external pass always invalidates the root signature bindings and the
  // primitive topology.
  current_graphics_root_signature_ = nullptr;
  current_graphics_root_up_to_date_ = 0;
  primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
  if (changing_rts_and_sample_positions) {
    render_target_cache_->ForceApplyOnNextUpdate();
  }
  if (changing_viewport) {
    ff_viewport_update_needed_ = true;
    ff_scissor_update_needed_ = true;
  }
  ff_blend_factor_update_needed_ |= changing_blend_factor;
  ff_stencil_ref_update_needed_ |= changing_stencil_ref;
}
void D3D12CommandProcessor::NotifyShaderBindingsLayoutUIDsInvalidated() {
|
|
if (bindless_resources_used_) {
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
} else {
|
|
bindful_textures_written_vertex_ = false;
|
|
bindful_textures_written_pixel_ = false;
|
|
bindful_samplers_written_vertex_ = false;
|
|
bindful_samplers_written_pixel_ = false;
|
|
}
|
|
}
|
|
|
|
std::string D3D12CommandProcessor::GetWindowTitleText() const {
|
|
if (render_target_cache_) {
|
|
if (!edram_rov_used_) {
|
|
return "Direct3D 12 - no ROV, inaccurate";
|
|
}
|
|
// Currently scaling is only supported with ROV.
|
|
if (texture_cache_ != nullptr && texture_cache_->IsResolutionScale2X()) {
|
|
return "Direct3D 12 - 2x";
|
|
}
|
|
}
|
|
return "Direct3D 12";
|
|
}
|
|
|
|
// Captures the current swap texture into a CPU-side RGBA image (for
// screenshots). Performs a full GPU round trip - copy to a readback buffer,
// submit, and wait for all submissions - so this is slow and intended for
// occasional use only. Returns nullptr on any allocation, submission or
// mapping failure.
std::unique_ptr<xe::ui::RawImage> D3D12CommandProcessor::Capture() {
  ID3D12Resource* readback_buffer =
      RequestReadbackBuffer(uint32_t(swap_texture_copy_size_));
  if (!readback_buffer) {
    return nullptr;
  }
  BeginSubmission(false);
  PushTransitionBarrier(swap_texture_,
                        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
                        D3D12_RESOURCE_STATE_COPY_SOURCE);
  SubmitBarriers();
  D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
  location_source.pResource = swap_texture_;
  location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  location_source.SubresourceIndex = 0;
  location_dest.pResource = readback_buffer;
  location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
  location_dest.PlacedFootprint = swap_texture_copy_footprint_;
  deferred_command_list_->CopyTexture(location_dest, location_source);
  PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_COPY_SOURCE,
                        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  if (!EndSubmission(false)) {
    return nullptr;
  }
  // Block until the GPU has finished the copy before mapping the readback
  // buffer.
  AwaitAllSubmissionsCompletion();
  D3D12_RANGE readback_range;
  readback_range.Begin = swap_texture_copy_footprint_.Offset;
  // NOTE(review): End is set to swap_texture_copy_size_ directly - this
  // assumes the total copy size already accounts for the footprint offset;
  // confirm against how swap_texture_copy_size_ is computed.
  readback_range.End = swap_texture_copy_size_;
  void* readback_mapping;
  if (FAILED(readback_buffer->Map(0, &readback_range, &readback_mapping))) {
    return nullptr;
  }
  // NOTE(review): the buffer is left mapped here - persistent mapping of
  // readback resources is legal in D3D12, presumably Unmap is handled by the
  // readback buffer's owner; verify.
  std::unique_ptr<xe::ui::RawImage> raw_image(new xe::ui::RawImage());
  auto swap_texture_size = GetSwapTextureSize();
  raw_image->width = swap_texture_size.first;
  raw_image->height = swap_texture_size.second;
  raw_image->stride = swap_texture_size.first * 4;
  raw_image->data.resize(raw_image->stride * swap_texture_size.second);
  // Tightly pack the rows - the placed footprint's row pitch may be padded
  // for alignment.
  const uint8_t* readback_source_data =
      reinterpret_cast<const uint8_t*>(readback_mapping) +
      swap_texture_copy_footprint_.Offset;
  for (uint32_t i = 0; i < swap_texture_size.second; ++i) {
    std::memcpy(raw_image->data.data() + i * raw_image->stride,
                readback_source_data +
                    i * swap_texture_copy_footprint_.Footprint.RowPitch,
                raw_image->stride);
  }
  return raw_image;
}
bool D3D12CommandProcessor::SetupContext() {
  // Creates every persistent D3D12 object the command processor needs:
  // submission/frame tracking, the command list and its allocator cache,
  // descriptor heaps (bindless or bindful), global root signatures, the
  // subsystem caches (shared memory, textures, render targets, pipelines,
  // primitive conversion), gamma ramp resources and the swap (front buffer)
  // texture. Returns false on any failure; ShutdownContext releases whatever
  // was created up to that point.
  if (!CommandProcessor::SetupContext()) {
    XELOGE("Failed to initialize base command processor context");
    return false;
  }

  auto provider = GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();
  auto direct_queue = provider->GetDirectQueue();

  // Submission tracking: a monotonically increasing fence distinguishes
  // GPU-completed submissions from in-flight ones.
  submission_open_ = false;
  submission_current_ = 1;
  submission_completed_ = 0;
  if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
                                 IID_PPV_ARGS(&submission_fence_)))) {
    XELOGE("Failed to create the submission fence");
    return false;
  }
  // Auto-reset, initially non-signaled event for CPU waits on the fence.
  submission_fence_completion_event_ =
      CreateEvent(nullptr, false, false, nullptr);
  if (submission_fence_completion_event_ == nullptr) {
    XELOGE("Failed to create the submission fence completion event");
    return false;
  }

  // Frame tracking (a frame may span multiple submissions).
  frame_open_ = false;
  frame_current_ = 1;
  frame_completed_ = 0;
  std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));

  // Create the command list and one allocator because it's needed for a command
  // list.
  ID3D12CommandAllocator* command_allocator;
  if (FAILED(device->CreateCommandAllocator(
          D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator)))) {
    XELOGE("Failed to create a command allocator");
    return false;
  }
  // Seed the writable allocator list with the one allocator just created; the
  // submitted list starts empty.
  command_allocator_writable_first_ = new CommandAllocator;
  command_allocator_writable_first_->command_allocator = command_allocator;
  command_allocator_writable_first_->last_usage_submission = 0;
  command_allocator_writable_first_->next = nullptr;
  command_allocator_writable_last_ = command_allocator_writable_first_;
  command_allocator_submitted_first_ = nullptr;
  command_allocator_submitted_last_ = nullptr;
  if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
                                       command_allocator, nullptr,
                                       IID_PPV_ARGS(&command_list_)))) {
    XELOGE("Failed to create the graphics command list");
    return false;
  }
  // Initially in open state, wait until a deferred command list submission.
  command_list_->Close();
  // Optional - added in Creators Update (SDK 10.0.15063.0).
  command_list_->QueryInterface(IID_PPV_ARGS(&command_list_1_));
  deferred_command_list_ = std::make_unique<DeferredCommandList>(this);

  // Feature selection: bindless descriptors need resource binding tier 2+,
  // ROV-based EDRAM emulation needs rasterizer-ordered view support; both can
  // also be turned off via cvars.
  bindless_resources_used_ =
      cvars::d3d12_bindless &&
      provider->GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2;
  edram_rov_used_ =
      cvars::d3d12_edram_rov && provider->AreRasterizerOrderedViewsSupported();

  // Initialize resource binding.
  constant_buffer_pool_ =
      std::make_unique<ui::d3d12::UploadBufferPool>(device, 1024 * 1024);
  if (bindless_resources_used_) {
    // One large shader-visible CBV/SRV/UAV heap; the first
    // SystemBindlessView::kCount slots are reserved for system descriptors
    // written later in this function.
    D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc;
    view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    view_bindless_heap_desc.NumDescriptors = kViewBindlessHeapSize;
    view_bindless_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    view_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &view_bindless_heap_desc, IID_PPV_ARGS(&view_bindless_heap_)))) {
      XELOGE("Failed to create the bindless CBV/SRV/UAV descriptor heap");
      return false;
    }
    view_bindless_heap_cpu_start_ =
        view_bindless_heap_->GetCPUDescriptorHandleForHeapStart();
    view_bindless_heap_gpu_start_ =
        view_bindless_heap_->GetGPUDescriptorHandleForHeapStart();
    view_bindless_heap_allocated_ = uint32_t(SystemBindlessView::kCount);

    // Sampler heap - more heaps may be created later if this one overflows.
    D3D12_DESCRIPTOR_HEAP_DESC sampler_bindless_heap_desc;
    sampler_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
    sampler_bindless_heap_desc.NumDescriptors = kSamplerHeapSize;
    sampler_bindless_heap_desc.Flags =
        D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    sampler_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &sampler_bindless_heap_desc,
            IID_PPV_ARGS(&sampler_bindless_heap_current_)))) {
      XELOGE("Failed to create the bindless sampler descriptor heap");
      return false;
    }
    sampler_bindless_heap_cpu_start_ =
        sampler_bindless_heap_current_->GetCPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_gpu_start_ =
        sampler_bindless_heap_current_->GetGPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_allocated_ = 0;
  } else {
    // Bindful path - per-draw descriptor allocation from pooled heaps.
    view_bindful_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
        device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, kViewBindfulHeapSize);
    sampler_bindful_heap_pool_ =
        std::make_unique<ui::d3d12::DescriptorHeapPool>(
            device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, kSamplerHeapSize);
  }

  if (bindless_resources_used_) {
    // Global bindless resource root signatures.
    D3D12_ROOT_SIGNATURE_DESC root_signature_bindless_desc;
    D3D12_ROOT_PARAMETER
        root_parameters_bindless[kRootParameter_Bindless_Count];
    root_signature_bindless_desc.NumParameters = kRootParameter_Bindless_Count;
    root_signature_bindless_desc.pParameters = root_parameters_bindless;
    root_signature_bindless_desc.NumStaticSamplers = 0;
    root_signature_bindless_desc.pStaticSamplers = nullptr;
    root_signature_bindless_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
    // Fetch constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FetchConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Vertex float constants.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_FloatConstantsVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // Pixel float constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FloatConstantsPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Pixel shader descriptor indices.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_DescriptorIndicesPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Vertex shader descriptor indices.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_DescriptorIndicesVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // System constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SystemConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Bool and loop constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_BoolLoopConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Sampler heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range;
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SamplerHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      // Will be appending.
      parameter.DescriptorTable.NumDescriptorRanges = 1;
      parameter.DescriptorTable.pDescriptorRanges =
          &root_bindless_sampler_range;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      // Unbounded (UINT_MAX descriptors) range covering the whole heap.
      root_bindless_sampler_range.RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
      root_bindless_sampler_range.NumDescriptors = UINT_MAX;
      root_bindless_sampler_range.BaseShaderRegister = 0;
      root_bindless_sampler_range.RegisterSpace = 0;
      root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0;
    }
    // View heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6];
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_ViewHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      // Will be appending.
      parameter.DescriptorTable.NumDescriptorRanges = 0;
      parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      // 2D array textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray);
        range.OffsetInDescriptorsFromTableStart = 0;
      }
      // 3D textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D);
        range.OffsetInDescriptorsFromTableStart = 0;
      }
      // Cube textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube);
        range.OffsetInDescriptorsFromTableStart = 0;
      }
      // Shared memory SRV.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory);
        range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kSharedMemoryRawSRV);
      }
      // Shared memory UAV.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
        range.RegisterSpace = 0;
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kSharedMemoryRawUAV);
      }
      // EDRAM.
      if (edram_rov_used_) {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::UAVRegister::kEDRAM);
        range.RegisterSpace = 0;
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kEDRAMR32UintUAV);
      }
    }
    // Vertex-shader variant of the root signature (no tessellation).
    root_signature_bindless_vs_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_vs_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use without tessellation");
      return false;
    }
    // Domain-shader variant - same layout, but vertex-stage parameters are
    // made visible to the domain shader instead.
    root_parameters_bindless[kRootParameter_Bindless_FloatConstantsVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_signature_bindless_ds_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_ds_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use with tessellation");
      return false;
    }
  }

  // Subsystems - created in dependency order (texture cache needs shared
  // memory, render target cache needs the texture cache, etc.).
  shared_memory_ =
      std::make_unique<SharedMemory>(this, memory_, &trace_writer_);
  if (!shared_memory_->Initialize()) {
    XELOGE("Failed to initialize shared memory");
    return false;
  }

  texture_cache_ = std::make_unique<TextureCache>(
      this, register_file_, bindless_resources_used_, shared_memory_.get());
  if (!texture_cache_->Initialize(edram_rov_used_)) {
    XELOGE("Failed to initialize the texture cache");
    return false;
  }

  render_target_cache_ = std::make_unique<RenderTargetCache>(
      this, register_file_, &trace_writer_, bindless_resources_used_,
      edram_rov_used_);
  if (!render_target_cache_->Initialize(texture_cache_.get())) {
    XELOGE("Failed to initialize the render target cache");
    return false;
  }

  pipeline_cache_ = std::make_unique<PipelineCache>(
      this, register_file_, bindless_resources_used_, edram_rov_used_,
      texture_cache_->IsResolutionScale2X() ? 2 : 1);
  if (!pipeline_cache_->Initialize()) {
    XELOGE("Failed to initialize the graphics pipeline state cache");
    return false;
  }

  primitive_converter_ = std::make_unique<PrimitiveConverter>(
      this, register_file_, memory_, &trace_writer_);
  if (!primitive_converter_->Initialize()) {
    XELOGE("Failed to initialize the geometric primitive converter");
    return false;
  }

  // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are
  // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it too.
  // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf
  dirty_gamma_ramp_normal_ = true;
  dirty_gamma_ramp_pwl_ = true;
  D3D12_RESOURCE_DESC gamma_ramp_desc;
  gamma_ramp_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D;
  gamma_ramp_desc.Alignment = 0;
  gamma_ramp_desc.Width = 256;
  gamma_ramp_desc.Height = 1;
  gamma_ramp_desc.DepthOrArraySize = 1;
  // Normal gamma is 256x1, PWL gamma is 128x1.
  gamma_ramp_desc.MipLevels = 2;
  gamma_ramp_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
  gamma_ramp_desc.SampleDesc.Count = 1;
  gamma_ramp_desc.SampleDesc.Quality = 0;
  gamma_ramp_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  gamma_ramp_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
  // The first action will be uploading.
  gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
          &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr,
          IID_PPV_ARGS(&gamma_ramp_texture_)))) {
    XELOGE("Failed to create the gamma ramp texture");
    return false;
  }
  // Get the layout for the upload buffer - one set of both mips per queued
  // frame (the desc is reused with the array size set to kQueueFrames).
  gamma_ramp_desc.DepthOrArraySize = kQueueFrames;
  UINT64 gamma_ramp_upload_size;
  device->GetCopyableFootprints(&gamma_ramp_desc, 0, kQueueFrames * 2, 0,
                                gamma_ramp_footprints_, nullptr, nullptr,
                                &gamma_ramp_upload_size);
  // Create the upload buffer for the gamma ramp.
  ui::d3d12::util::FillBufferResourceDesc(
      gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE);
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE,
          &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
          IID_PPV_ARGS(&gamma_ramp_upload_)))) {
    XELOGE("Failed to create the gamma ramp upload buffer");
    return false;
  }
  // Keep the upload buffer persistently mapped; unmapped in ShutdownContext.
  if (FAILED(gamma_ramp_upload_->Map(
          0, nullptr, reinterpret_cast<void**>(&gamma_ramp_upload_mapping_)))) {
    XELOGE("Failed to map the gamma ramp upload buffer");
    gamma_ramp_upload_mapping_ = nullptr;
    return false;
  }

  // Front buffer (swap) texture - the target of frame presentation.
  D3D12_RESOURCE_DESC swap_texture_desc;
  swap_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  swap_texture_desc.Alignment = 0;
  auto swap_texture_size = GetSwapTextureSize();
  swap_texture_desc.Width = swap_texture_size.first;
  swap_texture_desc.Height = swap_texture_size.second;
  swap_texture_desc.DepthOrArraySize = 1;
  swap_texture_desc.MipLevels = 1;
  swap_texture_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat;
  swap_texture_desc.SampleDesc.Count = 1;
  swap_texture_desc.SampleDesc.Quality = 0;
  swap_texture_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  swap_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  // Can be sampled at any time, switch to render target when needed, then back.
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
          &swap_texture_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
          nullptr, IID_PPV_ARGS(&swap_texture_)))) {
    XELOGE("Failed to create the command processor front buffer");
    return false;
  }
  // Cache the copy footprint/size used by Capture().
  device->GetCopyableFootprints(&swap_texture_desc, 0, 1, 0,
                                &swap_texture_copy_footprint_, nullptr, nullptr,
                                &swap_texture_copy_size_);
  D3D12_DESCRIPTOR_HEAP_DESC swap_descriptor_heap_desc;
  swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
  swap_descriptor_heap_desc.NumDescriptors = 1;
  swap_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
  swap_descriptor_heap_desc.NodeMask = 0;
  if (FAILED(device->CreateDescriptorHeap(
          &swap_descriptor_heap_desc,
          IID_PPV_ARGS(&swap_texture_rtv_descriptor_heap_)))) {
    XELOGE("Failed to create the command processor front buffer RTV heap");
    return false;
  }
  swap_texture_rtv_ =
      swap_texture_rtv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart();
  D3D12_RENDER_TARGET_VIEW_DESC swap_rtv_desc;
  swap_rtv_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat;
  swap_rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  swap_rtv_desc.Texture2D.MipSlice = 0;
  swap_rtv_desc.Texture2D.PlaneSlice = 0;
  device->CreateRenderTargetView(swap_texture_, &swap_rtv_desc,
                                 swap_texture_rtv_);
  // Reuse the heap desc for the SRV heap (shader-visible, for presentation).
  swap_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
  swap_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
  if (FAILED(device->CreateDescriptorHeap(
          &swap_descriptor_heap_desc,
          IID_PPV_ARGS(&swap_texture_srv_descriptor_heap_)))) {
    XELOGE("Failed to create the command processor front buffer SRV heap");
    return false;
  }
  D3D12_SHADER_RESOURCE_VIEW_DESC swap_srv_desc;
  swap_srv_desc.Format = ui::d3d12::D3D12Context::kSwapChainFormat;
  swap_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
  swap_srv_desc.Shader4ComponentMapping =
      D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  swap_srv_desc.Texture2D.MostDetailedMip = 0;
  swap_srv_desc.Texture2D.MipLevels = 1;
  swap_srv_desc.Texture2D.PlaneSlice = 0;
  swap_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
  device->CreateShaderResourceView(
      swap_texture_, &swap_srv_desc,
      swap_texture_srv_descriptor_heap_->GetCPUDescriptorHandleForHeapStart());

  if (bindless_resources_used_) {
    // Create the system bindless descriptors once all resources are
    // initialized.
    // kNullTexture2DArray.
    D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc;
    null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    // Null views return forced zeros in all components.
    null_srv_desc.Shader4ComponentMapping =
        D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
    null_srv_desc.Texture2DArray.MostDetailedMip = 0;
    null_srv_desc.Texture2DArray.MipLevels = 1;
    null_srv_desc.Texture2DArray.FirstArraySlice = 0;
    null_srv_desc.Texture2DArray.ArraySize = 1;
    null_srv_desc.Texture2DArray.PlaneSlice = 0;
    null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture2DArray)));
    // kNullTexture3D.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
    null_srv_desc.Texture3D.MostDetailedMip = 0;
    null_srv_desc.Texture3D.MipLevels = 1;
    null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture3D)));
    // kNullTextureCube.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
    null_srv_desc.TextureCube.MostDetailedMip = 0;
    null_srv_desc.TextureCube.MipLevels = 1;
    null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTextureCube)));
    // kSharedMemoryRawSRV.
    shared_memory_->WriteRawSRVDescriptor(provider->OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawSRV)));
    // kSharedMemoryRawUAV.
    shared_memory_->WriteRawUAVDescriptor(provider->OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawUAV)));
    // kEDRAMR32UintUAV.
    render_target_cache_->WriteEDRAMR32UintUAVDescriptor(
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEDRAMR32UintUAV)));
    // kEDRAMRawSRV.
    render_target_cache_->WriteEDRAMRawSRVDescriptor(
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEDRAMRawSRV)));
    // kEDRAMRawUAV.
    render_target_cache_->WriteEDRAMRawUAVDescriptor(
        provider->OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEDRAMRawUAV)));
    // kGammaRampNormalSRV.
    WriteGammaRampSRV(false,
                      provider->OffsetViewDescriptor(
                          view_bindless_heap_cpu_start_,
                          uint32_t(SystemBindlessView::kGammaRampNormalSRV)));
    // kGammaRampPWLSRV.
    WriteGammaRampSRV(true,
                      provider->OffsetViewDescriptor(
                          view_bindless_heap_cpu_start_,
                          uint32_t(SystemBindlessView::kGammaRampPWLSRV)));
  }

  pix_capture_requested_.store(false, std::memory_order_relaxed);
  pix_capturing_ = false;

  // Just not to expose uninitialized memory.
  std::memset(&system_constants_, 0, sizeof(system_constants_));

  return true;
}
|
|
|
|
void D3D12CommandProcessor::ShutdownContext() {
  // Releases everything SetupContext created, in reverse dependency order,
  // after making sure the GPU is no longer using any of it.
  AwaitAllSubmissionsCompletion();

  ui::d3d12::util::ReleaseAndNull(readback_buffer_);
  readback_buffer_size_ = 0;

  ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
  scratch_buffer_size_ = 0;

  // Buffers queued for deferred deletion no longer need to wait.
  for (auto& buffer_for_deletion : buffers_for_deletion_) {
    buffer_for_deletion.first->Release();
  }
  buffers_for_deletion_.clear();

  if (swap_texture_srv_descriptor_heap_ != nullptr) {
    // Clear the swap state under its lock so the presenter stops referencing
    // the front buffer before it's released.
    {
      std::lock_guard<std::mutex> lock(swap_state_.mutex);
      swap_state_.pending = false;
      swap_state_.front_buffer_texture = 0;
    }
    // TODO(Triang3l): Ensure this is synchronized. The display context may not
    // exist at this point, so awaiting its fence doesn't always work.
    swap_texture_srv_descriptor_heap_->Release();
    swap_texture_srv_descriptor_heap_ = nullptr;
  }
  ui::d3d12::util::ReleaseAndNull(swap_texture_rtv_descriptor_heap_);
  ui::d3d12::util::ReleaseAndNull(swap_texture_);

  // Don't need the data anymore, so zero range.
  if (gamma_ramp_upload_mapping_ != nullptr) {
    D3D12_RANGE gamma_ramp_written_range;
    gamma_ramp_written_range.Begin = 0;
    gamma_ramp_written_range.End = 0;
    gamma_ramp_upload_->Unmap(0, &gamma_ramp_written_range);
    gamma_ramp_upload_mapping_ = nullptr;
  }
  ui::d3d12::util::ReleaseAndNull(gamma_ramp_upload_);
  ui::d3d12::util::ReleaseAndNull(gamma_ramp_texture_);

  // Subsystems - reverse of the creation order in SetupContext.
  primitive_converter_.reset();

  pipeline_cache_.reset();

  render_target_cache_.reset();

  texture_cache_.reset();

  shared_memory_.reset();

  // Shut down binding - bindless descriptors may be owned by subsystems like
  // the texture cache.

  // Root signatures are used by pipeline states, thus freed after the pipeline
  // states.
  ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
  ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
  for (auto it : root_signatures_bindful_) {
    it.second->Release();
  }
  root_signatures_bindful_.clear();

  if (bindless_resources_used_) {
    texture_cache_bindless_sampler_map_.clear();
    // Release overflow sampler heaps created after the first one filled up.
    for (const auto& sampler_bindless_heap_overflowed :
         sampler_bindless_heaps_overflowed_) {
      sampler_bindless_heap_overflowed.first->Release();
    }
    sampler_bindless_heaps_overflowed_.clear();
    sampler_bindless_heap_allocated_ = 0;
    ui::d3d12::util::ReleaseAndNull(sampler_bindless_heap_current_);
    view_bindless_one_use_descriptors_.clear();
    view_bindless_heap_free_.clear();
    ui::d3d12::util::ReleaseAndNull(view_bindless_heap_);
  } else {
    sampler_bindful_heap_pool_.reset();
    view_bindful_heap_pool_.reset();
  }
  constant_buffer_pool_.reset();

  // Command recording objects.
  deferred_command_list_.reset();
  ui::d3d12::util::ReleaseAndNull(command_list_1_);
  ui::d3d12::util::ReleaseAndNull(command_list_);
  ClearCommandAllocatorCache();

  // Reset frame tracking to the initial state.
  frame_open_ = false;
  frame_current_ = 1;
  frame_completed_ = 0;
  std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));

  // First release the fence since it may reference the event.
  ui::d3d12::util::ReleaseAndNull(submission_fence_);
  if (submission_fence_completion_event_) {
    CloseHandle(submission_fence_completion_event_);
    submission_fence_completion_event_ = nullptr;
  }
  submission_open_ = false;
  submission_current_ = 1;
  submission_completed_ = 0;

  CommandProcessor::ShutdownContext();
}
|
|
|
|
void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|
CommandProcessor::WriteRegister(index, value);
|
|
|
|
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
|
|
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
|
|
if (frame_open_) {
|
|
uint32_t float_constant_index =
|
|
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
|
|
if (float_constant_index >= 256) {
|
|
float_constant_index -= 256;
|
|
if (current_float_constant_map_pixel_[float_constant_index >> 6] &
|
|
(1ull << (float_constant_index & 63))) {
|
|
cbuffer_binding_float_pixel_.up_to_date = false;
|
|
}
|
|
} else {
|
|
if (current_float_constant_map_vertex_[float_constant_index >> 6] &
|
|
(1ull << (float_constant_index & 63))) {
|
|
cbuffer_binding_float_vertex_.up_to_date = false;
|
|
}
|
|
}
|
|
}
|
|
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
|
|
index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
|
|
cbuffer_binding_bool_loop_.up_to_date = false;
|
|
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
|
|
index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
|
|
cbuffer_binding_fetch_.up_to_date = false;
|
|
if (texture_cache_ != nullptr) {
|
|
texture_cache_->TextureFetchConstantWritten(
|
|
(index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6);
|
|
}
|
|
} else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) {
|
|
UpdateGammaRampValue(GammaRampType::kPWL, value);
|
|
} else if (index == XE_GPU_REG_DC_LUT_30_COLOR) {
|
|
UpdateGammaRampValue(GammaRampType::kNormal, value);
|
|
} else if (index == XE_GPU_REG_DC_LUT_RW_MODE) {
|
|
gamma_ramp_rw_subindex_ = 0;
|
|
}
|
|
}
|
|
|
|
void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
|
|
uint32_t frontbuffer_width,
|
|
uint32_t frontbuffer_height) {
|
|
// FIXME(Triang3l): frontbuffer_ptr is currently unreliable, in the trace
|
|
// player it's set to 0, but it's not needed anyway since the fetch constant
|
|
// contains the address.
|
|
|
|
SCOPE_profile_cpu_f("gpu");
|
|
|
|
// In case the swap command is the only one in the frame.
|
|
BeginSubmission(true);
|
|
|
|
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
|
|
|
// Upload the new gamma ramps, using the upload buffer for the current frame
|
|
// (will close the frame after this anyway, so can't write multiple times per
|
|
// frame).
|
|
uint32_t gamma_ramp_frame = uint32_t(frame_current_ % kQueueFrames);
|
|
if (dirty_gamma_ramp_normal_) {
|
|
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
|
|
gamma_ramp_footprints_[gamma_ramp_frame * 2];
|
|
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
|
|
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
|
|
for (uint32_t i = 0; i < 256; ++i) {
|
|
uint32_t value = gamma_ramp_.normal[i].value;
|
|
// Swap red and blue (Project Sylpheed has settings allowing separate
|
|
// configuration).
|
|
mapping[i] = ((value & 1023) << 20) | (value & (1023 << 10)) |
|
|
((value >> 20) & 1023);
|
|
}
|
|
PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_,
|
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
|
gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
SubmitBarriers();
|
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
|
location_source.pResource = gamma_ramp_upload_;
|
|
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
|
location_source.PlacedFootprint = gamma_ramp_footprint;
|
|
location_dest.pResource = gamma_ramp_texture_;
|
|
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
location_dest.SubresourceIndex = 0;
|
|
deferred_command_list_->CopyTexture(location_dest, location_source);
|
|
dirty_gamma_ramp_normal_ = false;
|
|
}
|
|
if (dirty_gamma_ramp_pwl_) {
|
|
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint =
|
|
gamma_ramp_footprints_[gamma_ramp_frame * 2 + 1];
|
|
volatile uint32_t* mapping = reinterpret_cast<uint32_t*>(
|
|
gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset);
|
|
for (uint32_t i = 0; i < 128; ++i) {
|
|
// TODO(Triang3l): Find a game to test if red and blue need to be swapped.
|
|
mapping[i] = (gamma_ramp_.pwl[i].values[0].base >> 6) |
|
|
(uint32_t(gamma_ramp_.pwl[i].values[1].base >> 6) << 10) |
|
|
(uint32_t(gamma_ramp_.pwl[i].values[2].base >> 6) << 20);
|
|
}
|
|
PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_,
|
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
|
gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
SubmitBarriers();
|
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
|
location_source.pResource = gamma_ramp_upload_;
|
|
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
|
location_source.PlacedFootprint = gamma_ramp_footprint;
|
|
location_dest.pResource = gamma_ramp_texture_;
|
|
location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
|
|
location_dest.SubresourceIndex = 1;
|
|
deferred_command_list_->CopyTexture(location_dest, location_source);
|
|
dirty_gamma_ramp_pwl_ = false;
|
|
}
|
|
|
|
D3D12_SHADER_RESOURCE_VIEW_DESC swap_texture_srv_desc;
|
|
TextureFormat frontbuffer_format;
|
|
ID3D12Resource* swap_texture_resource = texture_cache_->RequestSwapTexture(
|
|
swap_texture_srv_desc, frontbuffer_format);
|
|
if (swap_texture_resource) {
|
|
render_target_cache_->FlushAndUnbindRenderTargets();
|
|
|
|
// This is according to D3D::InitializePresentationParameters from a game
|
|
// executable, which initializes the normal gamma ramp for 8_8_8_8 output
|
|
// and the PWL gamma ramp for 2_10_10_10.
|
|
bool use_pwl_gamma_ramp =
|
|
frontbuffer_format == TextureFormat::k_2_10_10_10 ||
|
|
frontbuffer_format == TextureFormat::k_2_10_10_10_AS_16_16_16_16;
|
|
|
|
bool descriptors_obtained;
|
|
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_swap_texture;
|
|
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_gamma_ramp;
|
|
if (bindless_resources_used_) {
|
|
descriptors_obtained =
|
|
RequestOneUseSingleViewDescriptors(1, &descriptor_swap_texture);
|
|
descriptor_gamma_ramp = GetSystemBindlessViewHandlePair(
|
|
use_pwl_gamma_ramp ? SystemBindlessView::kGammaRampPWLSRV
|
|
: SystemBindlessView::kGammaRampNormalSRV);
|
|
} else {
|
|
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
|
|
descriptors_obtained = RequestOneUseSingleViewDescriptors(2, descriptors);
|
|
if (descriptors_obtained) {
|
|
descriptor_swap_texture = descriptors[0];
|
|
descriptor_gamma_ramp = descriptors[1];
|
|
WriteGammaRampSRV(use_pwl_gamma_ramp, descriptor_gamma_ramp.first);
|
|
}
|
|
}
|
|
if (descriptors_obtained) {
|
|
// Must not call anything that can change the descriptor heap from now on!
|
|
|
|
// Create the swap texture descriptor.
|
|
device->CreateShaderResourceView(swap_texture_resource,
|
|
&swap_texture_srv_desc,
|
|
descriptor_swap_texture.first);
|
|
|
|
// The swap texture is kept as an SRV because the graphics system may draw
|
|
// with it at any time. It's switched to RTV and back when needed.
|
|
PushTransitionBarrier(swap_texture_,
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
|
|
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_,
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
|
gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
|
SubmitBarriers();
|
|
|
|
auto swap_texture_size = GetSwapTextureSize();
|
|
|
|
// Draw the stretching rectangle.
|
|
deferred_command_list_->D3DOMSetRenderTargets(1, &swap_texture_rtv_, TRUE,
|
|
nullptr);
|
|
D3D12_VIEWPORT viewport;
|
|
viewport.TopLeftX = 0.0f;
|
|
viewport.TopLeftY = 0.0f;
|
|
viewport.Width = float(swap_texture_size.first);
|
|
viewport.Height = float(swap_texture_size.second);
|
|
viewport.MinDepth = 0.0f;
|
|
viewport.MaxDepth = 0.0f;
|
|
deferred_command_list_->RSSetViewport(viewport);
|
|
D3D12_RECT scissor;
|
|
scissor.left = 0;
|
|
scissor.top = 0;
|
|
scissor.right = swap_texture_size.first;
|
|
scissor.bottom = swap_texture_size.second;
|
|
deferred_command_list_->RSSetScissorRect(scissor);
|
|
D3D12GraphicsSystem* graphics_system =
|
|
static_cast<D3D12GraphicsSystem*>(graphics_system_);
|
|
graphics_system->StretchTextureToFrontBuffer(
|
|
descriptor_swap_texture.second, &descriptor_gamma_ramp.second,
|
|
use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f),
|
|
*deferred_command_list_);
|
|
// Ending the current frame anyway, so no need to reset the current render
|
|
// targets when using ROV.
|
|
|
|
PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_RENDER_TARGET,
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
|
// Don't care about graphics state because the frame is ending anyway.
|
|
{
|
|
std::lock_guard<std::mutex> lock(swap_state_.mutex);
|
|
swap_state_.width = swap_texture_size.first;
|
|
swap_state_.height = swap_texture_size.second;
|
|
swap_state_.front_buffer_texture =
|
|
reinterpret_cast<uintptr_t>(swap_texture_srv_descriptor_heap_);
|
|
}
|
|
}
|
|
}
|
|
|
|
EndSubmission(true);
|
|
}
|
|
|
|
void D3D12CommandProcessor::OnPrimaryBufferEnd() {
|
|
if (cvars::d3d12_submit_on_primary_buffer_end && submission_open_ &&
|
|
CanEndSubmissionImmediately()) {
|
|
EndSubmission(false);
|
|
}
|
|
}
|
|
|
|
// Loads (or retrieves a cached) guest shader. The command processor itself
// doesn't own shader storage - everything is delegated to the pipeline cache,
// which handles deduplication and translation of guest microcode.
Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
  Shader* shader = pipeline_cache_->LoadShader(shader_type, guest_address,
                                               host_address, dword_count);
  return shader;
}
|
|
|
|
// Issues one guest draw call. This is the main per-draw path: it validates the
// draw (mode, shaders, culling), sets up render targets, primitive topology,
// pipeline state, constants and bindings, ensures vertex/index/memexport
// memory is resident in the shared memory buffer, performs the actual
// D3D12 draw, and handles post-draw memory-export bookkeeping (UAV barrier,
// texture invalidation and optional CPU readback).
// Returns true if the draw was handled (including being legitimately skipped),
// false on an unrecoverable setup failure.
bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
                                      uint32_t index_count,
                                      IndexBufferInfo* index_buffer_info,
                                      bool major_mode_explicit) {
  auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
  auto& regs = *register_file_;

#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  xenos::ModeControl enable_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  if (enable_mode == xenos::ModeControl::kIgnore) {
    // Ignored.
    return true;
  }
  if (enable_mode == xenos::ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy();
  }

  if (regs.Get<reg::RB_SURFACE_INFO>().surface_pitch == 0) {
    // Doesn't actually draw.
    // TODO(Triang3l): Do something so memexport still works in this case maybe?
    // Unlikely that zero would even really be legal though.
    return true;
  }

  // Shaders will have already been defined by previous loads.
  // We need them to do just about anything so validate here.
  auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
  auto pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
  if (!vertex_shader) {
    // Always need a vertex shader.
    return false;
  }
  // Depth-only mode doesn't need a pixel shader.
  if (enable_mode == xenos::ModeControl::kDepth) {
    pixel_shader = nullptr;
  } else if (!pixel_shader) {
    // Need a pixel shader in normal color mode.
    return false;
  }
  // Get tessellation info for the current draw for vertex shader translation.
  Shader::HostVertexShaderType host_vertex_shader_type =
      pipeline_cache_->GetHostVertexShaderTypeIfValid();
  // HostVertexShaderType(-1) is the "invalid/unsupported" sentinel returned by
  // GetHostVertexShaderTypeIfValid.
  if (host_vertex_shader_type == Shader::HostVertexShaderType(-1)) {
    return false;
  }
  // Translate the shaders now to get memexport configuration and color mask,
  // which is needed by the render target cache, to check the possibility of
  // doing early depth/stencil, and also to get used textures and samplers.
  if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
                                                host_vertex_shader_type)) {
    return false;
  }
  bool tessellated =
      host_vertex_shader_type != Shader::HostVertexShaderType::kVertex;

  // Check if memexport is used. If it is, we can't skip draw calls that have no
  // visual effect.
  bool memexport_used_vertex =
      !vertex_shader->memexport_stream_constants().empty();
  bool memexport_used_pixel =
      pixel_shader != nullptr &&
      !pixel_shader->memexport_stream_constants().empty();
  bool memexport_used = memexport_used_vertex || memexport_used_pixel;

  bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
  auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
  if (!memexport_used_vertex &&
      (sq_program_cntl.vs_export_mode ==
           xenos::VertexShaderExportMode::kMultipass ||
       (primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
        pa_su_sc_mode_cntl.cull_back))) {
    // All faces are culled - can't be expressed in the pipeline state.
    return true;
  }

  BeginSubmission(true);

  // Set up the render targets - this may bind pipelines.
  if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
    return false;
  }
  const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
      render_target_cache_->GetCurrentPipelineRenderTargets();

  // Set up primitive topology.
  bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
  PrimitiveType primitive_type_converted;
  D3D_PRIMITIVE_TOPOLOGY primitive_topology;
  if (tessellated) {
    primitive_type_converted = primitive_type;
    switch (primitive_type_converted) {
      // TODO(Triang3l): Support all kinds of patches if found in games.
      case PrimitiveType::kTriangleList:
      case PrimitiveType::kTrianglePatch:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST;
        break;
      case PrimitiveType::kQuadList:
      case PrimitiveType::kQuadPatch:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST;
        break;
      default:
        return false;
    }
  } else {
    primitive_type_converted =
        PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
    switch (primitive_type_converted) {
      case PrimitiveType::kPointList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
        break;
      case PrimitiveType::kLineList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
        break;
      case PrimitiveType::kLineStrip:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
        break;
      case PrimitiveType::kTriangleList:
      case PrimitiveType::kRectangleList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
        break;
      case PrimitiveType::kTriangleStrip:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
        break;
      case PrimitiveType::kQuadList:
        // Quad lists are expanded from line lists with adjacency in a geometry
        // shader, hence the LINELIST_ADJ topology here.
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
        break;
      default:
        return false;
    }
  }
  if (primitive_topology_ != primitive_topology) {
    primitive_topology_ = primitive_topology;
    deferred_command_list_->D3DIASetPrimitiveTopology(primitive_topology);
  }
  uint32_t line_loop_closing_index;
  if (primitive_type == PrimitiveType::kLineLoop && !indexed &&
      index_count >= 3) {
    // Add a vertex to close the loop, and make the vertex shader replace its
    // index (before adding the offset) with 0 to fetch the first vertex again.
    // For indexed line loops, the primitive converter will add the vertex.
    line_loop_closing_index = index_count;
    ++index_count;
  } else {
    // Replace index 0 with 0 (do nothing) otherwise.
    line_loop_closing_index = 0;
  }

  // Update the textures - this may bind pipeline state objects.
  uint32_t used_texture_mask =
      vertex_shader->GetUsedTextureMask() |
      (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
  texture_cache_->RequestTextures(used_texture_mask);

  // Check if early depth/stencil can be enabled. Alpha testing and
  // alpha-to-coverage depend on the pixel shader's output, so depth/stencil
  // can't be resolved before the shader runs in those cases.
  bool early_z;
  if (pixel_shader) {
    auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
    early_z = pixel_shader->implicit_early_z_allowed() &&
              (!rb_colorcontrol.alpha_test_enable ||
               rb_colorcontrol.alpha_func == CompareFunction::kAlways) &&
              !rb_colorcontrol.alpha_to_mask_enable;
  } else {
    early_z = true;
  }

  // Create the pipeline if needed and bind it.
  void* pipeline_handle;
  ID3D12RootSignature* root_signature;
  if (!pipeline_cache_->ConfigurePipeline(
          vertex_shader, pixel_shader, primitive_type_converted,
          indexed ? index_buffer_info->format : IndexFormat::kInt16, early_z,
          pipeline_render_targets, &pipeline_handle, &root_signature)) {
    return false;
  }
  if (current_cached_pipeline_ != pipeline_handle) {
    deferred_command_list_->SetPipelineStateHandle(
        reinterpret_cast<void*>(pipeline_handle));
    current_cached_pipeline_ = pipeline_handle;
    current_external_pipeline_ = nullptr;
  }

  // Update viewport, scissor, blend factor and stencil reference.
  UpdateFixedFunctionState(primitive_two_faced);

  // Update system constants before uploading them.
  UpdateSystemConstantValues(
      memexport_used, primitive_two_faced, line_loop_closing_index,
      indexed ? index_buffer_info->endianness : Endian::kNone,
      used_texture_mask, early_z, GetCurrentColorMask(pixel_shader),
      pipeline_render_targets);

  // Update constant buffers, descriptors and root parameters.
  if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
    return false;
  }
  // Must not call anything that can change the descriptor heap from now on!

  // Ensure vertex and index buffers are resident and draw.
  // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
  // validity will be tracked.
  // Bitmask of the up-to-96 vertex fetch constants already requested, so the
  // same fetch constant isn't requested twice for multiple bindings.
  uint64_t vertex_buffers_resident[2] = {};
  for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
    uint32_t vfetch_index = vertex_binding.fetch_constant;
    if (vertex_buffers_resident[vfetch_index >> 6] &
        (1ull << (vfetch_index & 63))) {
      continue;
    }
    // Each vertex fetch constant occupies two register dwords.
    const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
        XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
    switch (vfetch_constant.type) {
      case xenos::FetchConstantType::kVertex:
        break;
      case xenos::FetchConstantType::kInvalidVertex:
        if (cvars::gpu_allow_invalid_fetch_constants) {
          break;
        }
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
            "This "
            "is incorrect behavior, but you can try bypassing this by "
            "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
      default:
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
    }
    // Fetch constant address and size are stored in dwords - convert to bytes.
    if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
                                      vfetch_constant.size << 2)) {
      XELOGE(
          "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
          "memory",
          vfetch_constant.address << 2, vfetch_constant.size << 2);
      return false;
    }
    vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
  }

  // Gather memexport ranges and ensure the heaps for them are resident, and
  // also load the data surrounding the export and to fill the regions that
  // won't be modified by the shaders.
  struct MemExportRange {
    uint32_t base_address_dwords;
    uint32_t size_dwords;
  };
  // NOTE(review): 512 entries appears to be an upper bound on the number of
  // distinct memexport streams per draw - confirm against the shader
  // translator's memexport constant limits.
  MemExportRange memexport_ranges[512];
  uint32_t memexport_range_count = 0;
  if (memexport_used_vertex) {
    const std::vector<uint32_t>& memexport_stream_constants_vertex =
        vertex_shader->memexport_stream_constants();
    for (uint32_t constant_index : memexport_stream_constants_vertex) {
      // Vertex shader float constants start at register 000; each constant is
      // four register dwords.
      const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
          XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
      if (memexport_stream.index_count == 0) {
        continue;
      }
      uint32_t memexport_format_size =
          GetSupportedMemExportFormatSize(memexport_stream.format);
      if (memexport_format_size == 0) {
        XELOGE("Unsupported memexport format {}",
               FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
                   ->name);
        return false;
      }
      uint32_t memexport_size_dwords =
          memexport_stream.index_count * memexport_format_size;
      // Try to reduce the number of shared memory operations when writing
      // different elements into the same buffer through different exports
      // (happens in Halo 3).
      bool memexport_range_reused = false;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        MemExportRange& memexport_range = memexport_ranges[i];
        if (memexport_range.base_address_dwords ==
            memexport_stream.base_address) {
          memexport_range.size_dwords =
              std::max(memexport_range.size_dwords, memexport_size_dwords);
          memexport_range_reused = true;
          break;
        }
      }
      // Add a new range if haven't expanded an existing one.
      if (!memexport_range_reused) {
        MemExportRange& memexport_range =
            memexport_ranges[memexport_range_count++];
        memexport_range.base_address_dwords = memexport_stream.base_address;
        memexport_range.size_dwords = memexport_size_dwords;
      }
    }
  }
  if (memexport_used_pixel) {
    const std::vector<uint32_t>& memexport_stream_constants_pixel =
        pixel_shader->memexport_stream_constants();
    for (uint32_t constant_index : memexport_stream_constants_pixel) {
      // Pixel shader float constants start at register 256.
      const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
          XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
      if (memexport_stream.index_count == 0) {
        continue;
      }
      uint32_t memexport_format_size =
          GetSupportedMemExportFormatSize(memexport_stream.format);
      if (memexport_format_size == 0) {
        XELOGE("Unsupported memexport format {}",
               FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format)))
                   ->name);
        return false;
      }
      uint32_t memexport_size_dwords =
          memexport_stream.index_count * memexport_format_size;
      // Same range-merging logic as for the vertex shader streams above.
      bool memexport_range_reused = false;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        MemExportRange& memexport_range = memexport_ranges[i];
        if (memexport_range.base_address_dwords ==
            memexport_stream.base_address) {
          memexport_range.size_dwords =
              std::max(memexport_range.size_dwords, memexport_size_dwords);
          memexport_range_reused = true;
          break;
        }
      }
      if (!memexport_range_reused) {
        MemExportRange& memexport_range =
            memexport_ranges[memexport_range_count++];
        memexport_range.base_address_dwords = memexport_stream.base_address;
        memexport_range.size_dwords = memexport_size_dwords;
      }
    }
  }
  // Make every gathered memexport range resident in the shared memory buffer.
  for (uint32_t i = 0; i < memexport_range_count; ++i) {
    const MemExportRange& memexport_range = memexport_ranges[i];
    if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2,
                                      memexport_range.size_dwords << 2)) {
      XELOGE(
          "Failed to request memexport stream at 0x{:08X} (size {}) in the "
          "shared memory",
          memexport_range.base_address_dwords << 2,
          memexport_range.size_dwords << 2);
      return false;
    }
  }

  // Actually draw.
  if (indexed) {
    uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
                              ? sizeof(uint32_t)
                              : sizeof(uint16_t);
    assert_false(index_buffer_info->guest_base & (index_size - 1));
    // Mask to the 512 MB physical address space and align to the index size.
    uint32_t index_base =
        index_buffer_info->guest_base & 0x1FFFFFFF & ~(index_size - 1);
    D3D12_INDEX_BUFFER_VIEW index_buffer_view;
    index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32
                                   ? DXGI_FORMAT_R32_UINT
                                   : DXGI_FORMAT_R16_UINT;
    PrimitiveConverter::ConversionResult conversion_result;
    uint32_t converted_index_count;
    if (tessellated) {
      // Patch topologies are passed through without primitive conversion.
      conversion_result =
          PrimitiveConverter::ConversionResult::kConversionNotNeeded;
    } else {
      conversion_result = primitive_converter_->ConvertPrimitives(
          primitive_type, index_buffer_info->guest_base, index_count,
          index_buffer_info->format, index_buffer_info->endianness,
          index_buffer_view.BufferLocation, converted_index_count);
      if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) {
        return false;
      }
      if (conversion_result ==
          PrimitiveConverter::ConversionResult::kPrimitiveEmpty) {
        return true;
      }
    }
    ID3D12Resource* scratch_index_buffer = nullptr;
    if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) {
      index_buffer_view.SizeInBytes = converted_index_count * index_size;
      index_count = converted_index_count;
    } else {
      uint32_t index_buffer_size = index_buffer_info->count * index_size;
      if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
        XELOGE(
            "Failed to request index buffer at 0x{:08X} (size {}) in the "
            "shared memory",
            index_base, index_buffer_size);
        return false;
      }
      if (memexport_used) {
        // If the shared memory is a UAV, it can't be used as an index buffer
        // (UAV is a read/write state, index buffer is a read-only state). Need
        // to copy the indices to a buffer in the index buffer state.
        scratch_index_buffer = RequestScratchGPUBuffer(
            index_buffer_size, D3D12_RESOURCE_STATE_COPY_DEST);
        if (scratch_index_buffer == nullptr) {
          return false;
        }
        shared_memory_->UseAsCopySource();
        SubmitBarriers();
        deferred_command_list_->D3DCopyBufferRegion(
            scratch_index_buffer, 0, shared_memory_->GetBuffer(), index_base,
            index_buffer_size);
        PushTransitionBarrier(scratch_index_buffer,
                              D3D12_RESOURCE_STATE_COPY_DEST,
                              D3D12_RESOURCE_STATE_INDEX_BUFFER);
        index_buffer_view.BufferLocation =
            scratch_index_buffer->GetGPUVirtualAddress();
      } else {
        index_buffer_view.BufferLocation =
            shared_memory_->GetGPUAddress() + index_base;
      }
      index_buffer_view.SizeInBytes = index_buffer_size;
    }
    if (memexport_used) {
      shared_memory_->UseForWriting();
    } else {
      shared_memory_->UseForReading();
    }
    SubmitBarriers();
    deferred_command_list_->D3DIASetIndexBuffer(&index_buffer_view);
    deferred_command_list_->D3DDrawIndexedInstanced(index_count, 1, 0, 0, 0);
    if (scratch_index_buffer != nullptr) {
      ReleaseScratchGPUBuffer(scratch_index_buffer,
                              D3D12_RESOURCE_STATE_INDEX_BUFFER);
    }
  } else {
    // Check if need to draw using a conversion index buffer.
    uint32_t converted_index_count = 0;
    D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address =
        tessellated ? 0
                    : primitive_converter_->GetStaticIndexBuffer(
                          primitive_type, index_count, converted_index_count);
    if (memexport_used) {
      shared_memory_->UseForWriting();
    } else {
      shared_memory_->UseForReading();
    }
    SubmitBarriers();
    if (conversion_gpu_address) {
      D3D12_INDEX_BUFFER_VIEW index_buffer_view;
      index_buffer_view.BufferLocation = conversion_gpu_address;
      index_buffer_view.SizeInBytes = converted_index_count * sizeof(uint16_t);
      index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
      deferred_command_list_->D3DIASetIndexBuffer(&index_buffer_view);
      deferred_command_list_->D3DDrawIndexedInstanced(converted_index_count, 1,
                                                      0, 0, 0);
    } else {
      deferred_command_list_->D3DDrawInstanced(index_count, 1, 0, 0);
    }
  }

  if (memexport_used) {
    // Commit shared memory writing.
    PushUAVBarrier(shared_memory_->GetBuffer());
    // Invalidate textures in memexported memory and watch for changes.
    for (uint32_t i = 0; i < memexport_range_count; ++i) {
      const MemExportRange& memexport_range = memexport_ranges[i];
      shared_memory_->RangeWrittenByGPU(
          memexport_range.base_address_dwords << 2,
          memexport_range.size_dwords << 2);
    }
    if (cvars::d3d12_readback_memexport) {
      // Read the exported data on the CPU.
      uint32_t memexport_total_size = 0;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        memexport_total_size += memexport_ranges[i].size_dwords << 2;
      }
      if (memexport_total_size != 0) {
        ID3D12Resource* readback_buffer =
            RequestReadbackBuffer(memexport_total_size);
        if (readback_buffer != nullptr) {
          shared_memory_->UseAsCopySource();
          SubmitBarriers();
          ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
          // Pack all ranges back-to-back into the readback buffer.
          uint32_t readback_buffer_offset = 0;
          for (uint32_t i = 0; i < memexport_range_count; ++i) {
            const MemExportRange& memexport_range = memexport_ranges[i];
            uint32_t memexport_range_size = memexport_range.size_dwords << 2;
            deferred_command_list_->D3DCopyBufferRegion(
                readback_buffer, readback_buffer_offset, shared_memory_buffer,
                memexport_range.base_address_dwords << 2, memexport_range_size);
            readback_buffer_offset += memexport_range_size;
          }
          // Block until the GPU has executed the copies, then mirror the data
          // into guest physical memory.
          AwaitAllSubmissionsCompletion();
          D3D12_RANGE readback_range;
          readback_range.Begin = 0;
          readback_range.End = memexport_total_size;
          void* readback_mapping;
          if (SUCCEEDED(readback_buffer->Map(0, &readback_range,
                                             &readback_mapping))) {
            const uint32_t* readback_dwords =
                reinterpret_cast<const uint32_t*>(readback_mapping);
            for (uint32_t i = 0; i < memexport_range_count; ++i) {
              const MemExportRange& memexport_range = memexport_ranges[i];
              std::memcpy(memory_->TranslatePhysical(
                              memexport_range.base_address_dwords << 2),
                          readback_dwords, memexport_range.size_dwords << 2);
              readback_dwords += memexport_range.size_dwords;
            }
            // Empty write range - the CPU didn't modify the readback buffer.
            D3D12_RANGE readback_write_range = {};
            readback_buffer->Unmap(0, &readback_write_range);
          }
        }
      }
    }
  }

  return true;
}
|
|
|
|
void D3D12CommandProcessor::InitializeTrace() {
|
|
BeginSubmission(false);
|
|
bool render_target_cache_submitted =
|
|
render_target_cache_->InitializeTraceSubmitDownloads();
|
|
bool shared_memory_submitted =
|
|
shared_memory_->InitializeTraceSubmitDownloads();
|
|
if (!render_target_cache_submitted && !shared_memory_submitted) {
|
|
return;
|
|
}
|
|
if (!EndSubmission(false)) {
|
|
return;
|
|
}
|
|
AwaitAllSubmissionsCompletion();
|
|
if (render_target_cache_submitted) {
|
|
render_target_cache_->InitializeTraceCompleteDownloads();
|
|
}
|
|
if (shared_memory_submitted) {
|
|
shared_memory_->InitializeTraceCompleteDownloads();
|
|
}
|
|
}
|
|
|
|
// Nothing to do when trace recording finishes - all state capture for traces
// is performed up front in InitializeTrace.
void D3D12CommandProcessor::FinalizeTrace() {}
|
|
|
|
// Handles a resolve (EDRAM-to-memory copy) command, and optionally reads the
// resolved data back to guest CPU memory when d3d12_readback_resolve is
// enabled. Returns false only if the resolve itself fails; readback failures
// are silently tolerated (the GPU-side copy has still been performed).
bool D3D12CommandProcessor::IssueCopy() {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES
  BeginSubmission(true);
  // Address/length of the region the resolve wrote within the shared memory
  // buffer, filled by the render target cache.
  uint32_t written_address, written_length;
  if (!render_target_cache_->Resolve(shared_memory_.get(), texture_cache_.get(),
                                     memory_, written_address,
                                     written_length)) {
    return false;
  }
  // NOTE(review): readback is skipped with 2x resolution scale - presumably
  // because the scaled data doesn't map 1:1 to guest memory; confirm.
  if (cvars::d3d12_readback_resolve && !texture_cache_->IsResolutionScale2X() &&
      written_length) {
    // Read the resolved data on the CPU.
    ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
    if (readback_buffer != nullptr) {
      shared_memory_->UseAsCopySource();
      SubmitBarriers();
      ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
      deferred_command_list_->D3DCopyBufferRegion(
          readback_buffer, 0, shared_memory_buffer, written_address,
          written_length);
      // The copy must be submitted and completed on the GPU before the
      // readback buffer can be mapped and read.
      if (EndSubmission(false)) {
        AwaitAllSubmissionsCompletion();
        D3D12_RANGE readback_range;
        readback_range.Begin = 0;
        readback_range.End = written_length;
        void* readback_mapping;
        if (SUCCEEDED(
                readback_buffer->Map(0, &readback_range, &readback_mapping))) {
          // Mirror the resolved data into guest physical memory.
          std::memcpy(memory_->TranslatePhysical(written_address),
                      readback_mapping, written_length);
          // Empty write range - the CPU didn't write to the readback buffer.
          D3D12_RANGE readback_write_range = {};
          readback_buffer->Unmap(0, &readback_write_range);
        }
      }
    }
  }
  return true;
}
|
|
|
|
// Polls (and, if await_submission hasn't completed yet, blocks on) the
// submission fence, then reclaims all transient resources whose last use was
// in a now-completed submission: command allocators, single-use bindless
// descriptors and buffers queued for deletion. Also notifies the subsystems
// that track completed submissions.
void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
  assert_true(await_submission <= submission_current_);
  if (await_submission == submission_current_) {
    // Can't wait for the current submission unless it's sent to the GPU first.
    assert_true(submission_open_);
    EndSubmission(false);
  }

  uint64_t submission_completed_before = submission_completed_;
  submission_completed_ = submission_fence_->GetCompletedValue();
  if (submission_completed_ < await_submission) {
    // Block until the requested submission has been executed by the GPU.
    submission_fence_->SetEventOnCompletion(await_submission,
                                            submission_fence_completion_event_);
    WaitForSingleObject(submission_fence_completion_event_, INFINITE);
    submission_completed_ = submission_fence_->GetCompletedValue();
  }
  if (submission_completed_ <= submission_completed_before) {
    // Not updated - no need to reclaim or download things.
    return;
  }

  // Reclaim command allocators. Move entries from the head of the submitted
  // list to the tail of the writable list while their last usage submission
  // has completed (both are singly-linked lists ordered by submission).
  while (command_allocator_submitted_first_) {
    if (command_allocator_submitted_first_->last_usage_submission >
        submission_completed_) {
      break;
    }
    if (command_allocator_writable_last_) {
      command_allocator_writable_last_->next =
          command_allocator_submitted_first_;
    } else {
      command_allocator_writable_first_ = command_allocator_submitted_first_;
    }
    command_allocator_writable_last_ = command_allocator_submitted_first_;
    command_allocator_submitted_first_ =
        command_allocator_submitted_first_->next;
    command_allocator_writable_last_->next = nullptr;
  }
  if (!command_allocator_submitted_first_) {
    command_allocator_submitted_last_ = nullptr;
  }

  // Release single-use bindless descriptors. The queue is ordered by the
  // submission in which each descriptor was last referenced (.second).
  while (!view_bindless_one_use_descriptors_.empty()) {
    if (view_bindless_one_use_descriptors_.front().second >
        submission_completed_) {
      break;
    }
    ReleaseViewBindlessDescriptorImmediately(
        view_bindless_one_use_descriptors_.front().first);
    view_bindless_one_use_descriptors_.pop_front();
  }

  // Delete transient buffers marked for deletion, again in submission order.
  while (!buffers_for_deletion_.empty()) {
    if (buffers_for_deletion_.front().second > submission_completed_) {
      break;
    }
    buffers_for_deletion_.front().first->Release();
    buffers_for_deletion_.pop_front();
  }

  shared_memory_->CompletedSubmissionUpdated();

  render_target_cache_->CompletedSubmissionUpdated();

  primitive_converter_->CompletedSubmissionUpdated();
}
|
|
|
|
// Opens a submission (and, when is_guest_command is true and no frame is
// open, a frame) if one is not already open. Awaits the submission fence so
// that at most kQueueFrames frames are queued, resets the deferred command
// list and all cached command-list state for a new submission, and, on frame
// open, resets per-frame bindings, reclaims per-frame pools, and begins a PIX
// capture if one was requested.
void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  bool is_opening_frame = is_guest_command && !frame_open_;
  // Nothing to do if a submission is already open and no new frame needs to
  // be started.
  if (submission_open_ && !is_opening_frame) {
    return;
  }

  // Check the fence - needed for all kinds of submissions (to reclaim transient
  // resources early) and specifically for frames (not to queue too many), and
  // await the availability of the current frame.
  CheckSubmissionFence(
      is_opening_frame
          ? closed_frame_submissions_[frame_current_ % kQueueFrames]
          : 0);
  if (is_opening_frame) {
    // Update the completed frame index, also obtaining the actual completed
    // frame number (since the CPU may be actually less than 3 frames behind)
    // before reclaiming resources tracked with the frame number.
    // Start from the oldest frame that could possibly still be in flight...
    frame_completed_ =
        std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames;
    // ...then advance past every closed frame whose submission the GPU has
    // already completed.
    for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
         ++frame) {
      if (closed_frame_submissions_[frame % kQueueFrames] >
          submission_completed_) {
        break;
      }
      frame_completed_ = frame;
    }
  }

  if (!submission_open_) {
    submission_open_ = true;

    // Start a new deferred command list - will submit it to the real one in the
    // end of the submission (when async pipeline state object creation requests
    // are fulfilled).
    deferred_command_list_->Reset();

    // Reset cached state of the command list. All the "update needed" /
    // "current" fields mirror state stored in the D3D12 command list, which
    // is undefined in a freshly reset list.
    ff_viewport_update_needed_ = true;
    ff_scissor_update_needed_ = true;
    ff_blend_factor_update_needed_ = true;
    ff_stencil_ref_update_needed_ = true;
    current_sample_positions_ = MsaaSamples::k1X;
    current_cached_pipeline_ = nullptr;
    current_external_pipeline_ = nullptr;
    current_graphics_root_signature_ = nullptr;
    current_graphics_root_up_to_date_ = 0;
    if (bindless_resources_used_) {
      // Bindless heaps persist across submissions - rebind them immediately.
      deferred_command_list_->SetDescriptorHeaps(
          view_bindless_heap_, sampler_bindless_heap_current_);
    } else {
      // Bindful heaps are chosen per draw - none bound yet.
      view_bindful_heap_current_ = nullptr;
      sampler_bindful_heap_current_ = nullptr;
    }
    primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

    render_target_cache_->BeginSubmission();

    primitive_converter_->BeginSubmission();
  }

  if (is_opening_frame) {
    frame_open_ = true;

    // Reset bindings that depend on the data stored in the pools.
    std::memset(current_float_constant_map_vertex_, 0,
                sizeof(current_float_constant_map_vertex_));
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
    cbuffer_binding_system_.up_to_date = false;
    cbuffer_binding_float_vertex_.up_to_date = false;
    cbuffer_binding_float_pixel_.up_to_date = false;
    cbuffer_binding_bool_loop_.up_to_date = false;
    cbuffer_binding_fetch_.up_to_date = false;
    if (bindless_resources_used_) {
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
    } else {
      draw_view_bindful_heap_index_ =
          ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
      draw_sampler_bindful_heap_index_ =
          ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
      bindful_textures_written_vertex_ = false;
      bindful_textures_written_pixel_ = false;
      bindful_samplers_written_vertex_ = false;
      bindful_samplers_written_pixel_ = false;
    }

    // Reclaim pool pages - no need to do this every small submission since some
    // may be reused.
    constant_buffer_pool_->Reclaim(frame_completed_);
    if (!bindless_resources_used_) {
      view_bindful_heap_pool_->Reclaim(frame_completed_);
      sampler_bindful_heap_pool_->Reclaim(frame_completed_);
    }

    // Atomically consume a pending PIX capture request (set from another
    // thread, hence the atomic exchange) and begin the capture for this frame
    // if graphics analysis (PIX attached) is available.
    pix_capturing_ =
        pix_capture_requested_.exchange(false, std::memory_order_relaxed);
    if (pix_capturing_) {
      IDXGraphicsAnalysis* graphics_analysis =
          GetD3D12Context()->GetD3D12Provider()->GetGraphicsAnalysis();
      if (graphics_analysis != nullptr) {
        graphics_analysis->BeginCapture();
      }
    }

    texture_cache_->BeginFrame();

    primitive_converter_->BeginFrame();
  }
}
|
|
|
|
// Closes the current submission, if one is open: executes the deferred
// command list on the direct queue, signals the submission fence, and rotates
// the used command allocator into the "submitted" list. When is_swap is true
// and a frame is open, also closes the frame (ending the PIX capture,
// recording the frame's last submission, and performing a full cache clear if
// one was requested). Returns false only if a command allocator could not be
// created - the caller is expected to retry later.
bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
  auto provider = GetD3D12Context()->GetD3D12Provider();

  // Make sure there is a command allocator to write commands to.
  if (submission_open_ && !command_allocator_writable_first_) {
    ID3D12CommandAllocator* command_allocator;
    if (FAILED(provider->GetDevice()->CreateCommandAllocator(
            D3D12_COMMAND_LIST_TYPE_DIRECT,
            IID_PPV_ARGS(&command_allocator)))) {
      XELOGE("Failed to create a command allocator");
      // Try to submit later. Completely dropping the submission is not
      // permitted because resources would be left in an undefined state.
      return false;
    }
    command_allocator_writable_first_ = new CommandAllocator;
    command_allocator_writable_first_->command_allocator = command_allocator;
    command_allocator_writable_first_->last_usage_submission = 0;
    command_allocator_writable_first_->next = nullptr;
    command_allocator_writable_last_ = command_allocator_writable_first_;
  }

  bool is_closing_frame = is_swap && frame_open_;

  if (is_closing_frame) {
    texture_cache_->EndFrame();
  }

  if (submission_open_) {
    assert_false(scratch_buffer_used_);

    pipeline_cache_->EndSubmission();

    // Submit barriers now because resources with the queued barriers may be
    // destroyed between frames.
    SubmitBarriers();

    auto direct_queue = provider->GetDirectQueue();

    // Submit the command list. The writable-first allocator is safe to reset
    // here - CheckSubmissionFence only moves allocators back to the writable
    // list once the GPU has finished with them.
    ID3D12CommandAllocator* command_allocator =
        command_allocator_writable_first_->command_allocator;
    command_allocator->Reset();
    command_list_->Reset(command_allocator, nullptr);
    deferred_command_list_->Execute(command_list_, command_list_1_);
    command_list_->Close();
    ID3D12CommandList* execute_command_lists[] = {command_list_};
    direct_queue->ExecuteCommandLists(1, execute_command_lists);
    // Move the allocator from the head of the writable list to the tail of
    // the submitted list, tagging it with this submission's number so it can
    // be reclaimed when the fence passes it.
    command_allocator_writable_first_->last_usage_submission =
        submission_current_;
    if (command_allocator_submitted_last_) {
      command_allocator_submitted_last_->next =
          command_allocator_writable_first_;
    } else {
      command_allocator_submitted_first_ = command_allocator_writable_first_;
    }
    command_allocator_submitted_last_ = command_allocator_writable_first_;
    command_allocator_writable_first_ = command_allocator_writable_first_->next;
    command_allocator_submitted_last_->next = nullptr;
    if (!command_allocator_writable_first_) {
      command_allocator_writable_last_ = nullptr;
    }

    // Signal the fence with this submission's number, then advance to the
    // next submission.
    direct_queue->Signal(submission_fence_, submission_current_++);

    submission_open_ = false;
  }

  if (is_closing_frame) {
    // Close the capture after submitting.
    if (pix_capturing_) {
      IDXGraphicsAnalysis* graphics_analysis = provider->GetGraphicsAnalysis();
      if (graphics_analysis != nullptr) {
        graphics_analysis->EndCapture();
      }
      pix_capturing_ = false;
    }
    frame_open_ = false;
    // Submission already closed now, so minus 1.
    closed_frame_submissions_[(frame_current_++) % kQueueFrames] =
        submission_current_ - 1;

    if (cache_clear_requested_) {
      // Drop all cached objects. Requires a full GPU sync first since the
      // caches may own resources referenced by in-flight submissions.
      cache_clear_requested_ = false;
      AwaitAllSubmissionsCompletion();

      ClearCommandAllocatorCache();

      ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
      scratch_buffer_size_ = 0;

      if (bindless_resources_used_) {
        texture_cache_bindless_sampler_map_.clear();
        for (const auto& sampler_bindless_heap_overflowed :
             sampler_bindless_heaps_overflowed_) {
          sampler_bindless_heap_overflowed.first->Release();
        }
        sampler_bindless_heaps_overflowed_.clear();
        sampler_bindless_heap_allocated_ = 0;
      } else {
        sampler_bindful_heap_pool_->ClearCache();
        view_bindful_heap_pool_->ClearCache();
      }
      constant_buffer_pool_->ClearCache();

      primitive_converter_->ClearCache();

      pipeline_cache_->ClearCache();

      render_target_cache_->ClearCache();

      texture_cache_->ClearCache();

      for (auto it : root_signatures_bindful_) {
        it.second->Release();
      }
      root_signatures_bindful_.clear();

      shared_memory_->ClearCache();
    }
  }

  return true;
}
|
|
|
|
bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
|
|
return !submission_open_ || !pipeline_cache_->IsCreatingPipelineStates();
|
|
}
|
|
|
|
void D3D12CommandProcessor::AwaitAllSubmissionsCompletion() {
|
|
// May be called if shutting down without everything set up.
|
|
if ((submission_completed_ + 1) >= submission_current_ ||
|
|
!submission_fence_ || GetD3D12Context()->WasLost()) {
|
|
return;
|
|
}
|
|
submission_fence_->SetEventOnCompletion(submission_current_ - 1,
|
|
submission_fence_completion_event_);
|
|
WaitForSingleObject(submission_fence_completion_event_, INFINITE);
|
|
submission_completed_ = submission_current_ - 1;
|
|
}
|
|
|
|
void D3D12CommandProcessor::ClearCommandAllocatorCache() {
|
|
while (command_allocator_submitted_first_) {
|
|
auto next = command_allocator_submitted_first_->next;
|
|
command_allocator_submitted_first_->command_allocator->Release();
|
|
delete command_allocator_submitted_first_;
|
|
command_allocator_submitted_first_ = next;
|
|
}
|
|
command_allocator_submitted_last_ = nullptr;
|
|
while (command_allocator_writable_first_) {
|
|
auto next = command_allocator_writable_first_->next;
|
|
command_allocator_writable_first_->command_allocator->Release();
|
|
delete command_allocator_writable_first_;
|
|
command_allocator_writable_first_ = next;
|
|
}
|
|
command_allocator_writable_last_ = nullptr;
|
|
}
|
|
|
|
// Translates guest PA/RB register state into the Direct3D 12 fixed-function
// pipeline state that is not part of the PSO: viewport, scissor rectangle,
// blend factor and stencil reference. Each piece is diffed against the cached
// ff_* value and only re-recorded into the deferred command list when it
// changed. primitive_two_faced indicates whether front/back face state can
// differ for the current primitive type (affects stencil reference choice).
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
  auto& regs = *register_file_;

#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  // Window parameters.
  // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
  // See r200UpdateWindow:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();

  // Supersampling replacing multisampling due to difficulties of emulating
  // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also
  // resolution scale.
  uint32_t pixel_size_x, pixel_size_y;
  if (edram_rov_used_) {
    pixel_size_x = 1;
    pixel_size_y = 1;
  } else {
    // Without ROV, 2x MSAA is emulated as 1x2 supersampling and 4x as 2x2.
    MsaaSamples msaa_samples = regs.Get<reg::RB_SURFACE_INFO>().msaa_samples;
    pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
    pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
  }
  if (texture_cache_->IsResolutionScale2X()) {
    pixel_size_x *= 2;
    pixel_size_y *= 2;
  }

  // Viewport.
  // PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // In games, either all are enabled (for regular drawing) or none are (for
  // rectangle lists usually).
  //
  // If scale/offset is enabled, the Xenos shader is writing (neglecting W
  // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
  // box. If it's not, the position is in screen space. Since we can only use
  // the NDC in PC APIs, we use a viewport of the largest possible size, and
  // divide the position by it in translated shaders.
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  float viewport_scale_x =
      pa_cl_vte_cntl.vport_x_scale_ena
          ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
          : 4096.0f;
  float viewport_scale_y =
      pa_cl_vte_cntl.vport_y_scale_ena
          ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
          : 4096.0f;
  float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
                               ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
                               : 1.0f;
  // When the offset is disabled, center the maximum-size viewport on the
  // origin (offset equal to the scale).
  float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
                                : std::abs(viewport_scale_x);
  float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
                                : std::abs(viewport_scale_y);
  float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
                                : 0.0f;
  if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
    viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
    viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
  }
  D3D12_VIEWPORT viewport;
  viewport.TopLeftX =
      (viewport_offset_x - viewport_scale_x) * float(pixel_size_x);
  viewport.TopLeftY =
      (viewport_offset_y - viewport_scale_y) * float(pixel_size_y);
  viewport.Width = viewport_scale_x * 2.0f * float(pixel_size_x);
  viewport.Height = viewport_scale_y * 2.0f * float(pixel_size_y);
  viewport.MinDepth = viewport_offset_z;
  viewport.MaxDepth = viewport_offset_z + viewport_scale_z;
  if (viewport_scale_z < 0.0f) {
    // MinDepth > MaxDepth doesn't work on Nvidia, emulating it in vertex
    // shaders and when applying polygon offset.
    std::swap(viewport.MinDepth, viewport.MaxDepth);
  }
  ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX;
  ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY;
  ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width;
  ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height;
  ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth;
  ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth;
  if (ff_viewport_update_needed_) {
    ff_viewport_ = viewport;
    deferred_command_list_->RSSetViewport(viewport);
    ff_viewport_update_needed_ = false;
  }

  // Scissor.
  auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
  auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
  D3D12_RECT scissor;
  scissor.left = pa_sc_window_scissor_tl.tl_x;
  scissor.top = pa_sc_window_scissor_tl.tl_y;
  scissor.right = pa_sc_window_scissor_br.br_x;
  scissor.bottom = pa_sc_window_scissor_br.br_y;
  if (!pa_sc_window_scissor_tl.window_offset_disable) {
    // Apply the window offset, clamping to non-negative coordinates since
    // the offset may be negative.
    scissor.left =
        std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
    scissor.top =
        std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
    scissor.right =
        std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
    scissor.bottom =
        std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
  }
  // Scale guest pixels to host pixels (SSAA / resolution scale).
  scissor.left *= pixel_size_x;
  scissor.top *= pixel_size_y;
  scissor.right *= pixel_size_x;
  scissor.bottom *= pixel_size_y;
  ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left;
  ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top;
  ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right;
  ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom;
  if (ff_scissor_update_needed_) {
    ff_scissor_ = scissor;
    deferred_command_list_->RSSetScissorRect(scissor);
    ff_scissor_update_needed_ = false;
  }

  // Blend factor and stencil reference are only host pipeline state on the
  // RTV/DSV path - with ROV, blending and depth/stencil are done in shaders.
  if (!edram_rov_used_) {
    // Blend factor.
    ff_blend_factor_update_needed_ |=
        ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32;
    ff_blend_factor_update_needed_ |=
        ff_blend_factor_[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
    ff_blend_factor_update_needed_ |=
        ff_blend_factor_[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
    ff_blend_factor_update_needed_ |=
        ff_blend_factor_[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
    if (ff_blend_factor_update_needed_) {
      ff_blend_factor_[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32;
      ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
      ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
      ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
      deferred_command_list_->D3DOMSetBlendFactor(ff_blend_factor_);
      ff_blend_factor_update_needed_ = false;
    }

    // Stencil reference value. Per-face reference not supported by Direct3D 12,
    // choose the back face one only if drawing only back faces.
    Register stencil_ref_mask_reg;
    auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
    if (primitive_two_faced &&
        regs.Get<reg::RB_DEPTHCONTROL>().backface_enable &&
        pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
    } else {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
    }
    uint32_t stencil_ref =
        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg).stencilref;
    ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
    if (ff_stencil_ref_update_needed_) {
      ff_stencil_ref_ = stencil_ref;
      deferred_command_list_->D3DOMSetStencilRef(stencil_ref);
      ff_stencil_ref_update_needed_ = false;
    }
  }
}
|
|
|
|
void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|
bool shared_memory_is_uav, bool primitive_two_faced,
|
|
uint32_t line_loop_closing_index, Endian index_endian,
|
|
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
|
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
|
auto& regs = *register_file_;
|
|
|
|
#if FINE_GRAINED_DRAW_SCOPES
|
|
SCOPE_profile_cpu_f("gpu");
|
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
|
|
|
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
|
|
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
|
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
|
|
auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
|
|
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
|
auto pa_su_vtx_cntl = regs.Get<reg::PA_SU_VTX_CNTL>();
|
|
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
|
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
|
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
|
auto rb_depthcontrol = regs.Get<reg::RB_DEPTHCONTROL>();
|
|
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
|
auto rb_stencilrefmask_bf =
|
|
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
|
|
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
|
auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
|
|
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
|
|
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
|
|
|
// Get the color info register values for each render target, and also put
|
|
// some safety measures for the ROV path - disable fully aliased render
|
|
// targets. Also, for ROV, exclude components that don't exist in the format
|
|
// from the write mask.
|
|
reg::RB_COLOR_INFO color_infos[4];
|
|
float rt_clamp[4][4];
|
|
uint32_t rt_keep_masks[4][2];
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
|
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
|
color_infos[i] = color_info;
|
|
|
|
if (edram_rov_used_) {
|
|
// Get the mask for keeping previous color's components unmodified,
|
|
// or two UINT32_MAX if no colors actually existing in the RT are written.
|
|
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
|
color_info.color_format, (color_mask >> (i * 4)) & 0b1111,
|
|
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
|
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
|
|
|
// Disable the render target if it has the same EDRAM base as another one
|
|
// (with a smaller index - assume it's more important).
|
|
if (rt_keep_masks[i][0] == UINT32_MAX &&
|
|
rt_keep_masks[i][1] == UINT32_MAX) {
|
|
for (uint32_t j = 0; j < i; ++j) {
|
|
if (color_info.color_base == color_infos[j].color_base &&
|
|
(rt_keep_masks[j][0] != UINT32_MAX ||
|
|
rt_keep_masks[j][1] != UINT32_MAX)) {
|
|
rt_keep_masks[i][0] = UINT32_MAX;
|
|
rt_keep_masks[i][1] = UINT32_MAX;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Disable depth and stencil if it aliases a color render target (for
|
|
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
|
|
// already disabled there).
|
|
bool depth_stencil_enabled =
|
|
rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
|
|
if (edram_rov_used_ && depth_stencil_enabled) {
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
if (rb_depth_info.depth_base == color_infos[i].color_base &&
|
|
(rt_keep_masks[i][0] != UINT32_MAX ||
|
|
rt_keep_masks[i][1] != UINT32_MAX)) {
|
|
depth_stencil_enabled = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get viewport Z scale - needed for flags and ROV output.
|
|
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
|
|
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
|
: 1.0f;
|
|
|
|
bool dirty = false;
|
|
|
|
// Flags.
|
|
uint32_t flags = 0;
|
|
// Whether shared memory is an SRV or a UAV. Because a resource can't be in a
|
|
// read-write (UAV) and a read-only (SRV, IBV) state at once, if any shader in
|
|
// the pipeline uses memexport, the shared memory buffer must be a UAV.
|
|
if (shared_memory_is_uav) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV;
|
|
}
|
|
// W0 division control.
|
|
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
|
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
|
// = false: multiply the X, Y coordinates by 1/W0.
|
|
// 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
|
|
// = false: multiply the Z coordinate by 1/W0.
|
|
// 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
|
|
// to get 1/W0.
|
|
if (pa_cl_vte_cntl.vtx_xy_fmt) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW;
|
|
}
|
|
if (pa_cl_vte_cntl.vtx_z_fmt) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW;
|
|
}
|
|
if (pa_cl_vte_cntl.vtx_w0_fmt) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal;
|
|
}
|
|
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
|
if (!pa_cl_clip_cntl.clip_disable) {
|
|
flags |= (pa_cl_clip_cntl.value & 0b111111)
|
|
<< DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift;
|
|
}
|
|
// Reversed depth.
|
|
if (viewport_scale_z < 0.0f) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ReverseZ;
|
|
}
|
|
// Whether SV_IsFrontFace matters.
|
|
if (primitive_two_faced) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_PrimitiveTwoFaced;
|
|
}
|
|
// Primitive killing condition.
|
|
if (pa_cl_clip_cntl.vtx_kill_or) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled;
|
|
}
|
|
// Alpha test.
|
|
if (rb_colorcontrol.alpha_test_enable) {
|
|
flags |= uint32_t(rb_colorcontrol.alpha_func)
|
|
<< DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
|
|
} else {
|
|
flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess |
|
|
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
|
|
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
|
|
}
|
|
// Alpha to coverage.
|
|
if (rb_colorcontrol.alpha_to_mask_enable) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage;
|
|
}
|
|
// Gamma writing.
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
if (color_infos[i].color_format ==
|
|
ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i;
|
|
}
|
|
}
|
|
if (edram_rov_used_ && depth_stencil_enabled) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
|
|
if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24;
|
|
}
|
|
if (rb_depthcontrol.z_enable) {
|
|
flags |= uint32_t(rb_depthcontrol.zfunc)
|
|
<< DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift;
|
|
if (rb_depthcontrol.z_write_enable) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite;
|
|
}
|
|
} else {
|
|
// In case stencil is used without depth testing - always pass, and
|
|
// don't modify the stored depth.
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess |
|
|
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual |
|
|
DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater;
|
|
}
|
|
if (rb_depthcontrol.stencil_enable) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
|
|
}
|
|
if (early_z) {
|
|
flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite;
|
|
}
|
|
}
|
|
dirty |= system_constants_.flags != flags;
|
|
system_constants_.flags = flags;
|
|
|
|
// Tessellation factor range, plus 1.0 according to the images in
|
|
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
|
|
float tessellation_factor_min =
|
|
regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
|
|
float tessellation_factor_max =
|
|
regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
|
|
dirty |= system_constants_.tessellation_factor_range_min !=
|
|
tessellation_factor_min;
|
|
system_constants_.tessellation_factor_range_min = tessellation_factor_min;
|
|
dirty |= system_constants_.tessellation_factor_range_max !=
|
|
tessellation_factor_max;
|
|
system_constants_.tessellation_factor_range_max = tessellation_factor_max;
|
|
|
|
// Line loop closing index (or 0 when drawing other primitives or using an
|
|
// index buffer).
|
|
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
|
|
system_constants_.line_loop_closing_index = line_loop_closing_index;
|
|
|
|
// Vertex index offset.
|
|
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
|
|
system_constants_.vertex_base_index = vgt_indx_offset;
|
|
|
|
// Index or tessellation edge factor buffer endianness.
|
|
dirty |= system_constants_.vertex_index_endian != index_endian;
|
|
system_constants_.vertex_index_endian = index_endian;
|
|
|
|
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
|
|
if (!pa_cl_clip_cntl.clip_disable) {
|
|
for (uint32_t i = 0; i < 6; ++i) {
|
|
if (!(pa_cl_clip_cntl.value & (1 << i))) {
|
|
continue;
|
|
}
|
|
const float* ucp = ®s[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32;
|
|
if (std::memcmp(system_constants_.user_clip_planes[i], ucp,
|
|
4 * sizeof(float))) {
|
|
dirty = true;
|
|
std::memcpy(system_constants_.user_clip_planes[i], ucp,
|
|
4 * sizeof(float));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Conversion to Direct3D 12 normalized device coordinates.
|
|
// See viewport configuration in UpdateFixedFunctionState for explanations.
|
|
// X and Y scale/offset is to convert unnormalized coordinates generated by
|
|
// shaders (for rectangle list drawing, for instance) to the 8192x8192
|
|
// viewport (the maximum render target size) that is used to emulate
|
|
// unnormalized coordinates. Z scale/offset is to convert from OpenGL NDC to
|
|
// Direct3D NDC if needed. Also apply half-pixel offset to reproduce Direct3D
|
|
// 9 rasterization rules - must be done before clipping, not through the
|
|
// viewport, for SSAA and resolution scale to work correctly.
|
|
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
|
|
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
|
|
// Kill all primitives if multipass or both faces are culled, but still need
|
|
// to do memexport.
|
|
if (sq_program_cntl.vs_export_mode ==
|
|
xenos::VertexShaderExportMode::kMultipass ||
|
|
(primitive_two_faced && pa_su_sc_mode_cntl.cull_front &&
|
|
pa_su_sc_mode_cntl.cull_back)) {
|
|
dirty |= !std::isnan(system_constants_.ndc_scale[0]);
|
|
dirty |= !std::isnan(system_constants_.ndc_scale[1]);
|
|
dirty |= !std::isnan(system_constants_.ndc_scale[2]);
|
|
dirty |= !std::isnan(system_constants_.ndc_offset[0]);
|
|
dirty |= !std::isnan(system_constants_.ndc_offset[1]);
|
|
dirty |= !std::isnan(system_constants_.ndc_offset[2]);
|
|
float nan_value = std::nanf("");
|
|
system_constants_.ndc_scale[0] = nan_value;
|
|
system_constants_.ndc_scale[1] = nan_value;
|
|
system_constants_.ndc_scale[2] = nan_value;
|
|
system_constants_.ndc_offset[0] = nan_value;
|
|
system_constants_.ndc_offset[1] = nan_value;
|
|
system_constants_.ndc_offset[2] = nan_value;
|
|
} else {
|
|
// When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to
|
|
// be written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF
|
|
// isn't set in this case in draws in games.
|
|
bool gl_clip_space_def =
|
|
!pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena;
|
|
float ndc_scale_x = pa_cl_vte_cntl.vport_x_scale_ena
|
|
? (viewport_scale_x >= 0.0f ? 1.0f : -1.0f)
|
|
: (1.0f / 4096.0f);
|
|
float ndc_scale_y = pa_cl_vte_cntl.vport_y_scale_ena
|
|
? (viewport_scale_y >= 0.0f ? -1.0f : 1.0f)
|
|
: (-1.0f / 4096.0f);
|
|
float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
|
|
float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f;
|
|
float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f;
|
|
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
|
|
if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) {
|
|
// Signs are hopefully correct here, tested in GTA IV on both clearing
|
|
// (without a viewport) and drawing things near the edges of the screen.
|
|
if (pa_cl_vte_cntl.vport_x_scale_ena) {
|
|
if (viewport_scale_x != 0.0f) {
|
|
ndc_offset_x += 0.5f / viewport_scale_x;
|
|
}
|
|
} else {
|
|
ndc_offset_x += 1.0f / 8192.0f;
|
|
}
|
|
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
|
if (viewport_scale_y != 0.0f) {
|
|
ndc_offset_y += 0.5f / viewport_scale_y;
|
|
}
|
|
} else {
|
|
ndc_offset_y -= 1.0f / 8192.0f;
|
|
}
|
|
}
|
|
dirty |= system_constants_.ndc_scale[0] != ndc_scale_x;
|
|
dirty |= system_constants_.ndc_scale[1] != ndc_scale_y;
|
|
dirty |= system_constants_.ndc_scale[2] != ndc_scale_z;
|
|
dirty |= system_constants_.ndc_offset[0] != ndc_offset_x;
|
|
dirty |= system_constants_.ndc_offset[1] != ndc_offset_y;
|
|
dirty |= system_constants_.ndc_offset[2] != ndc_offset_z;
|
|
system_constants_.ndc_scale[0] = ndc_scale_x;
|
|
system_constants_.ndc_scale[1] = ndc_scale_y;
|
|
system_constants_.ndc_scale[2] = ndc_scale_z;
|
|
system_constants_.ndc_offset[0] = ndc_offset_x;
|
|
system_constants_.ndc_offset[1] = ndc_offset_y;
|
|
system_constants_.ndc_offset[2] = ndc_offset_z;
|
|
}
|
|
|
|
// Point size.
|
|
float point_size_x = float(pa_su_point_size.width) * 0.125f;
|
|
float point_size_y = float(pa_su_point_size.height) * 0.125f;
|
|
float point_size_min = float(pa_su_point_minmax.min_size) * 0.125f;
|
|
float point_size_max = float(pa_su_point_minmax.max_size) * 0.125f;
|
|
dirty |= system_constants_.point_size[0] != point_size_x;
|
|
dirty |= system_constants_.point_size[1] != point_size_y;
|
|
dirty |= system_constants_.point_size_min_max[0] != point_size_min;
|
|
dirty |= system_constants_.point_size_min_max[1] != point_size_max;
|
|
system_constants_.point_size[0] = point_size_x;
|
|
system_constants_.point_size[1] = point_size_y;
|
|
system_constants_.point_size_min_max[0] = point_size_min;
|
|
system_constants_.point_size_min_max[1] = point_size_max;
|
|
float point_screen_to_ndc_x, point_screen_to_ndc_y;
|
|
if (pa_cl_vte_cntl.vport_x_scale_ena) {
|
|
point_screen_to_ndc_x =
|
|
(viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f;
|
|
} else {
|
|
point_screen_to_ndc_x = 1.0f / 8192.0f;
|
|
}
|
|
if (pa_cl_vte_cntl.vport_y_scale_ena) {
|
|
point_screen_to_ndc_y =
|
|
(viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f;
|
|
} else {
|
|
point_screen_to_ndc_y = -1.0f / 8192.0f;
|
|
}
|
|
dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x;
|
|
dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y;
|
|
system_constants_.point_screen_to_ndc[0] = point_screen_to_ndc_x;
|
|
system_constants_.point_screen_to_ndc[1] = point_screen_to_ndc_y;
|
|
|
|
// Pixel parameter register.
|
|
uint32_t ps_param_gen =
|
|
sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX;
|
|
dirty |= system_constants_.ps_param_gen != ps_param_gen;
|
|
system_constants_.ps_param_gen = ps_param_gen;
|
|
|
|
// Texture signedness.
|
|
uint32_t textures_remaining = used_texture_mask;
|
|
uint32_t texture_index;
|
|
while (xe::bit_scan_forward(textures_remaining, &texture_index)) {
|
|
textures_remaining &= ~(uint32_t(1) << texture_index);
|
|
uint32_t& texture_signs_uint =
|
|
system_constants_.texture_swizzled_signs[texture_index >> 2];
|
|
uint32_t texture_signs_shift = (texture_index & 3) * 8;
|
|
uint32_t texture_signs_shifted =
|
|
uint32_t(texture_cache_->GetActiveTextureSwizzledSigns(texture_index))
|
|
<< texture_signs_shift;
|
|
uint32_t texture_signs_mask = uint32_t(0b11111111) << texture_signs_shift;
|
|
dirty |= (texture_signs_uint & texture_signs_mask) != texture_signs_shifted;
|
|
texture_signs_uint =
|
|
(texture_signs_uint & ~texture_signs_mask) | texture_signs_shifted;
|
|
}
|
|
|
|
// Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for
|
|
// EDRAM address calculation with MSAA (with ROV).
|
|
uint32_t sample_count_log2_x =
|
|
rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 1 : 0;
|
|
uint32_t sample_count_log2_y =
|
|
rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 1 : 0;
|
|
dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x;
|
|
dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y;
|
|
system_constants_.sample_count_log2[0] = sample_count_log2_x;
|
|
system_constants_.sample_count_log2[1] = sample_count_log2_y;
|
|
|
|
// Alpha test.
|
|
dirty |= system_constants_.alpha_test_reference != rb_alpha_ref;
|
|
system_constants_.alpha_test_reference = rb_alpha_ref;
|
|
|
|
// EDRAM pitch for ROV writing.
|
|
if (edram_rov_used_) {
|
|
uint32_t edram_pitch_tiles =
|
|
((rb_surface_info.surface_pitch *
|
|
(rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
|
|
79) /
|
|
80;
|
|
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
|
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
|
}
|
|
|
|
// Color exponent bias and output index mapping or ROV render target writing.
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
reg::RB_COLOR_INFO color_info = color_infos[i];
|
|
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
|
int32_t color_exp_bias = color_info.color_exp_bias;
|
|
if (color_info.color_format == ColorRenderTargetFormat::k_16_16 ||
|
|
color_info.color_format == ColorRenderTargetFormat::k_16_16_16_16) {
|
|
// On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have
|
|
// -32...32 range and expect shaders to give -32...32 values, but they're
|
|
// emulated using normalized RG16/RGBA16 when not using the ROV, so the
|
|
// value returned from the shader needs to be divided by 32 (blending will
|
|
// be incorrect in this case, but there's no other way without using ROV,
|
|
// though there's an option to limit the range to -1...1).
|
|
// http://www.students.science.uu.nl/~3220516/advancedgraphics/papers/inferred_lighting.pdf
|
|
if (!edram_rov_used_ && cvars::d3d12_16bit_rtv_full_range) {
|
|
color_exp_bias -= 5;
|
|
}
|
|
}
|
|
float color_exp_bias_scale;
|
|
*reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
|
|
0x3F800000 + (color_exp_bias << 23);
|
|
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
|
|
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
|
|
if (edram_rov_used_) {
|
|
dirty |=
|
|
system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
|
|
system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
|
|
dirty |=
|
|
system_constants_.edram_rt_keep_mask[i][1] != rt_keep_masks[i][1];
|
|
system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
|
|
if (rt_keep_masks[i][0] != UINT32_MAX ||
|
|
rt_keep_masks[i][1] != UINT32_MAX) {
|
|
uint32_t rt_base_dwords_scaled = color_info.color_base * 1280;
|
|
if (texture_cache_->IsResolutionScale2X()) {
|
|
rt_base_dwords_scaled <<= 2;
|
|
}
|
|
dirty |= system_constants_.edram_rt_base_dwords_scaled[i] !=
|
|
rt_base_dwords_scaled;
|
|
system_constants_.edram_rt_base_dwords_scaled[i] =
|
|
rt_base_dwords_scaled;
|
|
uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
|
|
color_info.color_format);
|
|
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
|
|
system_constants_.edram_rt_format_flags[i] = format_flags;
|
|
// Can't do float comparisons here because NaNs would result in always
|
|
// setting the dirty flag.
|
|
dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
|
4 * sizeof(float)) != 0;
|
|
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
|
4 * sizeof(float));
|
|
uint32_t blend_factors_ops =
|
|
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
|
|
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
|
|
blend_factors_ops;
|
|
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
|
|
}
|
|
} else {
|
|
dirty |= system_constants_.color_output_map[i] !=
|
|
render_targets[i].guest_render_target;
|
|
system_constants_.color_output_map[i] =
|
|
render_targets[i].guest_render_target;
|
|
}
|
|
}
|
|
|
|
// Resolution scale, depth/stencil testing and blend constant for ROV.
|
|
if (edram_rov_used_) {
|
|
uint32_t resolution_square_scale =
|
|
texture_cache_->IsResolutionScale2X() ? 4 : 1;
|
|
dirty |= system_constants_.edram_resolution_square_scale !=
|
|
resolution_square_scale;
|
|
system_constants_.edram_resolution_square_scale = resolution_square_scale;
|
|
|
|
uint32_t depth_base_dwords = rb_depth_info.depth_base * 1280;
|
|
dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords;
|
|
system_constants_.edram_depth_base_dwords = depth_base_dwords;
|
|
|
|
// The Z range is reversed in the vertex shader if it's reverse - use the
|
|
// absolute value of the scale.
|
|
float depth_range_scale = std::abs(viewport_scale_z);
|
|
dirty |= system_constants_.edram_depth_range_scale != depth_range_scale;
|
|
system_constants_.edram_depth_range_scale = depth_range_scale;
|
|
float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena
|
|
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
|
: 0.0f;
|
|
if (viewport_scale_z < 0.0f) {
|
|
// Similar to MinDepth in fixed-function viewport calculation.
|
|
depth_range_offset += viewport_scale_z;
|
|
}
|
|
dirty |= system_constants_.edram_depth_range_offset != depth_range_offset;
|
|
system_constants_.edram_depth_range_offset = depth_range_offset;
|
|
|
|
// For non-polygons, front polygon offset is used, and it's enabled if
|
|
// POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back
|
|
// are used.
|
|
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
|
|
float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
|
|
if (primitive_two_faced) {
|
|
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
|
poly_offset_front_scale =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
|
poly_offset_front_offset =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
|
}
|
|
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
|
|
poly_offset_back_scale =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
|
poly_offset_back_offset =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
|
}
|
|
} else {
|
|
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
|
poly_offset_front_scale =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
|
poly_offset_front_offset =
|
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
|
poly_offset_back_scale = poly_offset_front_scale;
|
|
poly_offset_back_offset = poly_offset_front_offset;
|
|
}
|
|
}
|
|
// "slope computed in subpixels (1/12 or 1/16)" - R5xx Acceleration. Also:
|
|
// https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943
|
|
poly_offset_front_scale *= 1.0f / 16.0f;
|
|
poly_offset_back_scale *= 1.0f / 16.0f;
|
|
if (texture_cache_->IsResolutionScale2X()) {
|
|
poly_offset_front_scale *= 2.f;
|
|
poly_offset_back_scale *= 2.f;
|
|
}
|
|
dirty |= system_constants_.edram_poly_offset_front_scale !=
|
|
poly_offset_front_scale;
|
|
system_constants_.edram_poly_offset_front_scale = poly_offset_front_scale;
|
|
dirty |= system_constants_.edram_poly_offset_front_offset !=
|
|
poly_offset_front_offset;
|
|
system_constants_.edram_poly_offset_front_offset = poly_offset_front_offset;
|
|
dirty |= system_constants_.edram_poly_offset_back_scale !=
|
|
poly_offset_back_scale;
|
|
system_constants_.edram_poly_offset_back_scale = poly_offset_back_scale;
|
|
dirty |= system_constants_.edram_poly_offset_back_offset !=
|
|
poly_offset_back_offset;
|
|
system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset;
|
|
|
|
if (depth_stencil_enabled && rb_depthcontrol.stencil_enable) {
|
|
dirty |= system_constants_.edram_stencil_front_reference !=
|
|
rb_stencilrefmask.stencilref;
|
|
system_constants_.edram_stencil_front_reference =
|
|
rb_stencilrefmask.stencilref;
|
|
dirty |= system_constants_.edram_stencil_front_read_mask !=
|
|
rb_stencilrefmask.stencilmask;
|
|
system_constants_.edram_stencil_front_read_mask =
|
|
rb_stencilrefmask.stencilmask;
|
|
dirty |= system_constants_.edram_stencil_front_write_mask !=
|
|
rb_stencilrefmask.stencilwritemask;
|
|
system_constants_.edram_stencil_front_write_mask =
|
|
rb_stencilrefmask.stencilwritemask;
|
|
uint32_t stencil_func_ops =
|
|
(rb_depthcontrol.value >> 8) & ((1 << 12) - 1);
|
|
dirty |=
|
|
system_constants_.edram_stencil_front_func_ops != stencil_func_ops;
|
|
system_constants_.edram_stencil_front_func_ops = stencil_func_ops;
|
|
|
|
if (primitive_two_faced && rb_depthcontrol.backface_enable) {
|
|
dirty |= system_constants_.edram_stencil_back_reference !=
|
|
rb_stencilrefmask_bf.stencilref;
|
|
system_constants_.edram_stencil_back_reference =
|
|
rb_stencilrefmask_bf.stencilref;
|
|
dirty |= system_constants_.edram_stencil_back_read_mask !=
|
|
rb_stencilrefmask_bf.stencilmask;
|
|
system_constants_.edram_stencil_back_read_mask =
|
|
rb_stencilrefmask_bf.stencilmask;
|
|
dirty |= system_constants_.edram_stencil_back_write_mask !=
|
|
rb_stencilrefmask_bf.stencilwritemask;
|
|
system_constants_.edram_stencil_back_write_mask =
|
|
rb_stencilrefmask_bf.stencilwritemask;
|
|
uint32_t stencil_func_ops_bf =
|
|
(rb_depthcontrol.value >> 20) & ((1 << 12) - 1);
|
|
dirty |= system_constants_.edram_stencil_back_func_ops !=
|
|
stencil_func_ops_bf;
|
|
system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;
|
|
} else {
|
|
dirty |= std::memcmp(system_constants_.edram_stencil_back,
|
|
system_constants_.edram_stencil_front,
|
|
4 * sizeof(uint32_t)) != 0;
|
|
std::memcpy(system_constants_.edram_stencil_back,
|
|
system_constants_.edram_stencil_front,
|
|
4 * sizeof(uint32_t));
|
|
}
|
|
}
|
|
|
|
dirty |= system_constants_.edram_blend_constant[0] !=
|
|
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
|
system_constants_.edram_blend_constant[0] =
|
|
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
|
dirty |= system_constants_.edram_blend_constant[1] !=
|
|
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
|
system_constants_.edram_blend_constant[1] =
|
|
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
|
dirty |= system_constants_.edram_blend_constant[2] !=
|
|
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
|
system_constants_.edram_blend_constant[2] =
|
|
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
|
dirty |= system_constants_.edram_blend_constant[3] !=
|
|
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
|
system_constants_.edram_blend_constant[3] =
|
|
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
|
}
|
|
|
|
cbuffer_binding_system_.up_to_date &= !dirty;
|
|
}
|
|
|
|
bool D3D12CommandProcessor::UpdateBindings(
|
|
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
|
|
ID3D12RootSignature* root_signature) {
|
|
auto provider = GetD3D12Context()->GetD3D12Provider();
|
|
auto device = provider->GetDevice();
|
|
auto& regs = *register_file_;
|
|
|
|
#if FINE_GRAINED_DRAW_SCOPES
|
|
SCOPE_profile_cpu_f("gpu");
|
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
|
|
|
// Set the new root signature.
|
|
if (current_graphics_root_signature_ != root_signature) {
|
|
current_graphics_root_signature_ = root_signature;
|
|
if (!bindless_resources_used_) {
|
|
GetRootBindfulExtraParameterIndices(
|
|
vertex_shader, pixel_shader, current_graphics_root_bindful_extras_);
|
|
}
|
|
// Changing the root signature invalidates all bindings.
|
|
current_graphics_root_up_to_date_ = 0;
|
|
deferred_command_list_->D3DSetGraphicsRootSignature(root_signature);
|
|
}
|
|
|
|
// Select the root parameter indices depending on the used binding model.
|
|
uint32_t root_parameter_fetch_constants =
|
|
bindless_resources_used_ ? kRootParameter_Bindless_FetchConstants
|
|
: kRootParameter_Bindful_FetchConstants;
|
|
uint32_t root_parameter_float_constants_vertex =
|
|
bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsVertex
|
|
: kRootParameter_Bindful_FloatConstantsVertex;
|
|
uint32_t root_parameter_float_constants_pixel =
|
|
bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsPixel
|
|
: kRootParameter_Bindful_FloatConstantsPixel;
|
|
uint32_t root_parameter_system_constants =
|
|
bindless_resources_used_ ? kRootParameter_Bindless_SystemConstants
|
|
: kRootParameter_Bindful_SystemConstants;
|
|
uint32_t root_parameter_bool_loop_constants =
|
|
bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants
|
|
: kRootParameter_Bindful_BoolLoopConstants;
|
|
|
|
//
|
|
// Update root constant buffers that are common for bindful and bindless.
|
|
//
|
|
|
|
// These are the constant base addresses/ranges for shaders.
|
|
// We have these hardcoded right now cause nothing seems to differ on the Xbox
|
|
// 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
|
|
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
|
|
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
|
|
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
|
|
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
|
// Check if the float constant layout is still the same and get the counts.
|
|
const Shader::ConstantRegisterMap& float_constant_map_vertex =
|
|
vertex_shader->constant_register_map();
|
|
uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
|
|
// Even if the shader doesn't need any float constants, a valid binding must
|
|
// still be provided, so if the first draw in the frame with the current root
|
|
// signature doesn't have float constants at all, still allocate an empty
|
|
// buffer.
|
|
uint32_t float_constant_size_vertex = xe::align(
|
|
uint32_t(std::max(float_constant_count_vertex, 1u) * 4 * sizeof(float)),
|
|
256u);
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
if (current_float_constant_map_vertex_[i] !=
|
|
float_constant_map_vertex.float_bitmap[i]) {
|
|
current_float_constant_map_vertex_[i] =
|
|
float_constant_map_vertex.float_bitmap[i];
|
|
// If no float constants at all, we can reuse any buffer for them, so not
|
|
// invalidating.
|
|
if (float_constant_map_vertex.float_count != 0) {
|
|
cbuffer_binding_float_vertex_.up_to_date = false;
|
|
}
|
|
}
|
|
}
|
|
uint32_t float_constant_count_pixel = 0;
|
|
if (pixel_shader != nullptr) {
|
|
const Shader::ConstantRegisterMap& float_constant_map_pixel =
|
|
pixel_shader->constant_register_map();
|
|
float_constant_count_pixel = float_constant_map_pixel.float_count;
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
if (current_float_constant_map_pixel_[i] !=
|
|
float_constant_map_pixel.float_bitmap[i]) {
|
|
current_float_constant_map_pixel_[i] =
|
|
float_constant_map_pixel.float_bitmap[i];
|
|
if (float_constant_map_pixel.float_count != 0) {
|
|
cbuffer_binding_float_pixel_.up_to_date = false;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
std::memset(current_float_constant_map_pixel_, 0,
|
|
sizeof(current_float_constant_map_pixel_));
|
|
}
|
|
uint32_t float_constant_size_pixel = xe::align(
|
|
uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)),
|
|
256u);
|
|
|
|
// Write the constant buffer data.
|
|
if (!cbuffer_binding_system_.up_to_date) {
|
|
uint8_t* system_constants = constant_buffer_pool_->Request(
|
|
frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u),
|
|
nullptr, nullptr, &cbuffer_binding_system_.address);
|
|
if (system_constants == nullptr) {
|
|
return false;
|
|
}
|
|
std::memcpy(system_constants, &system_constants_,
|
|
sizeof(system_constants_));
|
|
cbuffer_binding_system_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << root_parameter_system_constants);
|
|
}
|
|
if (!cbuffer_binding_float_vertex_.up_to_date) {
|
|
uint8_t* float_constants = constant_buffer_pool_->Request(
|
|
frame_current_, float_constant_size_vertex, nullptr, nullptr,
|
|
&cbuffer_binding_float_vertex_.address);
|
|
if (float_constants == nullptr) {
|
|
return false;
|
|
}
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
uint64_t float_constant_map_entry =
|
|
float_constant_map_vertex.float_bitmap[i];
|
|
uint32_t float_constant_index;
|
|
while (xe::bit_scan_forward(float_constant_map_entry,
|
|
&float_constant_index)) {
|
|
float_constant_map_entry &= ~(1ull << float_constant_index);
|
|
std::memcpy(float_constants,
|
|
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
|
|
(float_constant_index << 2)]
|
|
.f32,
|
|
4 * sizeof(float));
|
|
float_constants += 4 * sizeof(float);
|
|
}
|
|
}
|
|
cbuffer_binding_float_vertex_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << root_parameter_float_constants_vertex);
|
|
}
|
|
if (!cbuffer_binding_float_pixel_.up_to_date) {
|
|
uint8_t* float_constants = constant_buffer_pool_->Request(
|
|
frame_current_, float_constant_size_pixel, nullptr, nullptr,
|
|
&cbuffer_binding_float_pixel_.address);
|
|
if (float_constants == nullptr) {
|
|
return false;
|
|
}
|
|
if (pixel_shader != nullptr) {
|
|
const Shader::ConstantRegisterMap& float_constant_map_pixel =
|
|
pixel_shader->constant_register_map();
|
|
for (uint32_t i = 0; i < 4; ++i) {
|
|
uint64_t float_constant_map_entry =
|
|
float_constant_map_pixel.float_bitmap[i];
|
|
uint32_t float_constant_index;
|
|
while (xe::bit_scan_forward(float_constant_map_entry,
|
|
&float_constant_index)) {
|
|
float_constant_map_entry &= ~(1ull << float_constant_index);
|
|
std::memcpy(float_constants,
|
|
®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
|
|
(float_constant_index << 2)]
|
|
.f32,
|
|
4 * sizeof(float));
|
|
float_constants += 4 * sizeof(float);
|
|
}
|
|
}
|
|
}
|
|
cbuffer_binding_float_pixel_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << root_parameter_float_constants_pixel);
|
|
}
|
|
if (!cbuffer_binding_bool_loop_.up_to_date) {
|
|
uint8_t* bool_loop_constants =
|
|
constant_buffer_pool_->Request(frame_current_, 256, nullptr, nullptr,
|
|
&cbuffer_binding_bool_loop_.address);
|
|
if (bool_loop_constants == nullptr) {
|
|
return false;
|
|
}
|
|
std::memcpy(bool_loop_constants,
|
|
®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
|
(8 + 32) * sizeof(uint32_t));
|
|
cbuffer_binding_bool_loop_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << root_parameter_bool_loop_constants);
|
|
}
|
|
if (!cbuffer_binding_fetch_.up_to_date) {
|
|
uint8_t* fetch_constants = constant_buffer_pool_->Request(
|
|
frame_current_, 768, nullptr, nullptr, &cbuffer_binding_fetch_.address);
|
|
if (fetch_constants == nullptr) {
|
|
return false;
|
|
}
|
|
std::memcpy(fetch_constants,
|
|
®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
|
32 * 6 * sizeof(uint32_t));
|
|
cbuffer_binding_fetch_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << root_parameter_fetch_constants);
|
|
}
|
|
|
|
//
|
|
// Update descriptors.
|
|
//
|
|
|
|
// Get textures and samplers used by the vertex shader, check if the last used
|
|
// samplers are compatible and update them.
|
|
size_t texture_layout_uid_vertex =
|
|
vertex_shader->GetTextureBindingLayoutUserUID();
|
|
size_t sampler_layout_uid_vertex =
|
|
vertex_shader->GetSamplerBindingLayoutUserUID();
|
|
uint32_t texture_count_vertex, sampler_count_vertex;
|
|
const D3D12Shader::TextureBinding* textures_vertex =
|
|
vertex_shader->GetTextureBindings(texture_count_vertex);
|
|
const D3D12Shader::SamplerBinding* samplers_vertex =
|
|
vertex_shader->GetSamplerBindings(sampler_count_vertex);
|
|
if (sampler_count_vertex) {
|
|
if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
|
|
current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
bindful_samplers_written_vertex_ = false;
|
|
}
|
|
current_samplers_vertex_.resize(std::max(current_samplers_vertex_.size(),
|
|
size_t(sampler_count_vertex)));
|
|
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
|
TextureCache::SamplerParameters parameters =
|
|
texture_cache_->GetSamplerParameters(samplers_vertex[i]);
|
|
if (current_samplers_vertex_[i] != parameters) {
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
bindful_samplers_written_vertex_ = false;
|
|
current_samplers_vertex_[i] = parameters;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get textures and samplers used by the pixel shader, check if the last used
|
|
// samplers are compatible and update them.
|
|
size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
|
|
uint32_t texture_count_pixel, sampler_count_pixel;
|
|
const D3D12Shader::TextureBinding* textures_pixel;
|
|
const D3D12Shader::SamplerBinding* samplers_pixel;
|
|
if (pixel_shader != nullptr) {
|
|
texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
|
|
sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
|
|
textures_pixel = pixel_shader->GetTextureBindings(texture_count_pixel);
|
|
samplers_pixel = pixel_shader->GetSamplerBindings(sampler_count_pixel);
|
|
if (sampler_count_pixel) {
|
|
if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
|
|
current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
bindful_samplers_written_pixel_ = false;
|
|
}
|
|
current_samplers_pixel_.resize(std::max(current_samplers_pixel_.size(),
|
|
size_t(sampler_count_pixel)));
|
|
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
|
TextureCache::SamplerParameters parameters =
|
|
texture_cache_->GetSamplerParameters(samplers_pixel[i]);
|
|
if (current_samplers_pixel_[i] != parameters) {
|
|
current_samplers_pixel_[i] = parameters;
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
bindful_samplers_written_pixel_ = false;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
texture_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
|
|
sampler_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
|
|
textures_pixel = nullptr;
|
|
texture_count_pixel = 0;
|
|
samplers_pixel = nullptr;
|
|
sampler_count_pixel = 0;
|
|
}
|
|
|
|
assert_true(sampler_count_vertex + sampler_count_pixel <= kSamplerHeapSize);
|
|
|
|
if (bindless_resources_used_) {
|
|
//
|
|
// Bindless descriptors path.
|
|
//
|
|
|
|
// Check if need to write new descriptor indices.
|
|
// Samplers have already been checked.
|
|
if (texture_count_vertex &&
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
|
|
(current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
|
|
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
|
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
|
texture_count_vertex))) {
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
}
|
|
if (texture_count_pixel &&
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
|
|
(current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
|
|
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
|
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
|
texture_count_pixel))) {
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
}
|
|
|
|
// Get sampler descriptor indices, write new samplers, and handle sampler
|
|
// heap overflow if it happens.
|
|
if ((sampler_count_vertex &&
|
|
!cbuffer_binding_descriptor_indices_vertex_.up_to_date) ||
|
|
(sampler_count_pixel &&
|
|
!cbuffer_binding_descriptor_indices_pixel_.up_to_date)) {
|
|
for (uint32_t i = 0; i < 2; ++i) {
|
|
if (i) {
|
|
// Overflow happened - invalidate sampler bindings because their
|
|
// descriptor indices can't be used anymore (and even if heap creation
|
|
// fails, because current_sampler_bindless_indices_#_ are in an
|
|
// undefined state now) and switch to a new sampler heap.
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
ID3D12DescriptorHeap* sampler_heap_new;
|
|
if (!sampler_bindless_heaps_overflowed_.empty() &&
|
|
sampler_bindless_heaps_overflowed_.front().second <=
|
|
submission_completed_) {
|
|
sampler_heap_new = sampler_bindless_heaps_overflowed_.front().first;
|
|
sampler_bindless_heaps_overflowed_.pop_front();
|
|
} else {
|
|
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_new_desc;
|
|
sampler_heap_new_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
|
|
sampler_heap_new_desc.NumDescriptors = kSamplerHeapSize;
|
|
sampler_heap_new_desc.Flags =
|
|
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
|
sampler_heap_new_desc.NodeMask = 0;
|
|
if (FAILED(device->CreateDescriptorHeap(
|
|
&sampler_heap_new_desc, IID_PPV_ARGS(&sampler_heap_new)))) {
|
|
XELOGE(
|
|
"Failed to create a new bindless sampler descriptor heap "
|
|
"after an overflow of the previous one");
|
|
return false;
|
|
}
|
|
}
|
|
// Only change the heap if a new heap was created successfully, not to
|
|
// leave the values in an undefined state in case CreateDescriptorHeap
|
|
// has failed.
|
|
sampler_bindless_heaps_overflowed_.push_back(std::make_pair(
|
|
sampler_bindless_heap_current_, submission_current_));
|
|
sampler_bindless_heap_current_ = sampler_heap_new;
|
|
sampler_bindless_heap_cpu_start_ =
|
|
sampler_bindless_heap_current_
|
|
->GetCPUDescriptorHandleForHeapStart();
|
|
sampler_bindless_heap_gpu_start_ =
|
|
sampler_bindless_heap_current_
|
|
->GetGPUDescriptorHandleForHeapStart();
|
|
sampler_bindless_heap_allocated_ = 0;
|
|
// The only thing the heap is used for now is texture cache samplers -
|
|
// invalidate all of them.
|
|
texture_cache_bindless_sampler_map_.clear();
|
|
deferred_command_list_->SetDescriptorHeaps(
|
|
view_bindless_heap_, sampler_bindless_heap_current_);
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << kRootParameter_Bindless_SamplerHeap);
|
|
}
|
|
bool samplers_overflowed = false;
|
|
if (sampler_count_vertex &&
|
|
!cbuffer_binding_descriptor_indices_vertex_.up_to_date) {
|
|
current_sampler_bindless_indices_vertex_.resize(
|
|
std::max(current_sampler_bindless_indices_vertex_.size(),
|
|
size_t(sampler_count_vertex)));
|
|
for (uint32_t j = 0; j < sampler_count_vertex; ++j) {
|
|
TextureCache::SamplerParameters sampler_parameters =
|
|
current_samplers_vertex_[j];
|
|
uint32_t sampler_index;
|
|
auto it = texture_cache_bindless_sampler_map_.find(
|
|
sampler_parameters.value);
|
|
if (it != texture_cache_bindless_sampler_map_.end()) {
|
|
sampler_index = it->second;
|
|
} else {
|
|
if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) {
|
|
samplers_overflowed = true;
|
|
break;
|
|
}
|
|
sampler_index = sampler_bindless_heap_allocated_++;
|
|
texture_cache_->WriteSampler(
|
|
sampler_parameters,
|
|
provider->OffsetViewDescriptor(
|
|
sampler_bindless_heap_cpu_start_, sampler_index));
|
|
texture_cache_bindless_sampler_map_.insert(
|
|
{sampler_parameters.value, sampler_index});
|
|
}
|
|
current_sampler_bindless_indices_vertex_[j] = sampler_index;
|
|
}
|
|
}
|
|
if (samplers_overflowed) {
|
|
continue;
|
|
}
|
|
if (sampler_count_pixel &&
|
|
!cbuffer_binding_descriptor_indices_pixel_.up_to_date) {
|
|
current_sampler_bindless_indices_pixel_.resize(
|
|
std::max(current_sampler_bindless_indices_pixel_.size(),
|
|
size_t(sampler_count_pixel)));
|
|
for (uint32_t j = 0; j < sampler_count_pixel; ++j) {
|
|
TextureCache::SamplerParameters sampler_parameters =
|
|
current_samplers_pixel_[j];
|
|
uint32_t sampler_index;
|
|
auto it = texture_cache_bindless_sampler_map_.find(
|
|
sampler_parameters.value);
|
|
if (it != texture_cache_bindless_sampler_map_.end()) {
|
|
sampler_index = it->second;
|
|
} else {
|
|
if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) {
|
|
samplers_overflowed = true;
|
|
break;
|
|
}
|
|
sampler_index = sampler_bindless_heap_allocated_++;
|
|
texture_cache_->WriteSampler(
|
|
sampler_parameters,
|
|
provider->OffsetViewDescriptor(
|
|
sampler_bindless_heap_cpu_start_, sampler_index));
|
|
texture_cache_bindless_sampler_map_.insert(
|
|
{sampler_parameters.value, sampler_index});
|
|
}
|
|
current_sampler_bindless_indices_pixel_[j] = sampler_index;
|
|
}
|
|
}
|
|
if (!samplers_overflowed) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!cbuffer_binding_descriptor_indices_vertex_.up_to_date) {
|
|
uint32_t* descriptor_indices =
|
|
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
|
frame_current_,
|
|
xe::align(
|
|
uint32_t(std::max(texture_count_vertex + sampler_count_vertex,
|
|
uint32_t(1)) *
|
|
sizeof(uint32_t)),
|
|
uint32_t(256)),
|
|
nullptr, nullptr,
|
|
&cbuffer_binding_descriptor_indices_vertex_.address));
|
|
if (!descriptor_indices) {
|
|
return false;
|
|
}
|
|
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
|
|
const D3D12Shader::TextureBinding& texture = textures_vertex[i];
|
|
descriptor_indices[texture.bindless_descriptor_index] =
|
|
texture_cache_->GetActiveTextureBindlessSRVIndex(texture);
|
|
}
|
|
current_texture_layout_uid_vertex_ = texture_layout_uid_vertex;
|
|
if (texture_count_vertex) {
|
|
current_texture_srv_keys_vertex_.resize(
|
|
std::max(current_texture_srv_keys_vertex_.size(),
|
|
size_t(texture_count_vertex)));
|
|
texture_cache_->WriteActiveTextureSRVKeys(
|
|
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
|
texture_count_vertex);
|
|
}
|
|
// Current samplers have already been updated.
|
|
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
|
descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
|
|
current_sampler_bindless_indices_vertex_[i];
|
|
}
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << kRootParameter_Bindless_DescriptorIndicesVertex);
|
|
}
|
|
|
|
if (!cbuffer_binding_descriptor_indices_pixel_.up_to_date) {
|
|
uint32_t* descriptor_indices =
|
|
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
|
frame_current_,
|
|
xe::align(
|
|
uint32_t(std::max(texture_count_pixel + sampler_count_pixel,
|
|
uint32_t(1)) *
|
|
sizeof(uint32_t)),
|
|
uint32_t(256)),
|
|
nullptr, nullptr,
|
|
&cbuffer_binding_descriptor_indices_pixel_.address));
|
|
if (!descriptor_indices) {
|
|
return false;
|
|
}
|
|
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
|
|
const D3D12Shader::TextureBinding& texture = textures_pixel[i];
|
|
descriptor_indices[texture.bindless_descriptor_index] =
|
|
texture_cache_->GetActiveTextureBindlessSRVIndex(texture);
|
|
}
|
|
current_texture_layout_uid_pixel_ = texture_layout_uid_pixel;
|
|
if (texture_count_pixel) {
|
|
current_texture_srv_keys_pixel_.resize(
|
|
std::max(current_texture_srv_keys_pixel_.size(),
|
|
size_t(texture_count_pixel)));
|
|
texture_cache_->WriteActiveTextureSRVKeys(
|
|
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
|
texture_count_pixel);
|
|
}
|
|
// Current samplers have already been updated.
|
|
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
|
descriptor_indices[samplers_pixel[i].bindless_descriptor_index] =
|
|
current_sampler_bindless_indices_pixel_[i];
|
|
}
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << kRootParameter_Bindless_DescriptorIndicesPixel);
|
|
}
|
|
} else {
|
|
//
|
|
// Bindful descriptors path.
|
|
//
|
|
|
|
// See what descriptors need to be updated.
|
|
// Samplers have already been checked.
|
|
bool write_textures_vertex =
|
|
texture_count_vertex &&
|
|
(!bindful_textures_written_vertex_ ||
|
|
current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
|
|
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
|
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
|
texture_count_vertex));
|
|
bool write_textures_pixel =
|
|
texture_count_pixel &&
|
|
(!bindful_textures_written_pixel_ ||
|
|
current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
|
|
!texture_cache_->AreActiveTextureSRVKeysUpToDate(
|
|
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
|
texture_count_pixel));
|
|
bool write_samplers_vertex =
|
|
sampler_count_vertex && !bindful_samplers_written_vertex_;
|
|
bool write_samplers_pixel =
|
|
sampler_count_pixel && !bindful_samplers_written_pixel_;
|
|
|
|
// Allocate the descriptors.
|
|
uint32_t view_count_partial_update = 0;
|
|
if (write_textures_vertex) {
|
|
view_count_partial_update += texture_count_vertex;
|
|
}
|
|
if (write_textures_pixel) {
|
|
view_count_partial_update += texture_count_pixel;
|
|
}
|
|
// All the constants + shared memory SRV and UAV + textures.
|
|
uint32_t view_count_full_update =
|
|
2 + texture_count_vertex + texture_count_pixel;
|
|
if (edram_rov_used_) {
|
|
// + EDRAM UAV.
|
|
++view_count_full_update;
|
|
}
|
|
D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
|
|
uint32_t descriptor_size_view = provider->GetViewDescriptorSize();
|
|
uint64_t view_heap_index = RequestViewBindfulDescriptors(
|
|
draw_view_bindful_heap_index_, view_count_partial_update,
|
|
view_count_full_update, view_cpu_handle, view_gpu_handle);
|
|
if (view_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
|
XELOGE("Failed to allocate view descriptors");
|
|
return false;
|
|
}
|
|
uint32_t sampler_count_partial_update = 0;
|
|
if (write_samplers_vertex) {
|
|
sampler_count_partial_update += sampler_count_vertex;
|
|
}
|
|
if (write_samplers_pixel) {
|
|
sampler_count_partial_update += sampler_count_pixel;
|
|
}
|
|
D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {};
|
|
D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {};
|
|
uint32_t descriptor_size_sampler = provider->GetSamplerDescriptorSize();
|
|
uint64_t sampler_heap_index =
|
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid;
|
|
if (sampler_count_vertex != 0 || sampler_count_pixel != 0) {
|
|
sampler_heap_index = RequestSamplerBindfulDescriptors(
|
|
draw_sampler_bindful_heap_index_, sampler_count_partial_update,
|
|
sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle,
|
|
sampler_gpu_handle);
|
|
if (sampler_heap_index ==
|
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
|
XELOGE("Failed to allocate sampler descriptors");
|
|
return false;
|
|
}
|
|
}
|
|
if (draw_view_bindful_heap_index_ != view_heap_index) {
|
|
// Need to update all view descriptors.
|
|
write_textures_vertex = texture_count_vertex != 0;
|
|
write_textures_pixel = texture_count_pixel != 0;
|
|
bindful_textures_written_vertex_ = false;
|
|
bindful_textures_written_pixel_ = false;
|
|
// If updating fully, write the shared memory SRV and UAV descriptors and,
|
|
// if needed, the EDRAM descriptor.
|
|
gpu_handle_shared_memory_and_edram_ = view_gpu_handle;
|
|
shared_memory_->WriteRawSRVDescriptor(view_cpu_handle);
|
|
view_cpu_handle.ptr += descriptor_size_view;
|
|
view_gpu_handle.ptr += descriptor_size_view;
|
|
shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);
|
|
view_cpu_handle.ptr += descriptor_size_view;
|
|
view_gpu_handle.ptr += descriptor_size_view;
|
|
if (edram_rov_used_) {
|
|
render_target_cache_->WriteEDRAMR32UintUAVDescriptor(view_cpu_handle);
|
|
view_cpu_handle.ptr += descriptor_size_view;
|
|
view_gpu_handle.ptr += descriptor_size_view;
|
|
}
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << kRootParameter_Bindful_SharedMemoryAndEDRAM);
|
|
}
|
|
if (sampler_heap_index !=
|
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid &&
|
|
draw_sampler_bindful_heap_index_ != sampler_heap_index) {
|
|
write_samplers_vertex = sampler_count_vertex != 0;
|
|
write_samplers_pixel = sampler_count_pixel != 0;
|
|
bindful_samplers_written_vertex_ = false;
|
|
bindful_samplers_written_pixel_ = false;
|
|
}
|
|
|
|
// Write the descriptors.
|
|
if (write_textures_vertex) {
|
|
assert_true(current_graphics_root_bindful_extras_.textures_vertex !=
|
|
RootBindfulExtraParameterIndices::kUnavailable);
|
|
gpu_handle_textures_vertex_ = view_gpu_handle;
|
|
for (uint32_t i = 0; i < texture_count_vertex; ++i) {
|
|
texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i],
|
|
view_cpu_handle);
|
|
view_cpu_handle.ptr += descriptor_size_view;
|
|
view_gpu_handle.ptr += descriptor_size_view;
|
|
}
|
|
current_texture_layout_uid_vertex_ = texture_layout_uid_vertex;
|
|
current_texture_srv_keys_vertex_.resize(
|
|
std::max(current_texture_srv_keys_vertex_.size(),
|
|
size_t(texture_count_vertex)));
|
|
texture_cache_->WriteActiveTextureSRVKeys(
|
|
current_texture_srv_keys_vertex_.data(), textures_vertex,
|
|
texture_count_vertex);
|
|
bindful_textures_written_vertex_ = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << current_graphics_root_bindful_extras_.textures_vertex);
|
|
}
|
|
if (write_textures_pixel) {
|
|
assert_true(current_graphics_root_bindful_extras_.textures_pixel !=
|
|
RootBindfulExtraParameterIndices::kUnavailable);
|
|
gpu_handle_textures_pixel_ = view_gpu_handle;
|
|
for (uint32_t i = 0; i < texture_count_pixel; ++i) {
|
|
texture_cache_->WriteActiveTextureBindfulSRV(textures_pixel[i],
|
|
view_cpu_handle);
|
|
view_cpu_handle.ptr += descriptor_size_view;
|
|
view_gpu_handle.ptr += descriptor_size_view;
|
|
}
|
|
current_texture_layout_uid_pixel_ = texture_layout_uid_pixel;
|
|
current_texture_srv_keys_pixel_.resize(std::max(
|
|
current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel)));
|
|
texture_cache_->WriteActiveTextureSRVKeys(
|
|
current_texture_srv_keys_pixel_.data(), textures_pixel,
|
|
texture_count_pixel);
|
|
bindful_textures_written_pixel_ = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << current_graphics_root_bindful_extras_.textures_pixel);
|
|
}
|
|
if (write_samplers_vertex) {
|
|
assert_true(current_graphics_root_bindful_extras_.samplers_vertex !=
|
|
RootBindfulExtraParameterIndices::kUnavailable);
|
|
gpu_handle_samplers_vertex_ = sampler_gpu_handle;
|
|
for (uint32_t i = 0; i < sampler_count_vertex; ++i) {
|
|
texture_cache_->WriteSampler(current_samplers_vertex_[i],
|
|
sampler_cpu_handle);
|
|
sampler_cpu_handle.ptr += descriptor_size_sampler;
|
|
sampler_gpu_handle.ptr += descriptor_size_sampler;
|
|
}
|
|
// Current samplers have already been updated.
|
|
bindful_samplers_written_vertex_ = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << current_graphics_root_bindful_extras_.samplers_vertex);
|
|
}
|
|
if (write_samplers_pixel) {
|
|
assert_true(current_graphics_root_bindful_extras_.samplers_pixel !=
|
|
RootBindfulExtraParameterIndices::kUnavailable);
|
|
gpu_handle_samplers_pixel_ = sampler_gpu_handle;
|
|
for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
|
|
texture_cache_->WriteSampler(current_samplers_pixel_[i],
|
|
sampler_cpu_handle);
|
|
sampler_cpu_handle.ptr += descriptor_size_sampler;
|
|
sampler_gpu_handle.ptr += descriptor_size_sampler;
|
|
}
|
|
// Current samplers have already been updated.
|
|
bindful_samplers_written_pixel_ = true;
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << current_graphics_root_bindful_extras_.samplers_pixel);
|
|
}
|
|
|
|
// Wrote new descriptors on the current page.
|
|
draw_view_bindful_heap_index_ = view_heap_index;
|
|
if (sampler_heap_index !=
|
|
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
|
draw_sampler_bindful_heap_index_ = sampler_heap_index;
|
|
}
|
|
}
|
|
|
|
// Update the root parameters.
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << root_parameter_fetch_constants))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
root_parameter_fetch_constants, cbuffer_binding_fetch_.address);
|
|
current_graphics_root_up_to_date_ |= 1u << root_parameter_fetch_constants;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << root_parameter_float_constants_vertex))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
root_parameter_float_constants_vertex,
|
|
cbuffer_binding_float_vertex_.address);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << root_parameter_float_constants_vertex;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << root_parameter_float_constants_pixel))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
root_parameter_float_constants_pixel,
|
|
cbuffer_binding_float_pixel_.address);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << root_parameter_float_constants_pixel;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << root_parameter_system_constants))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
root_parameter_system_constants, cbuffer_binding_system_.address);
|
|
current_graphics_root_up_to_date_ |= 1u << root_parameter_system_constants;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << root_parameter_bool_loop_constants))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
root_parameter_bool_loop_constants, cbuffer_binding_bool_loop_.address);
|
|
current_graphics_root_up_to_date_ |= 1u
|
|
<< root_parameter_bool_loop_constants;
|
|
}
|
|
if (bindless_resources_used_) {
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << kRootParameter_Bindless_DescriptorIndicesPixel))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
kRootParameter_Bindless_DescriptorIndicesPixel,
|
|
cbuffer_binding_descriptor_indices_pixel_.address);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << kRootParameter_Bindless_DescriptorIndicesPixel;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << kRootParameter_Bindless_DescriptorIndicesVertex))) {
|
|
deferred_command_list_->D3DSetGraphicsRootConstantBufferView(
|
|
kRootParameter_Bindless_DescriptorIndicesVertex,
|
|
cbuffer_binding_descriptor_indices_vertex_.address);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << kRootParameter_Bindless_DescriptorIndicesVertex;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << kRootParameter_Bindless_SamplerHeap))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
kRootParameter_Bindless_SamplerHeap,
|
|
sampler_bindless_heap_gpu_start_);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << kRootParameter_Bindless_SamplerHeap;
|
|
}
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << kRootParameter_Bindless_ViewHeap))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
kRootParameter_Bindless_ViewHeap, view_bindless_heap_gpu_start_);
|
|
current_graphics_root_up_to_date_ |= 1u
|
|
<< kRootParameter_Bindless_ViewHeap;
|
|
}
|
|
} else {
|
|
if (!(current_graphics_root_up_to_date_ &
|
|
(1u << kRootParameter_Bindful_SharedMemoryAndEDRAM))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
kRootParameter_Bindful_SharedMemoryAndEDRAM,
|
|
gpu_handle_shared_memory_and_edram_);
|
|
current_graphics_root_up_to_date_ |=
|
|
1u << kRootParameter_Bindful_SharedMemoryAndEDRAM;
|
|
}
|
|
uint32_t extra_index;
|
|
extra_index = current_graphics_root_bindful_extras_.textures_pixel;
|
|
if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
|
|
!(current_graphics_root_up_to_date_ & (1u << extra_index))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
extra_index, gpu_handle_textures_pixel_);
|
|
current_graphics_root_up_to_date_ |= 1u << extra_index;
|
|
}
|
|
extra_index = current_graphics_root_bindful_extras_.samplers_pixel;
|
|
if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
|
|
!(current_graphics_root_up_to_date_ & (1u << extra_index))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
extra_index, gpu_handle_samplers_pixel_);
|
|
current_graphics_root_up_to_date_ |= 1u << extra_index;
|
|
}
|
|
extra_index = current_graphics_root_bindful_extras_.textures_vertex;
|
|
if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
|
|
!(current_graphics_root_up_to_date_ & (1u << extra_index))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
extra_index, gpu_handle_textures_vertex_);
|
|
current_graphics_root_up_to_date_ |= 1u << extra_index;
|
|
}
|
|
extra_index = current_graphics_root_bindful_extras_.samplers_vertex;
|
|
if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
|
|
!(current_graphics_root_up_to_date_ & (1u << extra_index))) {
|
|
deferred_command_list_->D3DSetGraphicsRootDescriptorTable(
|
|
extra_index, gpu_handle_samplers_vertex_);
|
|
current_graphics_root_up_to_date_ |= 1u << extra_index;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
    ColorFormat format) {
  // Returns the per-element size, in 32-bit words, for memory export color
  // formats that the texture cache can consume, or 0 when the format is not
  // supported for memexport readback.
  switch (format) {
    // Four-word formats.
    case ColorFormat::k_32_32_32_32_FLOAT:
      return 4;
    // Two-word formats.
    case ColorFormat::k_16_16_16_16:
    case ColorFormat::k_16_16_16_16_FLOAT:
    case ColorFormat::k_32_32_FLOAT:
      return 2;
    // Single-word formats.
    case ColorFormat::k_8_8_8_8:
    case ColorFormat::k_2_10_10_10:
    // TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
    // texture cache currently.
    // case ColorFormat::k_8_8_8_8_A:
    case ColorFormat::k_10_11_11:
    case ColorFormat::k_11_11_10:
    case ColorFormat::k_16_16:
    case ColorFormat::k_16_16_FLOAT:
    case ColorFormat::k_32_FLOAT:
    case ColorFormat::k_8_8_8_8_AS_16_16_16_16:
    case ColorFormat::k_2_10_10_10_AS_16_16_16_16:
    case ColorFormat::k_10_11_11_AS_16_16_16_16:
    case ColorFormat::k_11_11_10_AS_16_16_16_16:
      return 1;
    default:
      return 0;
  }
}
|
|
|
|
ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) {
  // Returns a readback (CPU-mappable) buffer of at least `size` bytes in the
  // COPY_DEST state, growing the cached buffer when needed. Returns nullptr
  // when `size` is 0 or creation of a larger buffer fails; on failure the
  // previously cached buffer (if any) is kept untouched.
  if (size == 0) {
    return nullptr;
  }
  // Round up so the buffer grows in coarse steps instead of per-request.
  size = xe::align(size, kReadbackBufferSizeIncrement);
  if (size > readback_buffer_size_) {
    auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
    D3D12_RESOURCE_DESC buffer_desc;
    ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size,
                                            D3D12_RESOURCE_FLAG_NONE);
    ID3D12Resource* buffer;
    if (FAILED(device->CreateCommittedResource(
            &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE,
            &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
            IID_PPV_ARGS(&buffer)))) {
      XELOGE("Failed to create a {} MB readback buffer", size >> 20);
      return nullptr;
    }
    if (readback_buffer_ != nullptr) {
      readback_buffer_->Release();
    }
    readback_buffer_ = buffer;
    // Record the new capacity - without this, every later request larger than
    // the stale recorded size would destroy and re-create the buffer.
    readback_buffer_size_ = size;
  }
  return readback_buffer_;
}
|
|
|
|
void D3D12CommandProcessor::WriteGammaRampSRV(
|
|
bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const {
|
|
auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
|
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
|
|
desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
|
|
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
|
|
desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
|
// 256-entry for normal, 128-entry for PWL.
|
|
desc.Texture1D.MostDetailedMip = is_pwl ? 1 : 0;
|
|
desc.Texture1D.MipLevels = 1;
|
|
desc.Texture1D.ResourceMinLODClamp = 0.0f;
|
|
device->CreateShaderResourceView(gamma_ramp_texture_, &desc, handle);
|
|
}
|
|
|
|
} // namespace d3d12
|
|
} // namespace gpu
|
|
} // namespace xe
|