/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <algorithm>
#include <cstring>
#include <sstream>
#include <utility>

#include "xenia/base/assert.h"
#include "xenia/base/byte_order.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_presenter.h"
#include "xenia/ui/d3d12/d3d12_util.h"

DEFINE_bool(d3d12_bindless, true,
            "Use bindless resources where available - may improve performance, "
            "but may make debugging more complicated.",
            "D3D12");
DEFINE_bool(d3d12_readback_memexport, false,
            "Read data written by memory export in shaders on the CPU. This "
            "may be needed in some games (but many only access exported data "
            "on the GPU, and this flag isn't needed to handle such behavior), "
            "but causes mid-frame synchronization, so it has a huge "
            "performance impact.",
            "D3D12");
DEFINE_bool(d3d12_readback_resolve, false,
            "Read render-to-texture results on the CPU. This may be needed in "
            "some games, for instance, for screenshots in saved games, but "
            "causes mid-frame synchronization, so it has a huge performance "
            "impact.",
            "D3D12");
DEFINE_bool(d3d12_submit_on_primary_buffer_end, true,
            "Submit the command list when a PM4 primary buffer ends if it's "
            "possible to submit immediately to try to reduce frame latency.",
            "D3D12");
DEFINE_bool(d3d12_clear_memory_page_state, false,
            "Refresh the state of memory pages to pick up GPU-written data "
            "(use for Team Ninja games to fix missing character models).",
            "D3D12");
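
// Like other Xenia cvars, the flags above are registered through
// xenia/base/cvar.h and can typically be overridden in the config file or on
// the command line (for example, --d3d12_readback_resolve=true - an
// illustrative invocation, not one required by this file).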

namespace xe {
namespace gpu {
namespace d3d12 {

// Generated with `xb buildshaders`.
namespace shaders {
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_fxaa_luma_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_table_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_table_fxaa_luma_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/fxaa_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/fxaa_extreme_cs.h"
}  // namespace shaders

D3D12CommandProcessor::D3D12CommandProcessor(
    D3D12GraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
    : CommandProcessor(graphics_system, kernel_state),
      deferred_command_list_(*this) {}
D3D12CommandProcessor::~D3D12CommandProcessor() = default;

void D3D12CommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();
  cache_clear_requested_ = true;
}

void D3D12CommandProcessor::InitializeShaderStorage(
    const std::filesystem::path& cache_root, uint32_t title_id, bool blocking) {
  CommandProcessor::InitializeShaderStorage(cache_root, title_id, blocking);
  pipeline_cache_->InitializeShaderStorage(cache_root, title_id, blocking);
}

void D3D12CommandProcessor::RequestFrameTrace(
    const std::filesystem::path& root_path) {
  // Capture with PIX if attached.
  if (GetD3D12Provider().GetGraphicsAnalysis() != nullptr) {
    pix_capture_requested_.store(true, std::memory_order_relaxed);
    return;
  }
  CommandProcessor::RequestFrameTrace(root_path);
}

void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
                                                     uint32_t length) {
  shared_memory_->MemoryInvalidationCallback(base_ptr, length, true);
  primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true);
}

void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) {
  // Starting a new frame because descriptors may be needed.
  if (!BeginSubmission(true)) {
    return;
  }
  render_target_cache_->RestoreEdramSnapshot(snapshot);
}

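// The Push*Barrier helpers below don't record barriers immediately - they
// accumulate them in barriers_ so that SubmitBarriers() can flush the whole
// batch in a single D3DResourceBarrier call before the next draw or dispatch.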
bool D3D12CommandProcessor::PushTransitionBarrier(
    ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
    D3D12_RESOURCE_STATES new_state, UINT subresource) {
  if (old_state == new_state) {
    return false;
  }
  D3D12_RESOURCE_BARRIER barrier;
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barrier.Transition.pResource = resource;
  barrier.Transition.Subresource = subresource;
  barrier.Transition.StateBefore = old_state;
  barrier.Transition.StateAfter = new_state;
  barriers_.push_back(barrier);
  return true;
}

void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource,
                                                ID3D12Resource* new_resource) {
  D3D12_RESOURCE_BARRIER barrier;
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barrier.Aliasing.pResourceBefore = old_resource;
  barrier.Aliasing.pResourceAfter = new_resource;
  barriers_.push_back(barrier);
}

void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) {
  D3D12_RESOURCE_BARRIER barrier;
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barrier.UAV.pResource = resource;
  barriers_.push_back(barrier);
}

void D3D12CommandProcessor::SubmitBarriers() {
  UINT barrier_count = UINT(barriers_.size());
  if (barrier_count != 0) {
    deferred_command_list_.D3DResourceBarrier(barrier_count, barriers_.data());
    barriers_.clear();
  }
}

ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
    const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
    bool tessellated) {
  if (bindless_resources_used_) {
    return tessellated ? root_signature_bindless_ds_
                       : root_signature_bindless_vs_;
  }

  D3D12_SHADER_VISIBILITY vertex_visibility =
      tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
                  : D3D12_SHADER_VISIBILITY_VERTEX;

  uint32_t texture_count_vertex =
      uint32_t(vertex_shader->GetTextureBindingsAfterTranslation().size());
  uint32_t sampler_count_vertex =
      uint32_t(vertex_shader->GetSamplerBindingsAfterTranslation().size());
  uint32_t texture_count_pixel =
      pixel_shader
          ? uint32_t(pixel_shader->GetTextureBindingsAfterTranslation().size())
          : 0;
  uint32_t sampler_count_pixel =
      pixel_shader
          ? uint32_t(pixel_shader->GetSamplerBindingsAfterTranslation().size())
          : 0;

  // Put the pixel shader's texture and sampler counts in the lower bits, as
  // they change more often than the vertex shader's.
  uint32_t index = 0;
  uint32_t index_offset = 0;
  index |= texture_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxTextureBindingIndexBits;
  index |= sampler_count_pixel << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  index |= texture_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxTextureBindingIndexBits;
  index |= sampler_count_vertex << index_offset;
  index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
  index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN)
           << index_offset;
  ++index_offset;
  assert_true(index_offset <= 32);

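  // A sketch of the resulting key layout, low bits first (field widths per
  // the constants in d3d12_shader.h): pixel texture count, pixel sampler
  // count, vertex texture count, vertex sampler count, and one bit for
  // domain shader visibility - the assert above checks that everything fits
  // in the 32-bit key.
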
  // Try an existing root signature.
  auto it = root_signatures_bindful_.find(index);
  if (it != root_signatures_bindful_.end()) {
    return it->second;
  }

  // Create a new one.
  D3D12_ROOT_SIGNATURE_DESC desc;
  D3D12_ROOT_PARAMETER parameters[kRootParameter_Bindful_Count_Max];
  desc.NumParameters = kRootParameter_Bindful_Count_Base;
  desc.pParameters = parameters;
  desc.NumStaticSamplers = 0;
  desc.pStaticSamplers = nullptr;
  desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

  // Base parameters.

  // Fetch constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FetchConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Vertex float constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsVertex];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = vertex_visibility;
  }

  // Pixel float constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsPixel];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
  }

  // System constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_SystemConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Bool and loop constants.
  {
    auto& parameter = parameters[kRootParameter_Bindful_BoolLoopConstants];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
    parameter.Descriptor.ShaderRegister =
        uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
    parameter.Descriptor.RegisterSpace = 0;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }

  // Shared memory and, if ROVs are used, EDRAM.
  D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3];
  {
    auto& parameter = parameters[kRootParameter_Bindful_SharedMemoryAndEdram];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 2;
    parameter.DescriptorTable.pDescriptorRanges =
        shared_memory_and_edram_ranges;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    shared_memory_and_edram_ranges[0].RangeType =
        D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    shared_memory_and_edram_ranges[0].NumDescriptors = 1;
    shared_memory_and_edram_ranges[0].BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kSharedMemory);
    shared_memory_and_edram_ranges[0].RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
    shared_memory_and_edram_ranges[1].RangeType =
        D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    shared_memory_and_edram_ranges[1].NumDescriptors = 1;
    shared_memory_and_edram_ranges[1].BaseShaderRegister =
        UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
    shared_memory_and_edram_ranges[1].RegisterSpace = 0;
    shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
    if (render_target_cache_->GetPath() ==
        RenderTargetCache::Path::kPixelShaderInterlock) {
      ++parameter.DescriptorTable.NumDescriptorRanges;
      shared_memory_and_edram_ranges[2].RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
      shared_memory_and_edram_ranges[2].NumDescriptors = 1;
      shared_memory_and_edram_ranges[2].BaseShaderRegister =
          UINT(DxbcShaderTranslator::UAVRegister::kEdram);
      shared_memory_and_edram_ranges[2].RegisterSpace = 0;
      shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2;
    }
  }

  // Extra parameters.

  // Pixel textures.
  D3D12_DESCRIPTOR_RANGE range_textures_pixel;
  if (texture_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_textures_pixel;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range_textures_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range_textures_pixel.NumDescriptors = texture_count_pixel;
    range_textures_pixel.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart);
    range_textures_pixel.RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    range_textures_pixel.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Pixel samplers.
  D3D12_DESCRIPTOR_RANGE range_samplers_pixel;
  if (sampler_count_pixel > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_samplers_pixel;
    parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    range_samplers_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range_samplers_pixel.NumDescriptors = sampler_count_pixel;
    range_samplers_pixel.BaseShaderRegister = 0;
    range_samplers_pixel.RegisterSpace = 0;
    range_samplers_pixel.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex textures.
  D3D12_DESCRIPTOR_RANGE range_textures_vertex;
  if (texture_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_textures_vertex;
    parameter.ShaderVisibility = vertex_visibility;
    range_textures_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    range_textures_vertex.NumDescriptors = texture_count_vertex;
    range_textures_vertex.BaseShaderRegister =
        uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart);
    range_textures_vertex.RegisterSpace =
        uint32_t(DxbcShaderTranslator::SRVSpace::kMain);
    range_textures_vertex.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  // Vertex samplers.
  D3D12_DESCRIPTOR_RANGE range_samplers_vertex;
  if (sampler_count_vertex > 0) {
    auto& parameter = parameters[desc.NumParameters];
    parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    parameter.DescriptorTable.NumDescriptorRanges = 1;
    parameter.DescriptorTable.pDescriptorRanges = &range_samplers_vertex;
    parameter.ShaderVisibility = vertex_visibility;
    range_samplers_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
    range_samplers_vertex.NumDescriptors = sampler_count_vertex;
    range_samplers_vertex.BaseShaderRegister = 0;
    range_samplers_vertex.RegisterSpace = 0;
    range_samplers_vertex.OffsetInDescriptorsFromTableStart = 0;
    ++desc.NumParameters;
  }

  ID3D12RootSignature* root_signature =
      ui::d3d12::util::CreateRootSignature(GetD3D12Provider(), desc);
  if (root_signature == nullptr) {
    XELOGE(
        "Failed to create a root signature with {} pixel textures, {} pixel "
        "samplers, {} vertex textures and {} vertex samplers",
        texture_count_pixel, sampler_count_pixel, texture_count_vertex,
        sampler_count_vertex);
    return nullptr;
  }
  root_signatures_bindful_.emplace(index, root_signature);
  return root_signature;
}

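// Note that the indices assigned below must mirror the order in which
// GetRootSignature appends the extra descriptor tables (pixel textures, pixel
// samplers, vertex textures, then vertex samplers).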
uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices(
    const DxbcShader* vertex_shader, const DxbcShader* pixel_shader,
    RootBindfulExtraParameterIndices& indices_out) {
  uint32_t index = kRootParameter_Bindful_Count_Base;
  if (pixel_shader &&
      !pixel_shader->GetTextureBindingsAfterTranslation().empty()) {
    indices_out.textures_pixel = index++;
  } else {
    indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable;
  }
  if (pixel_shader &&
      !pixel_shader->GetSamplerBindingsAfterTranslation().empty()) {
    indices_out.samplers_pixel = index++;
  } else {
    indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable;
  }
  if (!vertex_shader->GetTextureBindingsAfterTranslation().empty()) {
    indices_out.textures_vertex = index++;
  } else {
    indices_out.textures_vertex =
        RootBindfulExtraParameterIndices::kUnavailable;
  }
  if (!vertex_shader->GetSamplerBindingsAfterTranslation().empty()) {
    indices_out.samplers_vertex = index++;
  } else {
    indices_out.samplers_vertex =
        RootBindfulExtraParameterIndices::kUnavailable;
  }
  return index;
}

uint64_t D3D12CommandProcessor::RequestViewBindfulDescriptors(
    uint64_t previous_heap_index, uint32_t count_for_partial_update,
    uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  assert_false(bindless_resources_used_);
  assert_true(submission_open_);
  uint32_t descriptor_index;
  uint64_t current_heap_index = view_bindful_heap_pool_->Request(
      frame_current_, previous_heap_index, count_for_partial_update,
      count_for_full_update, descriptor_index);
  if (current_heap_index ==
      ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
    // There was an error.
    return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
  }
  ID3D12DescriptorHeap* heap = view_bindful_heap_pool_->GetLastRequestHeap();
  if (view_bindful_heap_current_ != heap) {
    view_bindful_heap_current_ = heap;
    deferred_command_list_.SetDescriptorHeaps(view_bindful_heap_current_,
                                              sampler_bindful_heap_current_);
  }
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  cpu_handle_out = provider.OffsetViewDescriptor(
      view_bindful_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index);
  gpu_handle_out = provider.OffsetViewDescriptor(
      view_bindful_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index);
  return current_heap_index;
}

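// Bindless view descriptor indices are managed with a bump allocator plus a
// free list: new indices come from view_bindless_heap_allocated_ until
// kViewBindlessHeapSize is reached, and released indices are recycled through
// view_bindless_heap_free_.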
uint32_t D3D12CommandProcessor::RequestPersistentViewBindlessDescriptor() {
  assert_true(bindless_resources_used_);
  if (!view_bindless_heap_free_.empty()) {
    uint32_t descriptor_index = view_bindless_heap_free_.back();
    view_bindless_heap_free_.pop_back();
    return descriptor_index;
  }
  if (view_bindless_heap_allocated_ >= kViewBindlessHeapSize) {
    return UINT32_MAX;
  }
  return view_bindless_heap_allocated_++;
}

void D3D12CommandProcessor::ReleaseViewBindlessDescriptorImmediately(
    uint32_t descriptor_index) {
  assert_true(bindless_resources_used_);
  view_bindless_heap_free_.push_back(descriptor_index);
}

bool D3D12CommandProcessor::RequestOneUseSingleViewDescriptors(
    uint32_t count, ui::d3d12::util::DescriptorCpuGpuHandlePair* handles_out) {
  assert_true(submission_open_);
  if (!count) {
    return true;
  }
  assert_not_null(handles_out);
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  if (bindless_resources_used_) {
    // Request separate bindless descriptors that will be freed when this
    // submission is completed by the GPU.
    if (count > kViewBindlessHeapSize - view_bindless_heap_allocated_ +
                    view_bindless_heap_free_.size()) {
      return false;
    }
    for (uint32_t i = 0; i < count; ++i) {
      uint32_t descriptor_index;
      if (!view_bindless_heap_free_.empty()) {
        descriptor_index = view_bindless_heap_free_.back();
        view_bindless_heap_free_.pop_back();
      } else {
        descriptor_index = view_bindless_heap_allocated_++;
      }
      view_bindless_one_use_descriptors_.push_back(
          std::make_pair(descriptor_index, submission_current_));
      handles_out[i] =
          std::make_pair(provider.OffsetViewDescriptor(
                             view_bindless_heap_cpu_start_, descriptor_index),
                         provider.OffsetViewDescriptor(
                             view_bindless_heap_gpu_start_, descriptor_index));
    }
  } else {
    // Request a range within the current heap for the bindful resources path.
    D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle_start;
    D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_start;
    if (RequestViewBindfulDescriptors(
            ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid, count, count,
            cpu_handle_start, gpu_handle_start) ==
        ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
      return false;
    }
    for (uint32_t i = 0; i < count; ++i) {
      handles_out[i] =
          std::make_pair(provider.OffsetViewDescriptor(cpu_handle_start, i),
                         provider.OffsetViewDescriptor(gpu_handle_start, i));
    }
  }
  return true;
}

ui::d3d12::util::DescriptorCpuGpuHandlePair
D3D12CommandProcessor::GetSystemBindlessViewHandlePair(
    SystemBindlessView view) const {
  assert_true(bindless_resources_used_);
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  return std::make_pair(provider.OffsetViewDescriptor(
                            view_bindless_heap_cpu_start_, uint32_t(view)),
                        provider.OffsetViewDescriptor(
                            view_bindless_heap_gpu_start_, uint32_t(view)));
}

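// In the Get*UintPow2* helpers below, element_size_bytes_pow2 is the log2 of
// the element size in bytes: 2 -> R32 (4 bytes), 3 -> R32G32 (8 bytes),
// 4 -> R32G32B32A32 (16 bytes).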
ui::d3d12::util::DescriptorCpuGpuHandlePair
D3D12CommandProcessor::GetSharedMemoryUintPow2BindlessSRVHandlePair(
    uint32_t element_size_bytes_pow2) const {
  SystemBindlessView view;
  switch (element_size_bytes_pow2) {
    case 2:
      view = SystemBindlessView::kSharedMemoryR32UintSRV;
      break;
    case 3:
      view = SystemBindlessView::kSharedMemoryR32G32UintSRV;
      break;
    case 4:
      view = SystemBindlessView::kSharedMemoryR32G32B32A32UintSRV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      view = SystemBindlessView::kSharedMemoryR32UintSRV;
  }
  return GetSystemBindlessViewHandlePair(view);
}

ui::d3d12::util::DescriptorCpuGpuHandlePair
D3D12CommandProcessor::GetSharedMemoryUintPow2BindlessUAVHandlePair(
    uint32_t element_size_bytes_pow2) const {
  SystemBindlessView view;
  switch (element_size_bytes_pow2) {
    case 2:
      view = SystemBindlessView::kSharedMemoryR32UintUAV;
      break;
    case 3:
      view = SystemBindlessView::kSharedMemoryR32G32UintUAV;
      break;
    case 4:
      view = SystemBindlessView::kSharedMemoryR32G32B32A32UintUAV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      view = SystemBindlessView::kSharedMemoryR32UintUAV;
  }
  return GetSystemBindlessViewHandlePair(view);
}

ui::d3d12::util::DescriptorCpuGpuHandlePair
D3D12CommandProcessor::GetEdramUintPow2BindlessSRVHandlePair(
    uint32_t element_size_bytes_pow2) const {
  SystemBindlessView view;
  switch (element_size_bytes_pow2) {
    case 2:
      view = SystemBindlessView::kEdramR32UintSRV;
      break;
    case 3:
      view = SystemBindlessView::kEdramR32G32UintSRV;
      break;
    case 4:
      view = SystemBindlessView::kEdramR32G32B32A32UintSRV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      view = SystemBindlessView::kEdramR32UintSRV;
  }
  return GetSystemBindlessViewHandlePair(view);
}

ui::d3d12::util::DescriptorCpuGpuHandlePair
D3D12CommandProcessor::GetEdramUintPow2BindlessUAVHandlePair(
    uint32_t element_size_bytes_pow2) const {
  SystemBindlessView view;
  switch (element_size_bytes_pow2) {
    case 2:
      view = SystemBindlessView::kEdramR32UintUAV;
      break;
    case 3:
      view = SystemBindlessView::kEdramR32G32UintUAV;
      break;
    case 4:
      view = SystemBindlessView::kEdramR32G32B32A32UintUAV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      view = SystemBindlessView::kEdramR32UintUAV;
  }
  return GetSystemBindlessViewHandlePair(view);
}

uint64_t D3D12CommandProcessor::RequestSamplerBindfulDescriptors(
    uint64_t previous_heap_index, uint32_t count_for_partial_update,
    uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
    D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) {
  assert_false(bindless_resources_used_);
  assert_true(submission_open_);
  uint32_t descriptor_index;
  uint64_t current_heap_index = sampler_bindful_heap_pool_->Request(
      frame_current_, previous_heap_index, count_for_partial_update,
      count_for_full_update, descriptor_index);
  if (current_heap_index ==
      ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
    // There was an error.
    return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
  }
  ID3D12DescriptorHeap* heap = sampler_bindful_heap_pool_->GetLastRequestHeap();
  if (sampler_bindful_heap_current_ != heap) {
    sampler_bindful_heap_current_ = heap;
    deferred_command_list_.SetDescriptorHeaps(view_bindful_heap_current_,
                                              sampler_bindful_heap_current_);
  }
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  cpu_handle_out = provider.OffsetSamplerDescriptor(
      sampler_bindful_heap_pool_->GetLastRequestHeapCPUStart(),
      descriptor_index);
  gpu_handle_out = provider.OffsetSamplerDescriptor(
      sampler_bindful_heap_pool_->GetLastRequestHeapGPUStart(),
      descriptor_index);
  return current_heap_index;
}

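// The scratch buffer only grows: a request larger than the current buffer
// allocates a replacement rounded up to a multiple of
// kScratchBufferSizeIncrement (so back-to-back requests of slightly different
// sizes don't each reallocate), and the old buffer is kept alive in
// resources_for_deletion_ until the GPU finishes the current submission.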
ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
    uint32_t size, D3D12_RESOURCE_STATES state) {
  assert_true(submission_open_);
  assert_false(scratch_buffer_used_);
  if (!submission_open_ || scratch_buffer_used_ || size == 0) {
    return nullptr;
  }

  if (size <= scratch_buffer_size_) {
    PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state);
    scratch_buffer_state_ = state;
    scratch_buffer_used_ = true;
    return scratch_buffer_;
  }

  size = xe::align(size, kScratchBufferSizeIncrement);

  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  D3D12_RESOURCE_DESC buffer_desc;
  ui::d3d12::util::FillBufferResourceDesc(
      buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
  ID3D12Resource* buffer;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault,
          provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, state, nullptr,
          IID_PPV_ARGS(&buffer)))) {
    XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20);
    return nullptr;
  }
  if (scratch_buffer_ != nullptr) {
    resources_for_deletion_.emplace_back(submission_current_, scratch_buffer_);
  }
  scratch_buffer_ = buffer;
  scratch_buffer_size_ = size;
  scratch_buffer_state_ = state;
  scratch_buffer_used_ = true;
  return scratch_buffer_;
}

void D3D12CommandProcessor::ReleaseScratchGPUBuffer(
    ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) {
  assert_true(submission_open_);
  assert_true(scratch_buffer_used_);
  scratch_buffer_used_ = false;
  if (buffer == scratch_buffer_) {
    scratch_buffer_state_ = new_state;
  }
}

void D3D12CommandProcessor::SetExternalPipeline(ID3D12PipelineState* pipeline) {
  if (current_external_pipeline_ != pipeline) {
    current_external_pipeline_ = pipeline;
    current_guest_pipeline_ = nullptr;
    deferred_command_list_.D3DSetPipelineState(pipeline);
  }
}

void D3D12CommandProcessor::SetExternalGraphicsRootSignature(
    ID3D12RootSignature* root_signature) {
  if (current_graphics_root_signature_ != root_signature) {
    current_graphics_root_signature_ = root_signature;
    deferred_command_list_.D3DSetGraphicsRootSignature(root_signature);
  }
  // Force-invalidate because a non-guest root signature is being set.
  current_graphics_root_up_to_date_ = 0;
}

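// The fixed-function state setters below compare against the last value
// recorded into the deferred command list and skip redundant commands; the
// *_update_needed_ dirty flags also let other code force a re-apply (for
// example, after the command list state is reset).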
void D3D12CommandProcessor::SetViewport(const D3D12_VIEWPORT& viewport) {
  ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX;
  ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY;
  ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width;
  ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height;
  ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth;
  ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth;
  if (ff_viewport_update_needed_) {
    ff_viewport_ = viewport;
    deferred_command_list_.RSSetViewport(ff_viewport_);
    ff_viewport_update_needed_ = false;
  }
}

void D3D12CommandProcessor::SetScissorRect(const D3D12_RECT& scissor_rect) {
  ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left;
  ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top;
  ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right;
  ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor_rect.bottom;
  if (ff_scissor_update_needed_) {
    ff_scissor_ = scissor_rect;
    deferred_command_list_.RSSetScissorRect(ff_scissor_);
    ff_scissor_update_needed_ = false;
  }
}

void D3D12CommandProcessor::SetStencilReference(uint32_t stencil_ref) {
  ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
  if (ff_stencil_ref_update_needed_) {
    ff_stencil_ref_ = stencil_ref;
    deferred_command_list_.D3DOMSetStencilRef(stencil_ref);
    ff_stencil_ref_update_needed_ = false;
  }
}

void D3D12CommandProcessor::SetPrimitiveTopology(
    D3D12_PRIMITIVE_TOPOLOGY primitive_topology) {
  if (primitive_topology_ != primitive_topology) {
    primitive_topology_ = primitive_topology;
    deferred_command_list_.D3DIASetPrimitiveTopology(primitive_topology);
  }
}

std::string D3D12CommandProcessor::GetWindowTitleText() const {
  std::ostringstream title;
  title << "Direct3D 12";
  if (render_target_cache_) {
    // Rasterizer-ordered views are a feature very rarely used as of 2020, one
    // that faces adoption complications (outside of Direct3D - on Vulkan - at
    // least), but crucial to Xenia - raise awareness of its usage.
    // https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319
    // "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I
    // wasn't aware that Xenia D3D12 backend was using Raster Order Views
    // feature" - oscarbg in that issue.
    switch (render_target_cache_->GetPath()) {
      case RenderTargetCache::Path::kHostRenderTargets:
        title << " - RTV/DSV";
        break;
      case RenderTargetCache::Path::kPixelShaderInterlock:
        title << " - ROV";
        break;
      default:
        break;
    }
    uint32_t draw_resolution_scale_x =
        texture_cache_ ? texture_cache_->draw_resolution_scale_x() : 1;
    uint32_t draw_resolution_scale_y =
        texture_cache_ ? texture_cache_->draw_resolution_scale_y() : 1;
    if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
      title << ' ' << draw_resolution_scale_x << 'x' << draw_resolution_scale_y;
    }
  }
  return title.str();
}

bool D3D12CommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
    XELOGE("Failed to initialize base command processor context");
    return false;
  }

  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  ID3D12CommandQueue* direct_queue = provider.GetDirectQueue();

  fence_completion_event_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
  if (fence_completion_event_ == nullptr) {
    XELOGE("Failed to create the fence completion event");
    return false;
  }
  if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
                                 IID_PPV_ARGS(&submission_fence_)))) {
    XELOGE("Failed to create the submission fence");
    return false;
  }
  if (FAILED(device->CreateFence(
          0, D3D12_FENCE_FLAG_NONE,
          IID_PPV_ARGS(&queue_operations_since_submission_fence_)))) {
    XELOGE(
        "Failed to create the fence for awaiting queue operations done since "
        "the latest submission");
    return false;
  }

  // Create the command list, and one allocator because an allocator is
  // required to create a command list.
  ID3D12CommandAllocator* command_allocator;
  if (FAILED(device->CreateCommandAllocator(
          D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator)))) {
    XELOGE("Failed to create a command allocator");
    return false;
  }
  command_allocator_writable_first_ = new CommandAllocator;
  command_allocator_writable_first_->command_allocator = command_allocator;
  command_allocator_writable_first_->last_usage_submission = 0;
  command_allocator_writable_first_->next = nullptr;
  command_allocator_writable_last_ = command_allocator_writable_first_;
  if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
                                       command_allocator, nullptr,
                                       IID_PPV_ARGS(&command_list_)))) {
    XELOGE("Failed to create the graphics command list");
    return false;
  }
  // The command list is created in the open state - close it until the first
  // deferred command list submission.
  command_list_->Close();
  // Optional - added in Creators Update (SDK 10.0.15063.0).
  command_list_->QueryInterface(IID_PPV_ARGS(&command_list_1_));

  bindless_resources_used_ =
      cvars::d3d12_bindless &&
      provider.GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2;

  // Get the draw resolution scale for the render target cache and the texture
  // cache.
  uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
  bool draw_resolution_scale_not_clamped =
      TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
                                                 draw_resolution_scale_y);
  if (!D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported(
          draw_resolution_scale_x, draw_resolution_scale_y, provider)) {
    draw_resolution_scale_not_clamped = false;
  }
  if (!draw_resolution_scale_not_clamped) {
    XELOGW(
        "The requested draw resolution scale is not supported by the device or "
        "the emulator, reducing to {}x{}",
        draw_resolution_scale_x, draw_resolution_scale_y);
  }

  shared_memory_ =
      std::make_unique<D3D12SharedMemory>(*this, *memory_, trace_writer_);
  if (!shared_memory_->Initialize()) {
    XELOGE("Failed to initialize shared memory");
    return false;
  }

  // Initialize the render target cache before configuring binding - need to
  // know if using rasterizer-ordered views for the bindless root signature.
  render_target_cache_ = std::make_unique<D3D12RenderTargetCache>(
      *register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
      draw_resolution_scale_y, *this, bindless_resources_used_);
  if (!render_target_cache_->Initialize()) {
    XELOGE("Failed to initialize the render target cache");
    return false;
  }

  // Initialize resource binding.
  constant_buffer_pool_ = std::make_unique<ui::d3d12::D3D12UploadBufferPool>(
      provider,
      std::max(ui::d3d12::D3D12UploadBufferPool::kDefaultPageSize,
               sizeof(float) * 4 * D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT));
  if (bindless_resources_used_) {
    D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc;
    view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    view_bindless_heap_desc.NumDescriptors = kViewBindlessHeapSize;
    view_bindless_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    view_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &view_bindless_heap_desc, IID_PPV_ARGS(&view_bindless_heap_)))) {
      XELOGE("Failed to create the bindless CBV/SRV/UAV descriptor heap");
      return false;
    }
    view_bindless_heap_cpu_start_ =
        view_bindless_heap_->GetCPUDescriptorHandleForHeapStart();
    view_bindless_heap_gpu_start_ =
        view_bindless_heap_->GetGPUDescriptorHandleForHeapStart();
    view_bindless_heap_allocated_ = uint32_t(SystemBindlessView::kCount);

    D3D12_DESCRIPTOR_HEAP_DESC sampler_bindless_heap_desc;
    sampler_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
    sampler_bindless_heap_desc.NumDescriptors = kSamplerHeapSize;
    sampler_bindless_heap_desc.Flags =
        D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    sampler_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &sampler_bindless_heap_desc,
            IID_PPV_ARGS(&sampler_bindless_heap_current_)))) {
      XELOGE("Failed to create the bindless sampler descriptor heap");
      return false;
    }
    sampler_bindless_heap_cpu_start_ =
        sampler_bindless_heap_current_->GetCPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_gpu_start_ =
        sampler_bindless_heap_current_->GetGPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_allocated_ = 0;
  } else {
    view_bindful_heap_pool_ =
        std::make_unique<ui::d3d12::D3D12DescriptorHeapPool>(
            device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
            kViewBindfulHeapSize);
    sampler_bindful_heap_pool_ =
        std::make_unique<ui::d3d12::D3D12DescriptorHeapPool>(
            device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, kSamplerHeapSize);
  }

  if (bindless_resources_used_) {
    // Global bindless resource root signatures.
    // No CBV or UAV descriptor ranges with any descriptors to be allocated
    // dynamically (via RequestPersistentViewBindlessDescriptor or
    // RequestOneUseSingleViewDescriptors) should be here, because they would
    // overlap the unbounded SRV range, which is not allowed on Nvidia Fermi!
    D3D12_ROOT_SIGNATURE_DESC root_signature_bindless_desc;
    D3D12_ROOT_PARAMETER
        root_parameters_bindless[kRootParameter_Bindless_Count];
    root_signature_bindless_desc.NumParameters = kRootParameter_Bindless_Count;
    root_signature_bindless_desc.pParameters = root_parameters_bindless;
    root_signature_bindless_desc.NumStaticSamplers = 0;
    root_signature_bindless_desc.pStaticSamplers = nullptr;
    root_signature_bindless_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
    // Fetch constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FetchConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Vertex float constants.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_FloatConstantsVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // Pixel float constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FloatConstantsPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Pixel shader descriptor indices.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_DescriptorIndicesPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Vertex shader descriptor indices.
    {
      auto& parameter = root_parameters_bindless
          [kRootParameter_Bindless_DescriptorIndicesVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // System constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SystemConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Bool and loop constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_BoolLoopConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Sampler heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range;
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SamplerHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      parameter.DescriptorTable.NumDescriptorRanges = 1;
      parameter.DescriptorTable.pDescriptorRanges =
          &root_bindless_sampler_range;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      root_bindless_sampler_range.RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
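      // NumDescriptors = UINT_MAX is how D3D12 expresses an unbounded
      // descriptor range - the table spans all the samplers in the heap.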
      root_bindless_sampler_range.NumDescriptors = UINT_MAX;
      root_bindless_sampler_range.BaseShaderRegister = 0;
      root_bindless_sampler_range.RegisterSpace = 0;
      root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0;
    }
    // View heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6];
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_ViewHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      // Will be appending.
      parameter.DescriptorTable.NumDescriptorRanges = 0;
      parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      // Shared memory SRV.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory);
        range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kSharedMemoryRawSRV);
      }
      // Shared memory UAV.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
        range.RegisterSpace = 0;
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kSharedMemoryRawUAV);
      }
      // EDRAM.
      if (render_target_cache_->GetPath() ==
          RenderTargetCache::Path::kPixelShaderInterlock) {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
        range.NumDescriptors = 1;
        range.BaseShaderRegister =
            UINT(DxbcShaderTranslator::UAVRegister::kEdram);
        range.RegisterSpace = 0;
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kEdramR32UintUAV);
      }
      // Used UAV and SRV ranges must not overlap on Nvidia Fermi, so textures
      // have OffsetInDescriptorsFromTableStart after all static descriptors of
      // other types.
      // 2D array textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kUnboundedSRVsStart);
      }
      // 3D textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kUnboundedSRVsStart);
      }
      // Cube textures.
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range = root_bindless_view_ranges[parameter.DescriptorTable
                                                    .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kUnboundedSRVsStart);
      }
    }
    root_signature_bindless_vs_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_vs_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use without tessellation");
      return false;
    }
    root_parameters_bindless[kRootParameter_Bindless_FloatConstantsVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_signature_bindless_ds_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_ds_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use with tessellation");
      return false;
    }
  }

  primitive_processor_ = std::make_unique<D3D12PrimitiveProcessor>(
      *register_file_, *memory_, trace_writer_, *shared_memory_, *this);
  if (!primitive_processor_->Initialize()) {
    XELOGE("Failed to initialize the geometric primitive processor");
    return false;
  }

  texture_cache_ = D3D12TextureCache::Create(
      *register_file_, *shared_memory_, draw_resolution_scale_x,
      draw_resolution_scale_y, *this, bindless_resources_used_);
  if (!texture_cache_) {
    XELOGE("Failed to initialize the texture cache");
    return false;
  }

  pipeline_cache_ = std::make_unique<PipelineCache>(*this, *register_file_,
                                                    *render_target_cache_.get(),
                                                    bindless_resources_used_);
  if (!pipeline_cache_->Initialize()) {
    XELOGE("Failed to initialize the graphics pipeline cache");
    return false;
  }

  D3D12_HEAP_FLAGS heap_flag_create_not_zeroed =
      provider.GetHeapFlagCreateNotZeroed();

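  // The gamma ramp buffer holds both guest ramp formats back to back: a
  // 256-entry table ramp followed by a piecewise-linear ramp with 128 entries
  // per channel for 3 channels, 4 bytes per entry - hence the
  // (256 + 128 * 3) * 4 size below.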
  // Create gamma ramp resources.
  gamma_ramp_256_entry_table_up_to_date_ = false;
  gamma_ramp_pwl_up_to_date_ = false;
  D3D12_RESOURCE_DESC gamma_ramp_buffer_desc;
  ui::d3d12::util::FillBufferResourceDesc(
      gamma_ramp_buffer_desc, (256 + 128 * 3) * 4, D3D12_RESOURCE_FLAG_NONE);
  // The first action will be uploading.
  gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed,
          &gamma_ramp_buffer_desc, gamma_ramp_buffer_state_, nullptr,
          IID_PPV_ARGS(&gamma_ramp_buffer_)))) {
    XELOGE("Failed to create the gamma ramp buffer");
    return false;
  }
  // The upload buffer is frame-buffered.
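  // (Each of the kQueueFrames frames in flight gets its own slice of the
  // upload buffer, so the CPU can write a new ramp while the GPU may still be
  // reading the previous frame's copy.)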
gamma_ramp_buffer_desc.Width *= kQueueFrames;
|
|
if (FAILED(device->CreateCommittedResource(
|
|
&ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed,
|
|
&gamma_ramp_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
|
|
IID_PPV_ARGS(&gamma_ramp_upload_buffer_)))) {
|
|
XELOGE("Failed to create the gamma ramp upload buffer");
|
|
return false;
|
|
}
|
|
if (FAILED(gamma_ramp_upload_buffer_->Map(
|
|
0, nullptr,
|
|
reinterpret_cast<void**>(&gamma_ramp_upload_buffer_mapping_)))) {
|
|
XELOGE("Failed to map the gamma ramp upload buffer");
|
|
gamma_ramp_upload_buffer_mapping_ = nullptr;
|
|
return false;
|
|
}
|
|
|
|
// Initialize compute pipelines for output with gamma ramp.
|
|
D3D12_ROOT_PARAMETER
|
|
apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kCount)];
|
|
{
|
|
D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_constants =
|
|
apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kConstants)];
|
|
apply_gamma_root_parameter_constants.ParameterType =
|
|
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
|
apply_gamma_root_parameter_constants.Constants.ShaderRegister = 0;
|
|
apply_gamma_root_parameter_constants.Constants.RegisterSpace = 0;
|
|
apply_gamma_root_parameter_constants.Constants.Num32BitValues =
|
|
sizeof(ApplyGammaConstants) / sizeof(uint32_t);
|
|
apply_gamma_root_parameter_constants.ShaderVisibility =
|
|
D3D12_SHADER_VISIBILITY_ALL;
|
|
}
|
|
D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_dest;
|
|
apply_gamma_root_descriptor_range_dest.RangeType =
|
|
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
|
apply_gamma_root_descriptor_range_dest.NumDescriptors = 1;
|
|
apply_gamma_root_descriptor_range_dest.BaseShaderRegister = 0;
|
|
apply_gamma_root_descriptor_range_dest.RegisterSpace = 0;
|
|
apply_gamma_root_descriptor_range_dest.OffsetInDescriptorsFromTableStart = 0;
|
|
{
|
|
D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_dest =
|
|
apply_gamma_root_parameters[UINT(
|
|
ApplyGammaRootParameter::kDestination)];
|
|
apply_gamma_root_parameter_dest.ParameterType =
|
|
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
apply_gamma_root_parameter_dest.DescriptorTable.NumDescriptorRanges = 1;
|
|
apply_gamma_root_parameter_dest.DescriptorTable.pDescriptorRanges =
|
|
&apply_gamma_root_descriptor_range_dest;
|
|
apply_gamma_root_parameter_dest.ShaderVisibility =
|
|
D3D12_SHADER_VISIBILITY_ALL;
|
|
}
|
|
D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_source;
|
|
apply_gamma_root_descriptor_range_source.RangeType =
|
|
D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
|
apply_gamma_root_descriptor_range_source.NumDescriptors = 1;
|
|
apply_gamma_root_descriptor_range_source.BaseShaderRegister = 1;
|
|
apply_gamma_root_descriptor_range_source.RegisterSpace = 0;
|
|
apply_gamma_root_descriptor_range_source.OffsetInDescriptorsFromTableStart =
|
|
0;
|
|
{
|
|
D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_source =
|
|
apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kSource)];
|
|
apply_gamma_root_parameter_source.ParameterType =
|
|
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
apply_gamma_root_parameter_source.DescriptorTable.NumDescriptorRanges = 1;
|
|
apply_gamma_root_parameter_source.DescriptorTable.pDescriptorRanges =
|
|
&apply_gamma_root_descriptor_range_source;
|
|
apply_gamma_root_parameter_source.ShaderVisibility =
|
|
D3D12_SHADER_VISIBILITY_ALL;
|
|
}
|
|
D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_ramp;
|
|
apply_gamma_root_descriptor_range_ramp.RangeType =
|
|
D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
|
apply_gamma_root_descriptor_range_ramp.NumDescriptors = 1;
|
|
apply_gamma_root_descriptor_range_ramp.BaseShaderRegister = 0;
|
|
apply_gamma_root_descriptor_range_ramp.RegisterSpace = 0;
|
|
apply_gamma_root_descriptor_range_ramp.OffsetInDescriptorsFromTableStart = 0;
|
|
{
|
|
D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_gamma_ramp =
|
|
apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kRamp)];
|
|
apply_gamma_root_parameter_gamma_ramp.ParameterType =
|
|
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
|
apply_gamma_root_parameter_gamma_ramp.DescriptorTable.NumDescriptorRanges =
|
|
1;
|
|
apply_gamma_root_parameter_gamma_ramp.DescriptorTable.pDescriptorRanges =
|
|
&apply_gamma_root_descriptor_range_ramp;
|
|
apply_gamma_root_parameter_gamma_ramp.ShaderVisibility =
|
|
D3D12_SHADER_VISIBILITY_ALL;
|
|
}
|
|
D3D12_ROOT_SIGNATURE_DESC apply_gamma_root_signature_desc;
|
|
apply_gamma_root_signature_desc.NumParameters =
|
|
UINT(ApplyGammaRootParameter::kCount);
|
|
apply_gamma_root_signature_desc.pParameters = apply_gamma_root_parameters;
|
|
apply_gamma_root_signature_desc.NumStaticSamplers = 0;
|
|
apply_gamma_root_signature_desc.pStaticSamplers = nullptr;
|
|
apply_gamma_root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
|
|
*(apply_gamma_root_signature_.ReleaseAndGetAddressOf()) =
|
|
ui::d3d12::util::CreateRootSignature(provider,
|
|
apply_gamma_root_signature_desc);
|
|
if (!apply_gamma_root_signature_) {
|
|
XELOGE("Failed to create the gamma ramp application root signature");
|
|
return false;
|
|
}
|
|
*(apply_gamma_table_pipeline_.ReleaseAndGetAddressOf()) =
|
|
ui::d3d12::util::CreateComputePipeline(
|
|
device, shaders::apply_gamma_table_cs,
|
|
sizeof(shaders::apply_gamma_table_cs),
|
|
apply_gamma_root_signature_.Get());
|
|
if (!apply_gamma_table_pipeline_) {
|
|
XELOGE(
|
|
"Failed to create the 256-entry table gamma ramp application compute "
|
|
"pipeline");
|
|
return false;
|
|
}
|
|
*(apply_gamma_table_fxaa_luma_pipeline_.ReleaseAndGetAddressOf()) =
|
|
ui::d3d12::util::CreateComputePipeline(
|
|
device, shaders::apply_gamma_table_fxaa_luma_cs,
|
|
sizeof(shaders::apply_gamma_table_fxaa_luma_cs),
|
|
apply_gamma_root_signature_.Get());
|
|
if (!apply_gamma_table_fxaa_luma_pipeline_) {
|
|
XELOGE(
|
|
"Failed to create the 256-entry table gamma ramp application compute "
|
|
"pipeline with perceptual luma output");
|
|
return false;
|
|
}
|
|
*(apply_gamma_pwl_pipeline_.ReleaseAndGetAddressOf()) =
|
|
ui::d3d12::util::CreateComputePipeline(
|
|
device, shaders::apply_gamma_pwl_cs,
|
|
sizeof(shaders::apply_gamma_pwl_cs),
|
|
apply_gamma_root_signature_.Get());
|
|
if (!apply_gamma_pwl_pipeline_) {
|
|
XELOGE("Failed to create the PWL gamma ramp application compute pipeline");
|
|
return false;
|
|
}
|
|
*(apply_gamma_pwl_fxaa_luma_pipeline_.ReleaseAndGetAddressOf()) =
|
|
ui::d3d12::util::CreateComputePipeline(
|
|
device, shaders::apply_gamma_pwl_fxaa_luma_cs,
|
|
sizeof(shaders::apply_gamma_pwl_fxaa_luma_cs),
|
|
apply_gamma_root_signature_.Get());
|
|
if (!apply_gamma_pwl_fxaa_luma_pipeline_) {
|
|
XELOGE(
|
|
"Failed to create the PWL gamma ramp application compute pipeline with "
|
|
"perceptual luma output");
|
|
return false;
|
|
}
|
|
|
|
// Initialize compute pipelines for post-processing anti-aliasing.
|
|
D3D12_ROOT_PARAMETER fxaa_root_parameters[UINT(FxaaRootParameter::kCount)];
|
|
{
|
|
D3D12_ROOT_PARAMETER& fxaa_root_parameter_constants =
|
|
fxaa_root_parameters[UINT(ApplyGammaRootParameter::kConstants)];
    fxaa_root_parameter_constants.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    fxaa_root_parameter_constants.Constants.ShaderRegister = 0;
    fxaa_root_parameter_constants.Constants.RegisterSpace = 0;
    fxaa_root_parameter_constants.Constants.Num32BitValues =
        sizeof(FxaaConstants) / sizeof(uint32_t);
    fxaa_root_parameter_constants.ShaderVisibility =
        D3D12_SHADER_VISIBILITY_ALL;
  }
  D3D12_DESCRIPTOR_RANGE fxaa_root_descriptor_range_dest;
  fxaa_root_descriptor_range_dest.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
  fxaa_root_descriptor_range_dest.NumDescriptors = 1;
  fxaa_root_descriptor_range_dest.BaseShaderRegister = 0;
  fxaa_root_descriptor_range_dest.RegisterSpace = 0;
  fxaa_root_descriptor_range_dest.OffsetInDescriptorsFromTableStart = 0;
  {
    D3D12_ROOT_PARAMETER& fxaa_root_parameter_dest =
        fxaa_root_parameters[UINT(FxaaRootParameter::kDestination)];
    fxaa_root_parameter_dest.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    fxaa_root_parameter_dest.DescriptorTable.NumDescriptorRanges = 1;
    fxaa_root_parameter_dest.DescriptorTable.pDescriptorRanges =
        &fxaa_root_descriptor_range_dest;
    fxaa_root_parameter_dest.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }
  D3D12_DESCRIPTOR_RANGE fxaa_root_descriptor_range_source;
  fxaa_root_descriptor_range_source.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
  fxaa_root_descriptor_range_source.NumDescriptors = 1;
  fxaa_root_descriptor_range_source.BaseShaderRegister = 0;
  fxaa_root_descriptor_range_source.RegisterSpace = 0;
  fxaa_root_descriptor_range_source.OffsetInDescriptorsFromTableStart = 0;
  {
    D3D12_ROOT_PARAMETER& fxaa_root_parameter_source =
        fxaa_root_parameters[UINT(FxaaRootParameter::kSource)];
    fxaa_root_parameter_source.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    fxaa_root_parameter_source.DescriptorTable.NumDescriptorRanges = 1;
    fxaa_root_parameter_source.DescriptorTable.pDescriptorRanges =
        &fxaa_root_descriptor_range_source;
    fxaa_root_parameter_source.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }
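  // FXAA reads the source with bilinearly filtered taps, so a linear-filter,
  // clamp-address sampler is baked into the root signature as a static
  // sampler rather than allocated from a sampler heap.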
  D3D12_STATIC_SAMPLER_DESC fxaa_root_sampler;
  fxaa_root_sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
  fxaa_root_sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.MipLODBias = 0.0f;
  fxaa_root_sampler.MaxAnisotropy = 1;
  fxaa_root_sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
  fxaa_root_sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
  fxaa_root_sampler.MinLOD = 0.0f;
  fxaa_root_sampler.MaxLOD = 0.0f;
  fxaa_root_sampler.ShaderRegister = 0;
  fxaa_root_sampler.RegisterSpace = 0;
  fxaa_root_sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  D3D12_ROOT_SIGNATURE_DESC fxaa_root_signature_desc;
  fxaa_root_signature_desc.NumParameters = UINT(FxaaRootParameter::kCount);
  fxaa_root_signature_desc.pParameters = fxaa_root_parameters;
  fxaa_root_signature_desc.NumStaticSamplers = 1;
  fxaa_root_signature_desc.pStaticSamplers = &fxaa_root_sampler;
  fxaa_root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
  *(fxaa_root_signature_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateRootSignature(provider, fxaa_root_signature_desc);
  if (!fxaa_root_signature_) {
    XELOGE("Failed to create the FXAA root signature");
    return false;
  }
  *(fxaa_pipeline_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateComputePipeline(device, shaders::fxaa_cs,
                                             sizeof(shaders::fxaa_cs),
                                             fxaa_root_signature_.Get());
  if (!fxaa_pipeline_) {
    XELOGE("Failed to create the FXAA compute pipeline");
    return false;
  }
  *(fxaa_extreme_pipeline_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateComputePipeline(device, shaders::fxaa_extreme_cs,
                                             sizeof(shaders::fxaa_extreme_cs),
                                             fxaa_root_signature_.Get());
  if (!fxaa_extreme_pipeline_) {
    XELOGE("Failed to create the extreme-quality FXAA compute pipeline");
    return false;
  }

  if (bindless_resources_used_) {
    // Create the system bindless descriptors once all resources are
    // initialized.
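    // The null SRVs force all four components to constant 0 through the
    // shader component mapping, so shaders sampling an unbound texture
    // binding read (0, 0, 0, 0) rather than undefined data.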
    // kNullTexture2DArray.
    D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc;
    null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    null_srv_desc.Shader4ComponentMapping =
        D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
    null_srv_desc.Texture2DArray.MostDetailedMip = 0;
    null_srv_desc.Texture2DArray.MipLevels = 1;
    null_srv_desc.Texture2DArray.FirstArraySlice = 0;
    null_srv_desc.Texture2DArray.ArraySize = 1;
    null_srv_desc.Texture2DArray.PlaneSlice = 0;
    null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture2DArray)));
    // kNullTexture3D.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
    null_srv_desc.Texture3D.MostDetailedMip = 0;
    null_srv_desc.Texture3D.MipLevels = 1;
    null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture3D)));
    // kNullTextureCube.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
    null_srv_desc.TextureCube.MostDetailedMip = 0;
    null_srv_desc.TextureCube.MipLevels = 1;
    null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTextureCube)));
    // kSharedMemoryRawSRV.
    shared_memory_->WriteRawSRVDescriptor(provider.OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawSRV)));
    // kSharedMemoryR32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32UintSRV)),
        2);
    // kSharedMemoryR32G32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32UintSRV)),
        3);
    // kSharedMemoryR32G32B32A32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32B32A32UintSRV)),
        4);
    // kSharedMemoryRawUAV.
    shared_memory_->WriteRawUAVDescriptor(provider.OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawUAV)));
    // kSharedMemoryR32UintUAV.
    shared_memory_->WriteUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32UintUAV)),
        2);
    // kSharedMemoryR32G32UintUAV.
    shared_memory_->WriteUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32UintUAV)),
        3);
    // kSharedMemoryR32G32B32A32UintUAV.
    shared_memory_->WriteUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32B32A32UintUAV)),
        4);
    // kEdramRawSRV.
    render_target_cache_->WriteEdramRawSRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramRawSRV)));
    // kEdramR32UintSRV.
    render_target_cache_->WriteEdramUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32UintSRV)),
        2);
    // kEdramR32G32UintSRV.
    render_target_cache_->WriteEdramUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32G32UintSRV)),
        3);
    // kEdramR32G32B32A32UintSRV.
    render_target_cache_->WriteEdramUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32G32B32A32UintSRV)),
        4);
    // kEdramRawUAV.
    render_target_cache_->WriteEdramRawUAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramRawUAV)));
    // kEdramR32UintUAV.
    render_target_cache_->WriteEdramUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32UintUAV)),
        2);
    // kEdramR32G32UintUAV.
    render_target_cache_->WriteEdramUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32G32UintUAV)),
        3);
    // kEdramR32G32B32A32UintUAV.
    render_target_cache_->WriteEdramUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kEdramR32G32B32A32UintUAV)),
        4);
    // kGammaRampTableSRV.
    WriteGammaRampSRV(false,
                      provider.OffsetViewDescriptor(
                          view_bindless_heap_cpu_start_,
                          uint32_t(SystemBindlessView::kGammaRampTableSRV)));
    // kGammaRampPWLSRV.
    WriteGammaRampSRV(true,
                      provider.OffsetViewDescriptor(
                          view_bindless_heap_cpu_start_,
                          uint32_t(SystemBindlessView::kGammaRampPWLSRV)));
  }

  pix_capture_requested_.store(false, std::memory_order_relaxed);
  pix_capturing_ = false;

  // Just not to expose uninitialized memory.
  std::memset(&system_constants_, 0, sizeof(system_constants_));

  return true;
}

void D3D12CommandProcessor::ShutdownContext() {
  AwaitAllQueueOperationsCompletion();

  ui::d3d12::util::ReleaseAndNull(readback_buffer_);
  readback_buffer_size_ = 0;

  ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
  scratch_buffer_size_ = 0;

  for (const std::pair<uint64_t, ID3D12Resource*>& resource_for_deletion :
       resources_for_deletion_) {
    resource_for_deletion.second->Release();
  }
  resources_for_deletion_.clear();

  fxaa_source_texture_submission_ = 0;
  fxaa_source_texture_.Reset();

  fxaa_extreme_pipeline_.Reset();
  fxaa_pipeline_.Reset();
  fxaa_root_signature_.Reset();

  apply_gamma_pwl_fxaa_luma_pipeline_.Reset();
  apply_gamma_pwl_pipeline_.Reset();
  apply_gamma_table_fxaa_luma_pipeline_.Reset();
  apply_gamma_table_pipeline_.Reset();
  apply_gamma_root_signature_.Reset();

  // Unmapping will be done implicitly by the destruction.
  gamma_ramp_upload_buffer_mapping_ = nullptr;
  gamma_ramp_upload_buffer_.Reset();
  gamma_ramp_buffer_.Reset();

  texture_cache_.reset();

  pipeline_cache_.reset();

  primitive_processor_.reset();

  // Shut down binding - bindless descriptors may be owned by subsystems like
  // the texture cache.

  // Root signatures are used by pipelines, thus freed after the pipelines.
  ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_);
  ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_);
  for (auto it : root_signatures_bindful_) {
    it.second->Release();
  }
  root_signatures_bindful_.clear();

  if (bindless_resources_used_) {
    texture_cache_bindless_sampler_map_.clear();
    for (const auto& sampler_bindless_heap_overflowed :
         sampler_bindless_heaps_overflowed_) {
      sampler_bindless_heap_overflowed.first->Release();
    }
    sampler_bindless_heaps_overflowed_.clear();
    sampler_bindless_heap_allocated_ = 0;
    ui::d3d12::util::ReleaseAndNull(sampler_bindless_heap_current_);
    view_bindless_one_use_descriptors_.clear();
    view_bindless_heap_free_.clear();
    ui::d3d12::util::ReleaseAndNull(view_bindless_heap_);
  } else {
    sampler_bindful_heap_pool_.reset();
    view_bindful_heap_pool_.reset();
  }
  constant_buffer_pool_.reset();

  render_target_cache_.reset();

  shared_memory_.reset();

  deferred_command_list_.Reset();
  ui::d3d12::util::ReleaseAndNull(command_list_1_);
  ui::d3d12::util::ReleaseAndNull(command_list_);
  ClearCommandAllocatorCache();

  frame_open_ = false;
  frame_current_ = 1;
  frame_completed_ = 0;
  std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));

  // First release the fences since they may reference fence_completion_event_.
  queue_operations_done_since_submission_signal_ = false;
  queue_operations_since_submission_fence_last_ = 0;
  ui::d3d12::util::ReleaseAndNull(queue_operations_since_submission_fence_);

  ui::d3d12::util::ReleaseAndNull(submission_fence_);
  submission_open_ = false;
  submission_current_ = 1;
  submission_completed_ = 0;

  if (fence_completion_event_) {
    CloseHandle(fence_completion_event_);
    fence_completion_event_ = nullptr;
  }

  device_removed_ = false;

  CommandProcessor::ShutdownContext();
}

void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
  CommandProcessor::WriteRegister(index, value);

  if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
      index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
    if (frame_open_) {
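      // Each float constant is a float4 vector covering four consecutive
      // registers, hence the >> 2. Indices 0-255 are vertex shader constants
      // and 256-511 pixel shader constants; the constant buffer is only
      // marked outdated if the written constant is actually set in the
      // 256-bit (4x uint64_t) usage map of the current shaders.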
      uint32_t float_constant_index =
          (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
      if (float_constant_index >= 256) {
        float_constant_index -= 256;
        if (current_float_constant_map_pixel_[float_constant_index >> 6] &
            (1ull << (float_constant_index & 63))) {
          cbuffer_binding_float_pixel_.up_to_date = false;
        }
      } else {
        if (current_float_constant_map_vertex_[float_constant_index >> 6] &
            (1ull << (float_constant_index & 63))) {
          cbuffer_binding_float_vertex_.up_to_date = false;
        }
      }
    }
  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
             index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
    cbuffer_binding_bool_loop_.up_to_date = false;
  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
             index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
    cbuffer_binding_fetch_.up_to_date = false;
    if (texture_cache_ != nullptr) {
      texture_cache_->TextureFetchConstantWritten(
          (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6);
    }
  }
}

void D3D12CommandProcessor::OnGammaRamp256EntryTableValueWritten() {
  gamma_ramp_256_entry_table_up_to_date_ = false;
}

void D3D12CommandProcessor::OnGammaRampPWLValueWritten() {
  gamma_ramp_pwl_up_to_date_ = false;
}

void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
                                      uint32_t frontbuffer_width,
                                      uint32_t frontbuffer_height) {
  SCOPE_profile_cpu_f("gpu");

  ui::Presenter* presenter = graphics_system_->presenter();
  if (!presenter) {
    return;
  }

  // In case the swap command is the only one in the frame.
  if (!BeginSubmission(true)) {
    return;
  }

  // Obtain the actual front buffer size to pass to RefreshGuestOutput,
  // resolution-scaled if it's a resolve destination, or not otherwise.
  D3D12_SHADER_RESOURCE_VIEW_DESC swap_texture_srv_desc;
  xenos::TextureFormat frontbuffer_format;
  ID3D12Resource* swap_texture_resource = texture_cache_->RequestSwapTexture(
      swap_texture_srv_desc, frontbuffer_format);
  if (!swap_texture_resource) {
    return;
  }
  D3D12_RESOURCE_DESC swap_texture_desc = swap_texture_resource->GetDesc();

  presenter->RefreshGuestOutput(
      uint32_t(swap_texture_desc.Width), uint32_t(swap_texture_desc.Height),
      1280, 720,
      [this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource,
       &swap_texture_desc](
          ui::Presenter::GuestOutputRefreshContext& context) -> bool {
        const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
        ID3D12Device* device = provider.GetDevice();

        SwapPostEffect swap_post_effect = GetActualSwapPostEffect();
        bool use_fxaa = swap_post_effect == SwapPostEffect::kFxaa ||
                        swap_post_effect == SwapPostEffect::kFxaaExtreme;
        if (use_fxaa) {
          // Make sure the texture of the correct size is available for FXAA.
          if (fxaa_source_texture_) {
            D3D12_RESOURCE_DESC fxaa_source_texture_desc =
                fxaa_source_texture_->GetDesc();
            if (fxaa_source_texture_desc.Width != swap_texture_desc.Width ||
                fxaa_source_texture_desc.Height != swap_texture_desc.Height) {
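              // The old texture may still be referenced by a pending
              // submission - defer its release until that submission has
              // completed on the GPU (resources_for_deletion_ is drained in
              // CheckSubmissionFence).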
              if (submission_completed_ < fxaa_source_texture_submission_) {
                fxaa_source_texture_->AddRef();
                resources_for_deletion_.emplace_back(
                    fxaa_source_texture_submission_,
                    fxaa_source_texture_.Get());
              }
              fxaa_source_texture_.Reset();
              fxaa_source_texture_submission_ = 0;
            }
          }
          if (!fxaa_source_texture_) {
            D3D12_RESOURCE_DESC fxaa_source_texture_desc;
            fxaa_source_texture_desc.Dimension =
                D3D12_RESOURCE_DIMENSION_TEXTURE2D;
            fxaa_source_texture_desc.Alignment = 0;
            fxaa_source_texture_desc.Width = swap_texture_desc.Width;
            fxaa_source_texture_desc.Height = swap_texture_desc.Height;
            fxaa_source_texture_desc.DepthOrArraySize = 1;
            fxaa_source_texture_desc.MipLevels = 1;
            fxaa_source_texture_desc.Format = kFxaaSourceTextureFormat;
            fxaa_source_texture_desc.SampleDesc.Count = 1;
            fxaa_source_texture_desc.SampleDesc.Quality = 0;
            fxaa_source_texture_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
            fxaa_source_texture_desc.Flags =
                D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
            if (FAILED(device->CreateCommittedResource(
                    &ui::d3d12::util::kHeapPropertiesDefault,
                    provider.GetHeapFlagCreateNotZeroed(),
                    &fxaa_source_texture_desc,
                    D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr,
                    IID_PPV_ARGS(&fxaa_source_texture_)))) {
              XELOGE("Failed to create the FXAA input texture");
              swap_post_effect = SwapPostEffect::kNone;
              use_fxaa = false;
            }
          }
        }

        // This is according to D3D::InitializePresentationParameters from a
        // game executable, which initializes the 256-entry table gamma ramp
        // for 8_8_8_8 output and the PWL gamma ramp for 2_10_10_10.
        // TODO(Triang3l): Choose between the table and PWL based on
        // DC_LUTA_CONTROL, support both for all formats (and also different
        // increments for PWL).
        bool use_pwl_gamma_ramp =
            frontbuffer_format == xenos::TextureFormat::k_2_10_10_10 ||
            frontbuffer_format ==
                xenos::TextureFormat::k_2_10_10_10_AS_16_16_16_16;

        context.SetIs8bpc(!use_pwl_gamma_ramp && !use_fxaa);

        // Upload the new gamma ramp, using the upload buffer for the current
        // frame (will close the frame after this anyway, so can't write
        // multiple times per frame).
        if (!(use_pwl_gamma_ramp ? gamma_ramp_pwl_up_to_date_
                                 : gamma_ramp_256_entry_table_up_to_date_)) {
          uint32_t gamma_ramp_offset_bytes = use_pwl_gamma_ramp ? 256 * 4 : 0;
          uint32_t gamma_ramp_upload_offset_bytes =
              uint32_t(frame_current_ % kQueueFrames) * ((256 + 128 * 3) * 4) +
              gamma_ramp_offset_bytes;
          uint32_t gamma_ramp_size_bytes =
              (use_pwl_gamma_ramp ? 128 * 3 : 256) * 4;
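          // Each of the kQueueFrames slots in the upload buffer holds both
          // ramps back to back: the 256-entry table (256 * 4 bytes) at offset
          // 0, then the PWL ramp (3 * 128 * 4 bytes) - matching
          // gamma_ramp_offset_bytes and the slot stride above.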
          if (std::endian::native != std::endian::little &&
              use_pwl_gamma_ramp) {
            // R16G16 is first R16, where the shader expects the base, and
            // second G16, where the delta should be, but gamma_ramp_pwl_rgb()
            // is an array of 32-bit DC_LUT_PWL_DATA registers - swap 16 bits
            // in each 32.
            auto gamma_ramp_pwl_upload_buffer =
                reinterpret_cast<reg::DC_LUT_PWL_DATA*>(
                    gamma_ramp_upload_buffer_mapping_ +
                    gamma_ramp_upload_offset_bytes);
            const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl = gamma_ramp_pwl_rgb();
            for (size_t i = 0; i < 128 * 3; ++i) {
              reg::DC_LUT_PWL_DATA& gamma_ramp_pwl_upload_buffer_entry =
                  gamma_ramp_pwl_upload_buffer[i];
              reg::DC_LUT_PWL_DATA gamma_ramp_pwl_entry = gamma_ramp_pwl[i];
              gamma_ramp_pwl_upload_buffer_entry.base =
                  gamma_ramp_pwl_entry.delta;
              gamma_ramp_pwl_upload_buffer_entry.delta =
                  gamma_ramp_pwl_entry.base;
            }
          } else {
            std::memcpy(
                gamma_ramp_upload_buffer_mapping_ +
                    gamma_ramp_upload_offset_bytes,
                use_pwl_gamma_ramp
                    ? static_cast<const void*>(gamma_ramp_pwl_rgb())
                    : static_cast<const void*>(gamma_ramp_256_entry_table()),
                gamma_ramp_size_bytes);
          }
          PushTransitionBarrier(gamma_ramp_buffer_.Get(),
                                gamma_ramp_buffer_state_,
                                D3D12_RESOURCE_STATE_COPY_DEST);
          gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
          SubmitBarriers();
          deferred_command_list_.D3DCopyBufferRegion(
              gamma_ramp_buffer_.Get(), gamma_ramp_offset_bytes,
              gamma_ramp_upload_buffer_.Get(), gamma_ramp_upload_offset_bytes,
              gamma_ramp_size_bytes);
          (use_pwl_gamma_ramp ? gamma_ramp_pwl_up_to_date_
                              : gamma_ramp_256_entry_table_up_to_date_) = true;
        }

        // Destination, source, and if bindful, gamma ramp.
        ui::d3d12::util::DescriptorCpuGpuHandlePair apply_gamma_descriptors[3];
        ui::d3d12::util::DescriptorCpuGpuHandlePair
            apply_gamma_descriptor_gamma_ramp;
        if (!RequestOneUseSingleViewDescriptors(
                bindless_resources_used_ ? 2 : 3, apply_gamma_descriptors)) {
          return false;
        }
        // Must not call anything that can change the descriptor heap from now
        // on!
        if (bindless_resources_used_) {
          apply_gamma_descriptor_gamma_ramp = GetSystemBindlessViewHandlePair(
              use_pwl_gamma_ramp ? SystemBindlessView::kGammaRampPWLSRV
                                 : SystemBindlessView::kGammaRampTableSRV);
        } else {
          apply_gamma_descriptor_gamma_ramp = apply_gamma_descriptors[2];
          WriteGammaRampSRV(use_pwl_gamma_ramp,
                            apply_gamma_descriptor_gamma_ramp.first);
        }

        ID3D12Resource* guest_output_resource =
            static_cast<
                ui::d3d12::D3D12Presenter::D3D12GuestOutputRefreshContext&>(
                context)
                .resource_uav_capable();

        if (use_fxaa) {
          fxaa_source_texture_submission_ = submission_current_;
        }

        ID3D12Resource* apply_gamma_dest =
            use_fxaa ? fxaa_source_texture_.Get() : guest_output_resource;
        D3D12_RESOURCE_STATES apply_gamma_dest_initial_state =
            use_fxaa ? D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
                     : ui::d3d12::D3D12Presenter::kGuestOutputInternalState;
        PushTransitionBarrier(apply_gamma_dest, apply_gamma_dest_initial_state,
                              D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        // From now on, even in case of failure, apply_gamma_dest must be
        // transitioned back to apply_gamma_dest_initial_state!
        D3D12_UNORDERED_ACCESS_VIEW_DESC apply_gamma_dest_uav_desc;
        apply_gamma_dest_uav_desc.Format =
            use_fxaa ? kFxaaSourceTextureFormat
                     : ui::d3d12::D3D12Presenter::kGuestOutputFormat;
        apply_gamma_dest_uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
        apply_gamma_dest_uav_desc.Texture2D.MipSlice = 0;
        apply_gamma_dest_uav_desc.Texture2D.PlaneSlice = 0;
        device->CreateUnorderedAccessView(apply_gamma_dest, nullptr,
                                          &apply_gamma_dest_uav_desc,
                                          apply_gamma_descriptors[0].first);

        device->CreateShaderResourceView(swap_texture_resource,
                                         &swap_texture_srv_desc,
                                         apply_gamma_descriptors[1].first);

        PushTransitionBarrier(gamma_ramp_buffer_.Get(),
                              gamma_ramp_buffer_state_,
                              D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
        gamma_ramp_buffer_state_ =
            D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;

        deferred_command_list_.D3DSetComputeRootSignature(
            apply_gamma_root_signature_.Get());
        ApplyGammaConstants apply_gamma_constants;
        apply_gamma_constants.size[0] = uint32_t(swap_texture_desc.Width);
        apply_gamma_constants.size[1] = uint32_t(swap_texture_desc.Height);
        deferred_command_list_.D3DSetComputeRoot32BitConstants(
            UINT(ApplyGammaRootParameter::kConstants),
            sizeof(apply_gamma_constants) / sizeof(uint32_t),
            &apply_gamma_constants, 0);
        deferred_command_list_.D3DSetComputeRootDescriptorTable(
            UINT(ApplyGammaRootParameter::kDestination),
            apply_gamma_descriptors[0].second);
        deferred_command_list_.D3DSetComputeRootDescriptorTable(
            UINT(ApplyGammaRootParameter::kSource),
            apply_gamma_descriptors[1].second);
        deferred_command_list_.D3DSetComputeRootDescriptorTable(
            UINT(ApplyGammaRootParameter::kRamp),
            apply_gamma_descriptor_gamma_ramp.second);
        ID3D12PipelineState* apply_gamma_pipeline;
        if (use_pwl_gamma_ramp) {
          apply_gamma_pipeline = use_fxaa
                                     ? apply_gamma_pwl_fxaa_luma_pipeline_.Get()
                                     : apply_gamma_pwl_pipeline_.Get();
        } else {
          apply_gamma_pipeline =
              use_fxaa ? apply_gamma_table_fxaa_luma_pipeline_.Get()
                       : apply_gamma_table_pipeline_.Get();
        }
        SetExternalPipeline(apply_gamma_pipeline);
        SubmitBarriers();
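        // Round the dispatch up to whole thread groups - the gamma ramp
        // application shaders are assumed to use 16x8-thread groups.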
        uint32_t group_count_x = (uint32_t(swap_texture_desc.Width) + 15) / 16;
        uint32_t group_count_y = (uint32_t(swap_texture_desc.Height) + 7) / 8;
        deferred_command_list_.D3DDispatch(group_count_x, group_count_y, 1);

        // Apply FXAA.
        if (use_fxaa) {
          // Destination and source.
          ui::d3d12::util::DescriptorCpuGpuHandlePair fxaa_descriptors[2];
          if (!RequestOneUseSingleViewDescriptors(
                  uint32_t(xe::countof(fxaa_descriptors)), fxaa_descriptors)) {
            // Failed to obtain descriptors for FXAA - just copy after gamma
            // ramp application without applying FXAA.
            PushTransitionBarrier(apply_gamma_dest,
                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                  D3D12_RESOURCE_STATE_COPY_SOURCE);
            PushTransitionBarrier(
                guest_output_resource,
                ui::d3d12::D3D12Presenter::kGuestOutputInternalState,
                D3D12_RESOURCE_STATE_COPY_DEST);
            SubmitBarriers();
            deferred_command_list_.D3DCopyResource(guest_output_resource,
                                                   apply_gamma_dest);
            PushTransitionBarrier(apply_gamma_dest,
                                  D3D12_RESOURCE_STATE_COPY_SOURCE,
                                  apply_gamma_dest_initial_state);
            PushTransitionBarrier(
                guest_output_resource, D3D12_RESOURCE_STATE_COPY_DEST,
                ui::d3d12::D3D12Presenter::kGuestOutputInternalState);
            return false;
          } else {
            assert_true(apply_gamma_dest_initial_state ==
                        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
            PushTransitionBarrier(apply_gamma_dest,
                                  D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                  apply_gamma_dest_initial_state);
            PushTransitionBarrier(
                guest_output_resource,
                ui::d3d12::D3D12Presenter::kGuestOutputInternalState,
                D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
            // From now on, even in case of failure, guest_output_resource must
            // be transitioned back to kGuestOutputInternalState!
            deferred_command_list_.D3DSetComputeRootSignature(
                fxaa_root_signature_.Get());
            FxaaConstants fxaa_constants;
            fxaa_constants.size[0] = uint32_t(swap_texture_desc.Width);
            fxaa_constants.size[1] = uint32_t(swap_texture_desc.Height);
            fxaa_constants.size_inv[0] = 1.0f / float(fxaa_constants.size[0]);
            fxaa_constants.size_inv[1] = 1.0f / float(fxaa_constants.size[1]);
            deferred_command_list_.D3DSetComputeRoot32BitConstants(
                UINT(FxaaRootParameter::kConstants),
                sizeof(fxaa_constants) / sizeof(uint32_t), &fxaa_constants, 0);
            D3D12_UNORDERED_ACCESS_VIEW_DESC fxaa_dest_uav_desc;
            fxaa_dest_uav_desc.Format =
                ui::d3d12::D3D12Presenter::kGuestOutputFormat;
            fxaa_dest_uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
            fxaa_dest_uav_desc.Texture2D.MipSlice = 0;
            fxaa_dest_uav_desc.Texture2D.PlaneSlice = 0;
            device->CreateUnorderedAccessView(guest_output_resource, nullptr,
                                              &fxaa_dest_uav_desc,
                                              fxaa_descriptors[0].first);
            deferred_command_list_.D3DSetComputeRootDescriptorTable(
                UINT(FxaaRootParameter::kDestination),
                fxaa_descriptors[0].second);
            D3D12_SHADER_RESOURCE_VIEW_DESC fxaa_source_srv_desc;
            fxaa_source_srv_desc.Format = kFxaaSourceTextureFormat;
            fxaa_source_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
            fxaa_source_srv_desc.Shader4ComponentMapping =
                D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
            fxaa_source_srv_desc.Texture2D.MostDetailedMip = 0;
            fxaa_source_srv_desc.Texture2D.MipLevels = 1;
            fxaa_source_srv_desc.Texture2D.PlaneSlice = 0;
            fxaa_source_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
            device->CreateShaderResourceView(fxaa_source_texture_.Get(),
                                             &fxaa_source_srv_desc,
                                             fxaa_descriptors[1].first);
            deferred_command_list_.D3DSetComputeRootDescriptorTable(
                UINT(FxaaRootParameter::kSource), fxaa_descriptors[1].second);
            SetExternalPipeline(swap_post_effect == SwapPostEffect::kFxaaExtreme
                                    ? fxaa_extreme_pipeline_.Get()
                                    : fxaa_pipeline_.Get());
            SubmitBarriers();
            deferred_command_list_.D3DDispatch(group_count_x, group_count_y, 1);
            PushTransitionBarrier(
                guest_output_resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                ui::d3d12::D3D12Presenter::kGuestOutputInternalState);
          }
        } else {
          assert_true(apply_gamma_dest_initial_state ==
                      ui::d3d12::D3D12Presenter::kGuestOutputInternalState);
          PushTransitionBarrier(apply_gamma_dest,
                                D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                                apply_gamma_dest_initial_state);
        }

        // Need to submit all the commands before giving the image back to the
        // presenter so it can submit its own commands for displaying it to the
        // queue.
        SubmitBarriers();
        EndSubmission(true);
        return true;
      });

  // End the frame even if it did not present for any reason (the image
  // refresher was not called), to prevent leaking per-frame resources.
  EndSubmission(true);
}

void D3D12CommandProcessor::OnPrimaryBufferEnd() {
  if (cvars::d3d12_submit_on_primary_buffer_end && submission_open_ &&
      CanEndSubmissionImmediately()) {
    EndSubmission(false);
  }
}

Shader* D3D12CommandProcessor::LoadShader(xenos::ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
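  // guest_address is not needed here - the pipeline cache appears to key
  // shaders by the contents of the ucode rather than by their location in
  // guest memory.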
  return pipeline_cache_->LoadShader(shader_type, host_address, dword_count);
}

bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
                                      uint32_t index_count,
                                      IndexBufferInfo* index_buffer_info,
                                      bool major_mode_explicit) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  ID3D12Device* device = GetD3D12Provider().GetDevice();
  const RegisterFile& regs = *register_file_;

  xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  if (edram_mode == xenos::ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy();
  }

  if (regs.Get<reg::RB_SURFACE_INFO>().surface_pitch == 0) {
    // Doesn't actually draw.
    // TODO(Triang3l): Do something so memexport still works in this case
    // maybe? Unlikely that zero would even really be legal though.
    return true;
  }

  // Vertex shader analysis.
  auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
  if (!vertex_shader) {
    // Always need a vertex shader.
    return false;
  }
  pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
  bool memexport_used_vertex = vertex_shader->is_valid_memexport_used();

  // Pixel shader analysis.
  bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
  bool is_rasterization_done =
      draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
  D3D12Shader* pixel_shader = nullptr;
  if (is_rasterization_done) {
    // See xenos::ModeControl for explanation why the pixel shader is only used
    // when it's kColorDepth here.
    if (edram_mode == xenos::ModeControl::kColorDepth) {
      pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
      if (pixel_shader) {
        pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
        if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
                                                             regs)) {
          pixel_shader = nullptr;
        }
      }
    }
  } else {
    // Disabling pixel shader for this case is also required by the pipeline
    // cache.
    if (!memexport_used_vertex) {
      // This draw has no effect.
      return true;
    }
  }
  bool memexport_used_pixel =
      pixel_shader && pixel_shader->is_valid_memexport_used();
  bool memexport_used = memexport_used_vertex || memexport_used_pixel;

  if (!BeginSubmission(true)) {
    return false;
  }

  // Process primitives.
  PrimitiveProcessor::ProcessingResult primitive_processing_result;
  if (!primitive_processor_->Process(primitive_processing_result)) {
    return false;
  }
  if (!primitive_processing_result.host_draw_vertex_count) {
    // Nothing to draw.
    return true;
  }

  reg::RB_DEPTHCONTROL normalized_depth_control =
      draw_util::GetNormalizedDepthControl(regs);

  // Shader modifications.
  uint32_t ps_param_gen_pos = UINT32_MAX;
  uint32_t interpolator_mask =
      pixel_shader ? (vertex_shader->writes_interpolators() &
                      pixel_shader->GetInterpolatorInputMask(
                          regs.Get<reg::SQ_PROGRAM_CNTL>(),
                          regs.Get<reg::SQ_CONTEXT_MISC>(), ps_param_gen_pos))
                   : 0;
  DxbcShaderTranslator::Modification vertex_shader_modification =
      pipeline_cache_->GetCurrentVertexShaderModification(
          *vertex_shader, primitive_processing_result.host_vertex_shader_type,
          interpolator_mask);
  DxbcShaderTranslator::Modification pixel_shader_modification =
      pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification(
                         *pixel_shader, interpolator_mask, ps_param_gen_pos,
                         normalized_depth_control)
                   : DxbcShaderTranslator::Modification(0);

  // Set up the render targets - this may perform dispatches and draws.
  uint32_t normalized_color_mask =
      pixel_shader ? draw_util::GetNormalizedColorMask(
                         regs, pixel_shader->writes_color_targets())
                   : 0;
  if (!render_target_cache_->Update(is_rasterization_done,
                                    normalized_depth_control,
                                    normalized_color_mask, *vertex_shader)) {
    return false;
  }

  // Create the pipeline (for this, need the actually used render target
  // formats from the render target cache), translating the shaders - doing
  // this now to obtain the used textures.
  D3D12Shader::D3D12Translation* vertex_shader_translation =
      static_cast<D3D12Shader::D3D12Translation*>(
          vertex_shader->GetOrCreateTranslation(
              vertex_shader_modification.value));
  D3D12Shader::D3D12Translation* pixel_shader_translation =
      pixel_shader ? static_cast<D3D12Shader::D3D12Translation*>(
                         pixel_shader->GetOrCreateTranslation(
                             pixel_shader_modification.value))
                   : nullptr;
  uint32_t bound_depth_and_color_render_target_bits;
  uint32_t bound_depth_and_color_render_target_formats
      [1 + xenos::kMaxColorRenderTargets];
  bool host_render_targets_used = render_target_cache_->GetPath() ==
                                  RenderTargetCache::Path::kHostRenderTargets;
  if (host_render_targets_used) {
    bound_depth_and_color_render_target_bits =
        render_target_cache_->GetLastUpdateBoundRenderTargets(
            render_target_cache_->gamma_render_target_as_srgb(),
            bound_depth_and_color_render_target_formats);
  } else {
    bound_depth_and_color_render_target_bits = 0;
  }
  void* pipeline_handle;
  ID3D12RootSignature* root_signature;
  if (!pipeline_cache_->ConfigurePipeline(
          vertex_shader_translation, pixel_shader_translation,
          primitive_processing_result, normalized_depth_control,
          normalized_color_mask, bound_depth_and_color_render_target_bits,
          bound_depth_and_color_render_target_formats, &pipeline_handle,
          &root_signature)) {
    return false;
  }

  // Update the textures - this may bind pipelines.
  uint32_t used_texture_mask =
      vertex_shader->GetUsedTextureMaskAfterTranslation() |
      (pixel_shader != nullptr
           ? pixel_shader->GetUsedTextureMaskAfterTranslation()
           : 0);
  texture_cache_->RequestTextures(used_texture_mask);

  // Bind the pipeline after configuring it and doing everything that may bind
  // other pipelines.
  if (current_guest_pipeline_ != pipeline_handle) {
    deferred_command_list_.SetPipelineStateHandle(
        reinterpret_cast<void*>(pipeline_handle));
    current_guest_pipeline_ = pipeline_handle;
    current_external_pipeline_ = nullptr;
  }

  // Get dynamic rasterizer state.
  uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
  uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
  draw_util::ViewportInfo viewport_info;
  draw_util::GetHostViewportInfo(
      regs, draw_resolution_scale_x, draw_resolution_scale_y, true,
      D3D12_VIEWPORT_BOUNDS_MAX, D3D12_VIEWPORT_BOUNDS_MAX, false,
      normalized_depth_control,
      host_render_targets_used &&
          render_target_cache_->depth_float24_convert_in_pixel_shader(),
      host_render_targets_used, pixel_shader && pixel_shader->writes_depth(),
      viewport_info);
  draw_util::Scissor scissor;
  draw_util::GetScissor(regs, scissor);
  scissor.offset[0] *= draw_resolution_scale_x;
  scissor.offset[1] *= draw_resolution_scale_y;
  scissor.extent[0] *= draw_resolution_scale_x;
  scissor.extent[1] *= draw_resolution_scale_y;

  // Update viewport, scissor, blend factor and stencil reference.
  UpdateFixedFunctionState(viewport_info, scissor, primitive_polygonal,
                           normalized_depth_control);

  // Update system constants before uploading them.
  // TODO(Triang3l): With ROV, pass the disabled render target mask for safety.
  UpdateSystemConstantValues(
      memexport_used, primitive_polygonal,
      primitive_processing_result.line_loop_closing_index,
      primitive_processing_result.host_shader_index_endian, viewport_info,
      used_texture_mask, normalized_depth_control, normalized_color_mask);

  // Update constant buffers, descriptors and root parameters.
  if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) {
    return false;
  }
  // Must not call anything that can change the descriptor heap from now on!

  // Ensure vertex buffers are resident.
  // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
  // validity is tracked.
  const Shader::ConstantRegisterMap& constant_map_vertex =
      vertex_shader->constant_register_map();
  for (uint32_t i = 0; i < xe::countof(constant_map_vertex.vertex_fetch_bitmap);
       ++i) {
    uint32_t vfetch_bits_remaining = constant_map_vertex.vertex_fetch_bitmap[i];
    uint32_t j;
    while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
      vfetch_bits_remaining &= ~(uint32_t(1) << j);
      uint32_t vfetch_index = i * 32 + j;
      const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
          XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
      switch (vfetch_constant.type) {
        case xenos::FetchConstantType::kVertex:
          break;
        case xenos::FetchConstantType::kInvalidVertex:
          if (cvars::gpu_allow_invalid_fetch_constants) {
            break;
          }
          XELOGW(
              "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
              "This is incorrect behavior, but you can try bypassing this by "
              "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
              vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
          return false;
        default:
          XELOGW(
              "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
              vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
          return false;
      }
      if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
                                        vfetch_constant.size << 2)) {
        XELOGE(
            "Failed to request vertex buffer at 0x{:08X} (size {}) in the "
            "shared memory",
            vfetch_constant.address << 2, vfetch_constant.size << 2);
        return false;
      }
    }
  }

  // Gather memexport ranges and ensure the heaps for them are resident, and
  // also load the data surrounding the export and to fill the regions that
  // won't be modified by the shaders.
  struct MemExportRange {
    uint32_t base_address_dwords;
    uint32_t size_dwords;
  };
  MemExportRange memexport_ranges[512];
  uint32_t memexport_range_count = 0;
  if (memexport_used_vertex) {
    for (uint32_t constant_index :
         vertex_shader->memexport_stream_constants()) {
      const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
          XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4);
      if (memexport_stream.index_count == 0) {
        continue;
      }
      uint32_t memexport_format_size =
          GetSupportedMemExportFormatSize(memexport_stream.format);
      if (memexport_format_size == 0) {
        XELOGE("Unsupported memexport format {}",
               FormatInfo::Get(
                   xenos::TextureFormat(uint32_t(memexport_stream.format)))
                   ->name);
        return false;
      }
      uint32_t memexport_size_dwords =
          memexport_stream.index_count * memexport_format_size;
      // Try to reduce the number of shared memory operations when writing
      // different elements into the same buffer through different exports
      // (happens in 4D5307E6).
      bool memexport_range_reused = false;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        MemExportRange& memexport_range = memexport_ranges[i];
        if (memexport_range.base_address_dwords ==
            memexport_stream.base_address) {
          memexport_range.size_dwords =
              std::max(memexport_range.size_dwords, memexport_size_dwords);
          memexport_range_reused = true;
          break;
        }
      }
      // Add a new range if haven't expanded an existing one.
      if (!memexport_range_reused) {
        MemExportRange& memexport_range =
            memexport_ranges[memexport_range_count++];
        memexport_range.base_address_dwords = memexport_stream.base_address;
        memexport_range.size_dwords = memexport_size_dwords;
      }
    }
  }
  if (memexport_used_pixel) {
    for (uint32_t constant_index : pixel_shader->memexport_stream_constants()) {
      const auto& memexport_stream = regs.Get<xenos::xe_gpu_memexport_stream_t>(
          XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4);
      if (memexport_stream.index_count == 0) {
        continue;
      }
      uint32_t memexport_format_size =
          GetSupportedMemExportFormatSize(memexport_stream.format);
      if (memexport_format_size == 0) {
        XELOGE("Unsupported memexport format {}",
               FormatInfo::Get(
                   xenos::TextureFormat(uint32_t(memexport_stream.format)))
                   ->name);
        return false;
      }
      uint32_t memexport_size_dwords =
          memexport_stream.index_count * memexport_format_size;
      bool memexport_range_reused = false;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        MemExportRange& memexport_range = memexport_ranges[i];
        if (memexport_range.base_address_dwords ==
            memexport_stream.base_address) {
          memexport_range.size_dwords =
              std::max(memexport_range.size_dwords, memexport_size_dwords);
          memexport_range_reused = true;
          break;
        }
      }
      if (!memexport_range_reused) {
        MemExportRange& memexport_range =
            memexport_ranges[memexport_range_count++];
        memexport_range.base_address_dwords = memexport_stream.base_address;
        memexport_range.size_dwords = memexport_size_dwords;
      }
    }
  }
  for (uint32_t i = 0; i < memexport_range_count; ++i) {
    const MemExportRange& memexport_range = memexport_ranges[i];
    if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2,
                                      memexport_range.size_dwords << 2)) {
      XELOGE(
          "Failed to request memexport stream at 0x{:08X} (size {}) in the "
          "shared memory",
          memexport_range.base_address_dwords << 2,
          memexport_range.size_dwords << 2);
      return false;
    }
  }

  // Primitive topology.
  D3D_PRIMITIVE_TOPOLOGY primitive_topology;
  if (primitive_processing_result.IsTessellated()) {
    switch (primitive_processing_result.host_primitive_type) {
      // TODO(Triang3l): Support all primitive types.
      case xenos::PrimitiveType::kTriangleList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST;
        break;
      case xenos::PrimitiveType::kQuadList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST;
        break;
      case xenos::PrimitiveType::kTrianglePatch:
        primitive_topology =
            (regs.Get<reg::VGT_HOS_CNTL>().tess_mode ==
             xenos::TessellationMode::kAdaptive)
                ? D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST
                : D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST;
        break;
      case xenos::PrimitiveType::kQuadPatch:
        primitive_topology =
            (regs.Get<reg::VGT_HOS_CNTL>().tess_mode ==
             xenos::TessellationMode::kAdaptive)
                ? D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST
                : D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST;
        break;
      default:
        XELOGE(
            "Host tessellated primitive type {} returned by the primitive "
            "processor is not supported by the Direct3D 12 command processor",
            uint32_t(primitive_processing_result.host_primitive_type));
        assert_unhandled_case(primitive_processing_result.host_primitive_type);
        return false;
    }
  } else {
    switch (primitive_processing_result.host_primitive_type) {
      case xenos::PrimitiveType::kPointList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
        break;
      case xenos::PrimitiveType::kLineList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
        break;
      case xenos::PrimitiveType::kLineStrip:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
        break;
      case xenos::PrimitiveType::kTriangleList:
      case xenos::PrimitiveType::kRectangleList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
        break;
      case xenos::PrimitiveType::kTriangleStrip:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
        break;
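      // Quad lists are expanded to triangles in a geometry shader that takes
      // lines with adjacency as input, which is presumably why the host
      // topology for them is LINELIST_ADJ rather than a triangle topology.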
      case xenos::PrimitiveType::kQuadList:
        primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
        break;
      default:
        XELOGE(
            "Host primitive type {} returned by the primitive processor is not "
            "supported by the Direct3D 12 command processor",
            uint32_t(primitive_processing_result.host_primitive_type));
        assert_unhandled_case(primitive_processing_result.host_primitive_type);
        return false;
    }
  }
  SetPrimitiveTopology(primitive_topology);
  // Must not call anything that may change the primitive topology from now on!

  // Draw.
  if (primitive_processing_result.index_buffer_type ==
      PrimitiveProcessor::ProcessedIndexBufferType::kNone) {
    if (memexport_used) {
      shared_memory_->UseForWriting();
    } else {
      shared_memory_->UseForReading();
    }
    SubmitBarriers();
    deferred_command_list_.D3DDrawInstanced(
        primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
  } else {
    D3D12_INDEX_BUFFER_VIEW index_buffer_view;
    index_buffer_view.SizeInBytes =
        primitive_processing_result.host_draw_vertex_count;
    if (primitive_processing_result.host_index_format ==
        xenos::IndexFormat::kInt16) {
      index_buffer_view.SizeInBytes *= sizeof(uint16_t);
      index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
    } else {
      index_buffer_view.SizeInBytes *= sizeof(uint32_t);
      index_buffer_view.Format = DXGI_FORMAT_R32_UINT;
    }
    ID3D12Resource* scratch_index_buffer = nullptr;
    switch (primitive_processing_result.index_buffer_type) {
      case PrimitiveProcessor::ProcessedIndexBufferType::kGuestDMA: {
        if (memexport_used) {
          // If the shared memory is a UAV, it can't be used as an index buffer
          // (UAV is a read/write state, index buffer is a read-only state).
          // Need to copy the indices to a buffer in the index buffer state.
          scratch_index_buffer = RequestScratchGPUBuffer(
              index_buffer_view.SizeInBytes, D3D12_RESOURCE_STATE_COPY_DEST);
          if (scratch_index_buffer == nullptr) {
            return false;
          }
          shared_memory_->UseAsCopySource();
          SubmitBarriers();
          deferred_command_list_.D3DCopyBufferRegion(
              scratch_index_buffer, 0, shared_memory_->GetBuffer(),
              primitive_processing_result.guest_index_base,
              index_buffer_view.SizeInBytes);
          PushTransitionBarrier(scratch_index_buffer,
                                D3D12_RESOURCE_STATE_COPY_DEST,
                                D3D12_RESOURCE_STATE_INDEX_BUFFER);
          index_buffer_view.BufferLocation =
              scratch_index_buffer->GetGPUVirtualAddress();
        } else {
          index_buffer_view.BufferLocation =
              shared_memory_->GetGPUAddress() +
              primitive_processing_result.guest_index_base;
        }
      } break;
      case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted:
        index_buffer_view.BufferLocation =
            primitive_processor_->GetConvertedIndexBufferGpuAddress(
                primitive_processing_result.host_index_buffer_handle);
        break;
      case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForAuto:
      case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltinForDMA:
        index_buffer_view.BufferLocation =
            primitive_processor_->GetBuiltinIndexBufferGpuAddress(
                primitive_processing_result.host_index_buffer_handle);
        break;
      default:
        assert_unhandled_case(primitive_processing_result.index_buffer_type);
        return false;
    }
    deferred_command_list_.D3DIASetIndexBuffer(&index_buffer_view);
    if (memexport_used) {
      shared_memory_->UseForWriting();
    } else {
      shared_memory_->UseForReading();
    }
    SubmitBarriers();
    deferred_command_list_.D3DDrawIndexedInstanced(
        primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
    if (scratch_index_buffer != nullptr) {
      ReleaseScratchGPUBuffer(scratch_index_buffer,
                              D3D12_RESOURCE_STATE_INDEX_BUFFER);
    }
  }

  if (memexport_used) {
    // Make sure this memexporting draw is ordered with other work using shared
    // memory as a UAV.
    // TODO(Triang3l): Find some PM4 command that can be used for indication of
    // when memexports should be awaited?
    shared_memory_->MarkUAVWritesCommitNeeded();
    // Invalidate textures in memexported memory and watch for changes.
    for (uint32_t i = 0; i < memexport_range_count; ++i) {
      const MemExportRange& memexport_range = memexport_ranges[i];
      shared_memory_->RangeWrittenByGpu(
          memexport_range.base_address_dwords << 2,
          memexport_range.size_dwords << 2, false);
    }
    if (cvars::d3d12_readback_memexport) {
      // Read the exported data on the CPU.
      uint32_t memexport_total_size = 0;
      for (uint32_t i = 0; i < memexport_range_count; ++i) {
        memexport_total_size += memexport_ranges[i].size_dwords << 2;
      }
      if (memexport_total_size != 0) {
        ID3D12Resource* readback_buffer =
            RequestReadbackBuffer(memexport_total_size);
        if (readback_buffer != nullptr) {
          shared_memory_->UseAsCopySource();
          SubmitBarriers();
          ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
          uint32_t readback_buffer_offset = 0;
          for (uint32_t i = 0; i < memexport_range_count; ++i) {
            const MemExportRange& memexport_range = memexport_ranges[i];
            uint32_t memexport_range_size = memexport_range.size_dwords << 2;
            deferred_command_list_.D3DCopyBufferRegion(
                readback_buffer, readback_buffer_offset, shared_memory_buffer,
                memexport_range.base_address_dwords << 2, memexport_range_size);
            readback_buffer_offset += memexport_range_size;
          }
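          // This stalls the CPU mid-frame until all GPU work submitted so far
          // has completed - the main cost of d3d12_readback_memexport.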
          if (AwaitAllQueueOperationsCompletion()) {
            D3D12_RANGE readback_range;
            readback_range.Begin = 0;
            readback_range.End = memexport_total_size;
            void* readback_mapping;
            if (SUCCEEDED(readback_buffer->Map(0, &readback_range,
                                               &readback_mapping))) {
              const uint32_t* readback_dwords =
                  reinterpret_cast<const uint32_t*>(readback_mapping);
              for (uint32_t i = 0; i < memexport_range_count; ++i) {
                const MemExportRange& memexport_range = memexport_ranges[i];
                std::memcpy(memory_->TranslatePhysical(
                                memexport_range.base_address_dwords << 2),
                            readback_dwords, memexport_range.size_dwords << 2);
                readback_dwords += memexport_range.size_dwords;
              }
              D3D12_RANGE readback_write_range = {};
              readback_buffer->Unmap(0, &readback_write_range);
            }
          }
        }
      }
    }
  }

  return true;
}

void D3D12CommandProcessor::InitializeTrace() {
  CommandProcessor::InitializeTrace();

  if (!BeginSubmission(false)) {
    return;
  }
  bool render_target_cache_submitted =
      render_target_cache_->InitializeTraceSubmitDownloads();
  bool shared_memory_submitted =
      shared_memory_->InitializeTraceSubmitDownloads();
  if (!render_target_cache_submitted && !shared_memory_submitted) {
    return;
  }
  AwaitAllQueueOperationsCompletion();
  if (render_target_cache_submitted) {
    render_target_cache_->InitializeTraceCompleteDownloads();
  }
  if (shared_memory_submitted) {
    shared_memory_->InitializeTraceCompleteDownloads();
  }
}

bool D3D12CommandProcessor::IssueCopy() {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  if (!BeginSubmission(true)) {
    return false;
  }
  uint32_t written_address, written_length;
  if (!render_target_cache_->Resolve(*memory_, *shared_memory_, *texture_cache_,
                                     written_address, written_length)) {
    return false;
  }
  if (cvars::d3d12_readback_resolve &&
      !texture_cache_->IsDrawResolutionScaled() && written_length) {
    // Read the resolved data on the CPU.
    ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
    if (readback_buffer != nullptr) {
      shared_memory_->UseAsCopySource();
      SubmitBarriers();
      ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
      deferred_command_list_.D3DCopyBufferRegion(
          readback_buffer, 0, shared_memory_buffer, written_address,
          written_length);
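      // Like the memexport readback, this stalls the CPU mid-frame until all
      // submitted GPU work completes, which is why readback resolves have a
      // large performance impact.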
      if (AwaitAllQueueOperationsCompletion()) {
        D3D12_RANGE readback_range;
        readback_range.Begin = 0;
        readback_range.End = written_length;
        void* readback_mapping;
        if (SUCCEEDED(
                readback_buffer->Map(0, &readback_range, &readback_mapping))) {
          std::memcpy(memory_->TranslatePhysical(written_address),
                      readback_mapping, written_length);
          D3D12_RANGE readback_write_range = {};
          readback_buffer->Unmap(0, &readback_write_range);
        }
      }
    }
  }
  return true;
}

void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
  if (await_submission >= submission_current_) {
    if (submission_open_) {
      EndSubmission(false);
    }
    // Ending an open submission causes queue operations done directly (like
    // UpdateTileMappings) to be tracked within the scope of that submission,
    // but just in case of a failure, or of queue operations being done outside
    // of a submission, await explicitly.
    if (queue_operations_done_since_submission_signal_) {
      UINT64 fence_value = ++queue_operations_since_submission_fence_last_;
      ID3D12CommandQueue* direct_queue = GetD3D12Provider().GetDirectQueue();
      if (SUCCEEDED(direct_queue->Signal(
              queue_operations_since_submission_fence_, fence_value)) &&
          SUCCEEDED(queue_operations_since_submission_fence_
                        ->SetEventOnCompletion(fence_value,
                                               fence_completion_event_))) {
        WaitForSingleObject(fence_completion_event_, INFINITE);
        queue_operations_done_since_submission_signal_ = false;
      } else {
        XELOGE(
            "Failed to await an out-of-submission queue operation completion "
            "Direct3D 12 fence");
      }
    }
    // A submission won't be ended if it hasn't been started, or if ending has
    // failed - clamp the index.
    await_submission = submission_current_ - 1;
  }

  uint64_t submission_completed_before = submission_completed_;
  submission_completed_ = submission_fence_->GetCompletedValue();
  if (submission_completed_ < await_submission) {
    if (SUCCEEDED(submission_fence_->SetEventOnCompletion(
            await_submission, fence_completion_event_))) {
      WaitForSingleObject(fence_completion_event_, INFINITE);
      submission_completed_ = submission_fence_->GetCompletedValue();
    }
  }
  if (submission_completed_ < await_submission) {
    XELOGE("Failed to await a submission completion Direct3D 12 fence");
  }
  if (submission_completed_ <= submission_completed_before) {
    // Not updated - no need to reclaim or download things.
    return;
  }

  // Reclaim command allocators.
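  // Allocators whose last submission has completed are moved from the front
  // of the submitted list to the tail of the writable list so they can be
  // reset and reused.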
  while (command_allocator_submitted_first_) {
    if (command_allocator_submitted_first_->last_usage_submission >
        submission_completed_) {
      break;
    }
    if (command_allocator_writable_last_) {
      command_allocator_writable_last_->next =
          command_allocator_submitted_first_;
    } else {
      command_allocator_writable_first_ = command_allocator_submitted_first_;
    }
    command_allocator_writable_last_ = command_allocator_submitted_first_;
    command_allocator_submitted_first_ =
        command_allocator_submitted_first_->next;
    command_allocator_writable_last_->next = nullptr;
  }
  if (!command_allocator_submitted_first_) {
    command_allocator_submitted_last_ = nullptr;
  }

  // Release single-use bindless descriptors.
  while (!view_bindless_one_use_descriptors_.empty()) {
    if (view_bindless_one_use_descriptors_.front().second >
        submission_completed_) {
      break;
    }
    ReleaseViewBindlessDescriptorImmediately(
        view_bindless_one_use_descriptors_.front().first);
    view_bindless_one_use_descriptors_.pop_front();
  }

  // Delete transient resources marked for deletion.
  while (!resources_for_deletion_.empty()) {
    if (resources_for_deletion_.front().first > submission_completed_) {
      break;
    }
    resources_for_deletion_.front().second->Release();
    resources_for_deletion_.pop_front();
  }

  shared_memory_->CompletedSubmissionUpdated();

  render_target_cache_->CompletedSubmissionUpdated();

  primitive_processor_->CompletedSubmissionUpdated();

  texture_cache_->CompletedSubmissionUpdated(submission_completed_);
}

bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  if (device_removed_) {
    return false;
  }

  bool is_opening_frame = is_guest_command && !frame_open_;
  if (submission_open_ && !is_opening_frame) {
    return true;
  }

  // Check if the device is still available.
  ID3D12Device* device = GetD3D12Provider().GetDevice();
  HRESULT device_removed_reason = device->GetDeviceRemovedReason();
  if (FAILED(device_removed_reason)) {
    device_removed_ = true;
    graphics_system_->OnHostGpuLossFromAnyThread(device_removed_reason !=
                                                 DXGI_ERROR_DEVICE_REMOVED);
    return false;
  }
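
  // The CPU may run at most kQueueFrames frames ahead of the GPU - opening
  // frame N first awaits the completion of the submission that closed frame
  // N - kQueueFrames.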
  // Check the fence - needed for all kinds of submissions (to reclaim
  // transient resources early) and specifically for frames (not to queue too
  // many), and await the availability of the current frame.
  CheckSubmissionFence(
      is_opening_frame
          ? closed_frame_submissions_[frame_current_ % kQueueFrames]
          : 0);
  // TODO(Triang3l): If failed to await (completed submission < awaited frame
  // submission), do something like dropping the draw command that wanted to
  // open the frame.
  if (is_opening_frame) {
    // Update the completed frame index, also obtaining the actual completed
    // frame number (since the CPU may actually be less than 3 frames behind)
    // before reclaiming resources tracked with the frame number.
    frame_completed_ =
        std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames;
    for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
         ++frame) {
      if (closed_frame_submissions_[frame % kQueueFrames] >
          submission_completed_) {
        break;
      }
      frame_completed_ = frame;
    }
  }

  if (!submission_open_) {
    submission_open_ = true;

    // Start a new deferred command list - will submit it to the real one at
    // the end of the submission (when async pipeline creation requests are
    // fulfilled).
    deferred_command_list_.Reset();

    // Reset cached state of the command list.
    ff_viewport_update_needed_ = true;
    ff_scissor_update_needed_ = true;
    ff_blend_factor_update_needed_ = true;
    ff_stencil_ref_update_needed_ = true;
    current_guest_pipeline_ = nullptr;
    current_external_pipeline_ = nullptr;
    current_graphics_root_signature_ = nullptr;
    current_graphics_root_up_to_date_ = 0;
    if (bindless_resources_used_) {
      deferred_command_list_.SetDescriptorHeaps(view_bindless_heap_,
                                                sampler_bindless_heap_current_);
    } else {
      view_bindful_heap_current_ = nullptr;
      sampler_bindful_heap_current_ = nullptr;
    }
    primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

    render_target_cache_->BeginSubmission();

    primitive_processor_->BeginSubmission();

    texture_cache_->BeginSubmission(submission_current_);
  }

  if (is_opening_frame) {
    frame_open_ = true;

    // Reset bindings that depend on the data stored in the pools.
    std::memset(current_float_constant_map_vertex_, 0,
                sizeof(current_float_constant_map_vertex_));
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
    cbuffer_binding_system_.up_to_date = false;
    cbuffer_binding_float_vertex_.up_to_date = false;
    cbuffer_binding_float_pixel_.up_to_date = false;
    cbuffer_binding_bool_loop_.up_to_date = false;
    cbuffer_binding_fetch_.up_to_date = false;
    if (bindless_resources_used_) {
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
    } else {
      draw_view_bindful_heap_index_ =
          ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
      draw_sampler_bindful_heap_index_ =
          ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
      bindful_textures_written_vertex_ = false;
      bindful_textures_written_pixel_ = false;
      bindful_samplers_written_vertex_ = false;
      bindful_samplers_written_pixel_ = false;
    }

    // Reclaim pool pages - no need to do this every small submission since
    // some may be reused.
    constant_buffer_pool_->Reclaim(frame_completed_);
    if (!bindless_resources_used_) {
      view_bindful_heap_pool_->Reclaim(frame_completed_);
      sampler_bindful_heap_pool_->Reclaim(frame_completed_);
    }

    pix_capturing_ =
        pix_capture_requested_.exchange(false, std::memory_order_relaxed);
    if (pix_capturing_) {
      IDXGraphicsAnalysis* graphics_analysis =
          GetD3D12Provider().GetGraphicsAnalysis();
      if (graphics_analysis != nullptr) {
        graphics_analysis->BeginCapture();
      }
    }

    primitive_processor_->BeginFrame();

    texture_cache_->BeginFrame();
  }

  return true;
}

bool D3D12CommandProcessor::EndSubmission(bool is_swap) {
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();

  // Make sure there is a command allocator to write commands to.
  if (submission_open_ && !command_allocator_writable_first_) {
    ID3D12CommandAllocator* command_allocator;
    if (FAILED(provider.GetDevice()->CreateCommandAllocator(
            D3D12_COMMAND_LIST_TYPE_DIRECT,
            IID_PPV_ARGS(&command_allocator)))) {
      XELOGE("Failed to create a command allocator");
      // Try to submit later. Completely dropping the submission is not
      // permitted because resources would be left in an undefined state.
      return false;
    }
    command_allocator_writable_first_ = new CommandAllocator;
    command_allocator_writable_first_->command_allocator = command_allocator;
    command_allocator_writable_first_->last_usage_submission = 0;
    command_allocator_writable_first_->next = nullptr;
    command_allocator_writable_last_ = command_allocator_writable_first_;
  }

  bool is_closing_frame = is_swap && frame_open_;

  if (is_closing_frame) {
    texture_cache_->EndFrame();

    primitive_processor_->EndFrame();
  }

  if (submission_open_) {
    assert_false(scratch_buffer_used_);

    pipeline_cache_->EndSubmission();

    // Submit barriers now because resources with the queued barriers may be
    // destroyed between frames.
    SubmitBarriers();

    ID3D12CommandQueue* direct_queue = provider.GetDirectQueue();

    // Submit the deferred command list.
    // Only one deferred command list must be executed in the same
    // ExecuteCommandLists - the boundaries of ExecuteCommandLists are a full
    // UAV and aliasing barrier, and subsystems of the emulator assume it
    // happens between Xenia submissions.
    ID3D12CommandAllocator* command_allocator =
        command_allocator_writable_first_->command_allocator;
    command_allocator->Reset();
    command_list_->Reset(command_allocator, nullptr);
    deferred_command_list_.Execute(command_list_, command_list_1_);
    command_list_->Close();
    ID3D12CommandList* execute_command_lists[] = {command_list_};
    direct_queue->ExecuteCommandLists(1, execute_command_lists);
    command_allocator_writable_first_->last_usage_submission =
        submission_current_;
    if (command_allocator_submitted_last_) {
      command_allocator_submitted_last_->next =
          command_allocator_writable_first_;
    } else {
      command_allocator_submitted_first_ = command_allocator_writable_first_;
    }
    command_allocator_submitted_last_ = command_allocator_writable_first_;
    command_allocator_writable_first_ = command_allocator_writable_first_->next;
    command_allocator_submitted_last_->next = nullptr;
    if (!command_allocator_writable_first_) {
      command_allocator_writable_last_ = nullptr;
    }
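
    // Signal with the current submission index, then move to the next one -
    // submission N is considered complete once the fence reaches N.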
    direct_queue->Signal(submission_fence_, submission_current_++);

    submission_open_ = false;

    // Queue operations done directly (like UpdateTileMappings) will be
    // awaited alongside the last submission if needed.
    queue_operations_done_since_submission_signal_ = false;
  }

  if (is_closing_frame) {
    if (cvars::d3d12_clear_memory_page_state) {
      shared_memory_->SetSystemPageBlocksValidWithGpuDataWritten();
    }
    // Close the capture after submitting.
    if (pix_capturing_) {
      IDXGraphicsAnalysis* graphics_analysis = provider.GetGraphicsAnalysis();
      if (graphics_analysis != nullptr) {
        graphics_analysis->EndCapture();
      }
      pix_capturing_ = false;
    }
    frame_open_ = false;
    // Submission already closed now, so minus 1.
    closed_frame_submissions_[(frame_current_++) % kQueueFrames] =
        submission_current_ - 1;

    if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
      cache_clear_requested_ = false;

      ClearCommandAllocatorCache();

      ui::d3d12::util::ReleaseAndNull(scratch_buffer_);
      scratch_buffer_size_ = 0;

      if (bindless_resources_used_) {
        texture_cache_bindless_sampler_map_.clear();
        for (const auto& sampler_bindless_heap_overflowed :
             sampler_bindless_heaps_overflowed_) {
          sampler_bindless_heap_overflowed.first->Release();
        }
        sampler_bindless_heaps_overflowed_.clear();
        sampler_bindless_heap_allocated_ = 0;
      } else {
        sampler_bindful_heap_pool_->ClearCache();
        view_bindful_heap_pool_->ClearCache();
      }
      constant_buffer_pool_->ClearCache();

      texture_cache_->ClearCache();

      // Not clearing the root signatures as they're referenced by pipelines,
      // which are not destroyed.

      primitive_processor_->ClearCache();

      render_target_cache_->ClearCache();

      shared_memory_->ClearCache();
    }
  }

  return true;
}

bool D3D12CommandProcessor::CanEndSubmissionImmediately() const {
  return !submission_open_ || !pipeline_cache_->IsCreatingPipelines();
}

void D3D12CommandProcessor::ClearCommandAllocatorCache() {
  while (command_allocator_submitted_first_) {
    auto next = command_allocator_submitted_first_->next;
    command_allocator_submitted_first_->command_allocator->Release();
    delete command_allocator_submitted_first_;
    command_allocator_submitted_first_ = next;
  }
  command_allocator_submitted_last_ = nullptr;
  while (command_allocator_writable_first_) {
    auto next = command_allocator_writable_first_->next;
    command_allocator_writable_first_->command_allocator->Release();
    delete command_allocator_writable_first_;
    command_allocator_writable_first_ = next;
  }
  command_allocator_writable_last_ = nullptr;
}

void D3D12CommandProcessor::UpdateFixedFunctionState(
    const draw_util::ViewportInfo& viewport_info,
    const draw_util::Scissor& scissor, bool primitive_polygonal,
    reg::RB_DEPTHCONTROL normalized_depth_control) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  // Viewport.
  D3D12_VIEWPORT viewport;
  viewport.TopLeftX = float(viewport_info.xy_offset[0]);
  viewport.TopLeftY = float(viewport_info.xy_offset[1]);
  viewport.Width = float(viewport_info.xy_extent[0]);
  viewport.Height = float(viewport_info.xy_extent[1]);
  viewport.MinDepth = viewport_info.z_min;
  viewport.MaxDepth = viewport_info.z_max;
  SetViewport(viewport);

  // Scissor.
  D3D12_RECT scissor_rect;
  scissor_rect.left = LONG(scissor.offset[0]);
  scissor_rect.top = LONG(scissor.offset[1]);
  scissor_rect.right = LONG(scissor.offset[0] + scissor.extent[0]);
  scissor_rect.bottom = LONG(scissor.offset[1] + scissor.extent[1]);
  SetScissorRect(scissor_rect);

  if (render_target_cache_->GetPath() ==
      RenderTargetCache::Path::kHostRenderTargets) {
    const RegisterFile& regs = *register_file_;

    // Blend factor.
    float blend_factor[] = {
        regs[XE_GPU_REG_RB_BLEND_RED].f32,
        regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
        regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
        regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
    };
    // std::memcmp instead of != so in case of NaN, every draw won't be
    // invalidating it.
    ff_blend_factor_update_needed_ |=
        std::memcmp(ff_blend_factor_, blend_factor, sizeof(float) * 4) != 0;
    if (ff_blend_factor_update_needed_) {
      std::memcpy(ff_blend_factor_, blend_factor, sizeof(float) * 4);
      deferred_command_list_.D3DOMSetBlendFactor(ff_blend_factor_);
      ff_blend_factor_update_needed_ = false;
    }

    // Stencil reference value. Per-face reference not supported by Direct3D
    // 12, choose the back face one only if drawing only back faces.
    Register stencil_ref_mask_reg;
    auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
    if (primitive_polygonal && normalized_depth_control.backface_enable &&
        pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
    } else {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
    }
    uint32_t stencil_ref =
        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg).stencilref;
    ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
    if (ff_stencil_ref_update_needed_) {
      ff_stencil_ref_ = stencil_ref;
      deferred_command_list_.D3DOMSetStencilRef(ff_stencil_ref_);
      ff_stencil_ref_update_needed_ = false;
    }
  }
}

void D3D12CommandProcessor::UpdateSystemConstantValues(
    bool shared_memory_is_uav, bool primitive_polygonal,
    uint32_t line_loop_closing_index, xenos::Endian index_endian,
    const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
    reg::RB_DEPTHCONTROL normalized_depth_control,
    uint32_t normalized_color_mask) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;
  auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
  float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
  auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
  auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
  auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
  auto rb_stencilrefmask_bf =
      regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
  auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
  auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
  auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
  uint32_t vgt_indx_offset = regs.Get<reg::VGT_INDX_OFFSET>().indx_offset;
  uint32_t vgt_max_vtx_indx = regs.Get<reg::VGT_MAX_VTX_INDX>().max_indx;
  uint32_t vgt_min_vtx_indx = regs.Get<reg::VGT_MIN_VTX_INDX>().min_indx;

  bool edram_rov_used = render_target_cache_->GetPath() ==
                        RenderTargetCache::Path::kPixelShaderInterlock;
  uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
  uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();

  // Get the color info register values for each render target. Also, for ROV,
  // exclude components that don't exist in the format from the write mask.
  // Don't exclude fully overlapping render targets, however - two render
  // targets with the same base address are used in the lighting pass of
  // 4D5307E6, for example, with the needed one picked with dynamic control
  // flow.
  reg::RB_COLOR_INFO color_infos[4];
  float rt_clamp[4][4];
  uint32_t rt_keep_masks[4][2];
  for (uint32_t i = 0; i < 4; ++i) {
    auto color_info = regs.Get<reg::RB_COLOR_INFO>(
        reg::RB_COLOR_INFO::rt_register_indices[i]);
    color_infos[i] = color_info;
    if (edram_rov_used) {
      // Get the mask for keeping previous color's components unmodified, or
      // two UINT32_MAX if no colors actually existing in the RT are written.
      DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
          color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
          rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
          rt_keep_masks[i][0], rt_keep_masks[i][1]);
    }
  }

  // Disable depth and stencil if it aliases a color render target (for
  // instance, during the XBLA logo in 58410954, though depth writing is
  // already disabled there).
  bool depth_stencil_enabled = normalized_depth_control.stencil_enable ||
                               normalized_depth_control.z_enable;
  if (edram_rov_used && depth_stencil_enabled) {
    for (uint32_t i = 0; i < 4; ++i) {
      if (rb_depth_info.depth_base == color_infos[i].color_base &&
          (rt_keep_masks[i][0] != UINT32_MAX ||
           rt_keep_masks[i][1] != UINT32_MAX)) {
        depth_stencil_enabled = false;
        break;
      }
    }
  }

  bool dirty = false;

  // Flags.
  uint32_t flags = 0;
  // Whether shared memory is an SRV or a UAV. Because a resource can't be in
  // a read-write (UAV) and a read-only (SRV, IBV) state at once, if any
  // shader in the pipeline uses memexport, the shared memory buffer must be a
  // UAV.
  if (shared_memory_is_uav) {
    flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV;
  }
  // W0 division control.
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by
  //                       1/W0.
  //              = false: multiply the X, Y coordinates by 1/W0.
  // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //             = false: multiply the Z coordinate by 1/W0.
  // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the
  //                        reciprocal to get 1/W0.
  if (pa_cl_vte_cntl.vtx_xy_fmt) {
    flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW;
  }
  if (pa_cl_vte_cntl.vtx_z_fmt) {
    flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW;
  }
  if (pa_cl_vte_cntl.vtx_w0_fmt) {
    flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal;
  }
  // Whether the primitive is polygonal and SV_IsFrontFace matters.
  if (primitive_polygonal) {
    flags |= DxbcShaderTranslator::kSysFlag_PrimitivePolygonal;
  }
  // Primitive type.
  if (draw_util::IsPrimitiveLine(regs)) {
    flags |= DxbcShaderTranslator::kSysFlag_PrimitiveLine;
  }
  // Depth format.
  if (rb_depth_info.depth_format == xenos::DepthRenderTargetFormat::kD24FS8) {
    flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24;
  }
  // Alpha test.
  xenos::CompareFunction alpha_test_function =
      rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
                                        : xenos::CompareFunction::kAlways;
  flags |= uint32_t(alpha_test_function)
           << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift;
  // Gamma writing.
  if (!render_target_cache_->gamma_render_target_as_srgb()) {
    for (uint32_t i = 0; i < 4; ++i) {
      if (color_infos[i].color_format ==
          xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
        flags |= DxbcShaderTranslator::kSysFlag_ConvertColor0ToGamma << i;
      }
    }
  }
  if (edram_rov_used && depth_stencil_enabled) {
    flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil;
    if (normalized_depth_control.z_enable) {
      flags |= uint32_t(normalized_depth_control.zfunc)
               << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift;
      if (normalized_depth_control.z_write_enable) {
        flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite;
      }
    } else {
      // In case stencil is used without depth testing - always pass, and
      // don't modify the stored depth.
      flags |= DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess |
               DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual |
               DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater;
    }
    if (normalized_depth_control.stencil_enable) {
      flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
    }
    // Hint - if not applicable to the shader, will not have effect.
    if (alpha_test_function == xenos::CompareFunction::kAlways &&
        !rb_colorcontrol.alpha_to_mask_enable) {
      flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite;
    }
  }
  dirty |= system_constants_.flags != flags;
  system_constants_.flags = flags;

  // Tessellation factor range, plus 1.0 according to the images in
  // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
  float tessellation_factor_min =
      regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
  float tessellation_factor_max =
      regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
  dirty |= system_constants_.tessellation_factor_range_min !=
           tessellation_factor_min;
  system_constants_.tessellation_factor_range_min = tessellation_factor_min;
  dirty |= system_constants_.tessellation_factor_range_max !=
           tessellation_factor_max;
  system_constants_.tessellation_factor_range_max = tessellation_factor_max;

  // Line loop closing index (or 0 when drawing other primitives or using an
  // index buffer).
  dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
  system_constants_.line_loop_closing_index = line_loop_closing_index;

  // Index or tessellation edge factor buffer endianness.
  dirty |= system_constants_.vertex_index_endian != index_endian;
  system_constants_.vertex_index_endian = index_endian;

  // Vertex index offset.
  dirty |= system_constants_.vertex_index_offset != vgt_indx_offset;
  system_constants_.vertex_index_offset = vgt_indx_offset;

  // Vertex index range.
  dirty |= system_constants_.vertex_index_min != vgt_min_vtx_indx;
  dirty |= system_constants_.vertex_index_max != vgt_max_vtx_indx;
  system_constants_.vertex_index_min = vgt_min_vtx_indx;
  system_constants_.vertex_index_max = vgt_max_vtx_indx;

  // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
  // The shader knows only the total count - tightly packing the user clip
  // planes that are actually used.
  if (!pa_cl_clip_cntl.clip_disable) {
    float* user_clip_plane_write_ptr = system_constants_.user_clip_planes[0];
    uint32_t user_clip_planes_remaining = pa_cl_clip_cntl.ucp_ena;
    uint32_t user_clip_plane_index;
    while (xe::bit_scan_forward(user_clip_planes_remaining,
                                &user_clip_plane_index)) {
      user_clip_planes_remaining &= ~(UINT32_C(1) << user_clip_plane_index);
      const float* user_clip_plane =
          &regs[XE_GPU_REG_PA_CL_UCP_0_X + user_clip_plane_index * 4].f32;
      if (std::memcmp(user_clip_plane_write_ptr, user_clip_plane,
                      4 * sizeof(float))) {
        dirty = true;
        std::memcpy(user_clip_plane_write_ptr, user_clip_plane,
                    4 * sizeof(float));
      }
      user_clip_plane_write_ptr += 4;
    }
  }

  // Conversion to Direct3D 12 normalized device coordinates.
  for (uint32_t i = 0; i < 3; ++i) {
    dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
    dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
    system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
    system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
  }

  // Point size.
  if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) {
    auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
    auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
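    // The point size registers are in fixed point with a 4-bit fraction, and
    // presumably store half-extents given the conversion below - 1/16
    // converts to pixels, 2 converts radii to diameters.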
    float point_vertex_diameter_min =
        float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
    float point_vertex_diameter_max =
        float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
    float point_constant_diameter_x =
        float(pa_su_point_size.width) * (2.0f / 16.0f);
    float point_constant_diameter_y =
        float(pa_su_point_size.height) * (2.0f / 16.0f);
    dirty |= system_constants_.point_vertex_diameter_min !=
             point_vertex_diameter_min;
    dirty |= system_constants_.point_vertex_diameter_max !=
             point_vertex_diameter_max;
    dirty |= system_constants_.point_constant_diameter[0] !=
             point_constant_diameter_x;
    dirty |= system_constants_.point_constant_diameter[1] !=
             point_constant_diameter_y;
    system_constants_.point_vertex_diameter_min = point_vertex_diameter_min;
    system_constants_.point_vertex_diameter_max = point_vertex_diameter_max;
    system_constants_.point_constant_diameter[0] = point_constant_diameter_x;
    system_constants_.point_constant_diameter[1] = point_constant_diameter_y;
    // 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter
    // to radius conversion to avoid multiplying the per-vertex diameter by an
    // additional constant in the shader.
    float point_screen_diameter_to_ndc_radius_x =
        (/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
        std::max(viewport_info.xy_extent[0], uint32_t(1));
    float point_screen_diameter_to_ndc_radius_y =
        (/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
        std::max(viewport_info.xy_extent[1], uint32_t(1));
    dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
             point_screen_diameter_to_ndc_radius_x;
    dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] !=
             point_screen_diameter_to_ndc_radius_y;
    system_constants_.point_screen_diameter_to_ndc_radius[0] =
        point_screen_diameter_to_ndc_radius_x;
    system_constants_.point_screen_diameter_to_ndc_radius[1] =
        point_screen_diameter_to_ndc_radius_y;
  }

  // Texture signedness / gamma.
  bool gamma_render_target_as_srgb =
      render_target_cache_->gamma_render_target_as_srgb();
  uint32_t textures_resolved = 0;
  uint32_t textures_remaining = used_texture_mask;
  uint32_t texture_index;
  while (xe::bit_scan_forward(textures_remaining, &texture_index)) {
    textures_remaining &= ~(uint32_t(1) << texture_index);
    uint32_t& texture_signs_uint =
        system_constants_.texture_swizzled_signs[texture_index >> 2];
    uint32_t texture_signs_shift = (texture_index & 3) * 8;
    uint8_t texture_signs =
        texture_cache_->GetActiveTextureSwizzledSigns(texture_index);
    uint32_t texture_signs_shifted = uint32_t(texture_signs)
                                     << texture_signs_shift;
    uint32_t texture_signs_mask = uint32_t(0b11111111) << texture_signs_shift;
    dirty |= (texture_signs_uint & texture_signs_mask) != texture_signs_shifted;
    texture_signs_uint =
        (texture_signs_uint & ~texture_signs_mask) | texture_signs_shifted;
    textures_resolved |=
        uint32_t(texture_cache_->IsActiveTextureResolved(texture_index))
        << texture_index;
  }
  dirty |= system_constants_.textures_resolved != textures_resolved;
  system_constants_.textures_resolved = textures_resolved;

  // Log2 of sample count, for alpha to mask and with ROV, for EDRAM address
  // calculation with MSAA.
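  // 2x MSAA doubles the sample grid vertically only; 4x doubles it along both
  // axes.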
  uint32_t sample_count_log2_x =
      rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 1 : 0;
  uint32_t sample_count_log2_y =
      rb_surface_info.msaa_samples >= xenos::MsaaSamples::k2X ? 1 : 0;
  dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x;
  dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y;
  system_constants_.sample_count_log2[0] = sample_count_log2_x;
  system_constants_.sample_count_log2[1] = sample_count_log2_y;

  // Alpha test and alpha to coverage.
  dirty |= system_constants_.alpha_test_reference != rb_alpha_ref;
  system_constants_.alpha_test_reference = rb_alpha_ref;
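  // The upper byte of RB_COLORCONTROL presumably holds the alpha-to-mask
  // sample dither offsets; bit 8 is used as the "enabled" flag in the packing
  // expected by the shader translator.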
  uint32_t alpha_to_mask = rb_colorcontrol.alpha_to_mask_enable
                               ? (rb_colorcontrol.value >> 24) | (1 << 8)
                               : 0;
  dirty |= system_constants_.alpha_to_mask != alpha_to_mask;
  system_constants_.alpha_to_mask = alpha_to_mask;
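
  // An EDRAM tile is 80x16 samples, one dword per sample at 32bpp - with
  // resolution scaling, each guest sample covers
  // draw_resolution_scale_x * draw_resolution_scale_y host samples.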
  uint32_t edram_tile_dwords_scaled =
      xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
      (draw_resolution_scale_x * draw_resolution_scale_y);

  // EDRAM pitch for ROV writing.
  if (edram_rov_used) {
    // Align, then multiply by 32bpp tile size in dwords.
    uint32_t edram_32bpp_tile_pitch_dwords_scaled =
        ((rb_surface_info.surface_pitch *
          (rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1)) +
         (xenos::kEdramTileWidthSamples - 1)) /
        xenos::kEdramTileWidthSamples * edram_tile_dwords_scaled;
    dirty |= system_constants_.edram_32bpp_tile_pitch_dwords_scaled !=
             edram_32bpp_tile_pitch_dwords_scaled;
    system_constants_.edram_32bpp_tile_pitch_dwords_scaled =
        edram_32bpp_tile_pitch_dwords_scaled;
  }

  // Color exponent bias and ROV render target writing.
  for (uint32_t i = 0; i < 4; ++i) {
    reg::RB_COLOR_INFO color_info = color_infos[i];
    // Exponent bias is in bits 20:25 of RB_COLOR_INFO.
    int32_t color_exp_bias = color_info.color_exp_bias;
    if (color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16 ||
        color_info.color_format ==
            xenos::ColorRenderTargetFormat::k_16_16_16_16) {
      if (render_target_cache_->GetPath() ==
              RenderTargetCache::Path::kHostRenderTargets &&
          !render_target_cache_->IsFixed16TruncatedToMinus1To1()) {
        // Remap from -32...32 to -1...1 by dividing the output values by 32,
        // losing blending correctness, but getting the full range.
        color_exp_bias -= 5;
      }
    }
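    // Construct 2^color_exp_bias as a float: 0x3F800000 is the IEEE 754 bit
    // pattern of 1.0f, and adding color_exp_bias << 23 adds the bias directly
    // to the exponent field.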
    float color_exp_bias_scale;
    *reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
        0x3F800000 + (color_exp_bias << 23);
    dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
    system_constants_.color_exp_bias[i] = color_exp_bias_scale;
    if (edram_rov_used) {
      dirty |=
          system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
      system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
      dirty |=
          system_constants_.edram_rt_keep_mask[i][1] != rt_keep_masks[i][1];
      system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
      if (rt_keep_masks[i][0] != UINT32_MAX ||
          rt_keep_masks[i][1] != UINT32_MAX) {
        uint32_t rt_base_dwords_scaled =
            color_info.color_base * edram_tile_dwords_scaled;
        dirty |= system_constants_.edram_rt_base_dwords_scaled[i] !=
                 rt_base_dwords_scaled;
        system_constants_.edram_rt_base_dwords_scaled[i] =
            rt_base_dwords_scaled;
        uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
            color_info.color_format);
        dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
        system_constants_.edram_rt_format_flags[i] = format_flags;
        // Can't do float comparisons here because NaNs would result in always
        // setting the dirty flag.
        dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i],
                             4 * sizeof(float)) != 0;
        std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
                    4 * sizeof(float));
        uint32_t blend_factors_ops =
            regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
        dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
                 blend_factors_ops;
        system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
      }
    }
  }

  if (edram_rov_used) {
    uint32_t depth_base_dwords_scaled =
        rb_depth_info.depth_base * edram_tile_dwords_scaled;
    dirty |= system_constants_.edram_depth_base_dwords_scaled !=
             depth_base_dwords_scaled;
    system_constants_.edram_depth_base_dwords_scaled = depth_base_dwords_scaled;

    // For non-polygons, front polygon offset is used, and it's enabled if
    // POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back
    // are used.
    float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
    float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
    if (primitive_polygonal) {
      if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
        poly_offset_front_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
        poly_offset_front_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
      }
      if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
        poly_offset_back_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
        poly_offset_back_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
      }
    } else {
      if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
        poly_offset_front_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
        poly_offset_front_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
        poly_offset_back_scale = poly_offset_front_scale;
        poly_offset_back_offset = poly_offset_front_offset;
      }
    }
    // With non-square resolution scaling, make sure the worst-case impact is
    // reverted (slope only along the scaled axis), thus max. More bias is
    // better than less bias, because less bias means Z fighting with the
    // background is more likely.
    float poly_offset_scale_factor =
        xenos::kPolygonOffsetScaleSubpixelUnit *
        std::max(draw_resolution_scale_x, draw_resolution_scale_y);
    poly_offset_front_scale *= poly_offset_scale_factor;
    poly_offset_back_scale *= poly_offset_scale_factor;
    dirty |= system_constants_.edram_poly_offset_front_scale !=
             poly_offset_front_scale;
    system_constants_.edram_poly_offset_front_scale = poly_offset_front_scale;
    dirty |= system_constants_.edram_poly_offset_front_offset !=
             poly_offset_front_offset;
    system_constants_.edram_poly_offset_front_offset = poly_offset_front_offset;
    dirty |= system_constants_.edram_poly_offset_back_scale !=
             poly_offset_back_scale;
    system_constants_.edram_poly_offset_back_scale = poly_offset_back_scale;
    dirty |= system_constants_.edram_poly_offset_back_offset !=
             poly_offset_back_offset;
    system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset;

    if (depth_stencil_enabled && normalized_depth_control.stencil_enable) {
      dirty |= system_constants_.edram_stencil_front_reference !=
               rb_stencilrefmask.stencilref;
      system_constants_.edram_stencil_front_reference =
          rb_stencilrefmask.stencilref;
      dirty |= system_constants_.edram_stencil_front_read_mask !=
               rb_stencilrefmask.stencilmask;
      system_constants_.edram_stencil_front_read_mask =
          rb_stencilrefmask.stencilmask;
      dirty |= system_constants_.edram_stencil_front_write_mask !=
               rb_stencilrefmask.stencilwritemask;
      system_constants_.edram_stencil_front_write_mask =
          rb_stencilrefmask.stencilwritemask;
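      // RB_DEPTHCONTROL packs the front-face stencilfunc, stencilfail,
      // stencilzpass and stencilzfail into bits 8:19 (the back-face
      // equivalents into bits 20:31) - extract each set as one 12-bit field.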
      uint32_t stencil_func_ops =
          (normalized_depth_control.value >> 8) & ((1 << 12) - 1);
      dirty |=
          system_constants_.edram_stencil_front_func_ops != stencil_func_ops;
      system_constants_.edram_stencil_front_func_ops = stencil_func_ops;

      if (primitive_polygonal && normalized_depth_control.backface_enable) {
        dirty |= system_constants_.edram_stencil_back_reference !=
                 rb_stencilrefmask_bf.stencilref;
        system_constants_.edram_stencil_back_reference =
            rb_stencilrefmask_bf.stencilref;
        dirty |= system_constants_.edram_stencil_back_read_mask !=
                 rb_stencilrefmask_bf.stencilmask;
        system_constants_.edram_stencil_back_read_mask =
            rb_stencilrefmask_bf.stencilmask;
        dirty |= system_constants_.edram_stencil_back_write_mask !=
                 rb_stencilrefmask_bf.stencilwritemask;
        system_constants_.edram_stencil_back_write_mask =
            rb_stencilrefmask_bf.stencilwritemask;
        uint32_t stencil_func_ops_bf =
            (normalized_depth_control.value >> 20) & ((1 << 12) - 1);
        dirty |= system_constants_.edram_stencil_back_func_ops !=
                 stencil_func_ops_bf;
        system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;
      } else {
        dirty |= std::memcmp(system_constants_.edram_stencil_back,
                             system_constants_.edram_stencil_front,
                             4 * sizeof(uint32_t)) != 0;
        std::memcpy(system_constants_.edram_stencil_back,
                    system_constants_.edram_stencil_front,
                    4 * sizeof(uint32_t));
      }
    }

    dirty |= system_constants_.edram_blend_constant[0] !=
             regs[XE_GPU_REG_RB_BLEND_RED].f32;
    system_constants_.edram_blend_constant[0] =
        regs[XE_GPU_REG_RB_BLEND_RED].f32;
    dirty |= system_constants_.edram_blend_constant[1] !=
             regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
    system_constants_.edram_blend_constant[1] =
        regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
    dirty |= system_constants_.edram_blend_constant[2] !=
             regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
    system_constants_.edram_blend_constant[2] =
        regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
    dirty |= system_constants_.edram_blend_constant[3] !=
             regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
    system_constants_.edram_blend_constant[3] =
        regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
  }

  cbuffer_binding_system_.up_to_date &= !dirty;
}

bool D3D12CommandProcessor::UpdateBindings(
    const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
    ID3D12RootSignature* root_signature) {
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  const RegisterFile& regs = *register_file_;

#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  // Set the new root signature.
  if (current_graphics_root_signature_ != root_signature) {
    current_graphics_root_signature_ = root_signature;
    if (!bindless_resources_used_) {
      GetRootBindfulExtraParameterIndices(
          vertex_shader, pixel_shader, current_graphics_root_bindful_extras_);
    }
    // Changing the root signature invalidates all bindings.
    current_graphics_root_up_to_date_ = 0;
    deferred_command_list_.D3DSetGraphicsRootSignature(root_signature);
  }

  // Select the root parameter indices depending on the used binding model.
  uint32_t root_parameter_fetch_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_FetchConstants
                               : kRootParameter_Bindful_FetchConstants;
  uint32_t root_parameter_float_constants_vertex =
      bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsVertex
                               : kRootParameter_Bindful_FloatConstantsVertex;
  uint32_t root_parameter_float_constants_pixel =
      bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsPixel
                               : kRootParameter_Bindful_FloatConstantsPixel;
  uint32_t root_parameter_system_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_SystemConstants
                               : kRootParameter_Bindful_SystemConstants;
  uint32_t root_parameter_bool_loop_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants
                               : kRootParameter_Bindful_BoolLoopConstants;

  //
  // Update root constant buffers that are common for bindful and bindless.
  //

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now cause nothing seems to differ on the
  // Xbox 360 (however, OpenGL ES on Adreno 200 on Android has different
  // ranges).
  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
  // Check if the float constant layout is still the same and get the counts.
  const Shader::ConstantRegisterMap& float_constant_map_vertex =
      vertex_shader->constant_register_map();
  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
  for (uint32_t i = 0; i < 4; ++i) {
    if (current_float_constant_map_vertex_[i] !=
        float_constant_map_vertex.float_bitmap[i]) {
      current_float_constant_map_vertex_[i] =
          float_constant_map_vertex.float_bitmap[i];
      // If no float constants at all, we can reuse any buffer for them, so
      // not invalidating.
      if (float_constant_count_vertex) {
        cbuffer_binding_float_vertex_.up_to_date = false;
      }
    }
  }
  uint32_t float_constant_count_pixel = 0;
  if (pixel_shader != nullptr) {
    const Shader::ConstantRegisterMap& float_constant_map_pixel =
        pixel_shader->constant_register_map();
    float_constant_count_pixel = float_constant_map_pixel.float_count;
    for (uint32_t i = 0; i < 4; ++i) {
      if (current_float_constant_map_pixel_[i] !=
          float_constant_map_pixel.float_bitmap[i]) {
        current_float_constant_map_pixel_[i] =
            float_constant_map_pixel.float_bitmap[i];
        if (float_constant_count_pixel) {
          cbuffer_binding_float_pixel_.up_to_date = false;
        }
      }
    }
  } else {
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
  }

  // Write the constant buffer data.
  if (!cbuffer_binding_system_.up_to_date) {
    uint8_t* system_constants = constant_buffer_pool_->Request(
        frame_current_, sizeof(system_constants_),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_system_.address);
    if (system_constants == nullptr) {
      return false;
    }
    std::memcpy(system_constants, &system_constants_,
                sizeof(system_constants_));
    cbuffer_binding_system_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_system_constants);
  }
  if (!cbuffer_binding_float_vertex_.up_to_date) {
    // Even if the shader doesn't need any float constants, a valid binding
    // must still be provided, so if the first draw in the frame with the
    // current root signature doesn't have float constants at all, still
    // allocate an empty buffer.
    uint8_t* float_constants = constant_buffer_pool_->Request(
        frame_current_,
        sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_float_vertex_.address);
    if (float_constants == nullptr) {
      return false;
    }
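    // Tightly pack only the float constants the translated shader actually
    // reads, in register order, walking the 64-bit usage bitmaps.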
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry =
          float_constant_map_vertex.float_bitmap[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(float_constants,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    4 * sizeof(float));
        float_constants += 4 * sizeof(float);
      }
    }
    cbuffer_binding_float_vertex_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_float_constants_vertex);
  }
  if (!cbuffer_binding_float_pixel_.up_to_date) {
    uint8_t* float_constants = constant_buffer_pool_->Request(
        frame_current_,
        sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_float_pixel_.address);
    if (float_constants == nullptr) {
      return false;
    }
    if (pixel_shader != nullptr) {
      const Shader::ConstantRegisterMap& float_constant_map_pixel =
          pixel_shader->constant_register_map();
      for (uint32_t i = 0; i < 4; ++i) {
        uint64_t float_constant_map_entry =
            float_constant_map_pixel.float_bitmap[i];
        uint32_t float_constant_index;
        while (xe::bit_scan_forward(float_constant_map_entry,
                                    &float_constant_index)) {
          float_constant_map_entry &= ~(1ull << float_constant_index);
          std::memcpy(float_constants,
                      &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                            (float_constant_index << 2)]
                           .f32,
                      4 * sizeof(float));
          float_constants += 4 * sizeof(float);
        }
      }
    }
    cbuffer_binding_float_pixel_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_float_constants_pixel);
  }
  if (!cbuffer_binding_bool_loop_.up_to_date) {
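    // 256 bool constants as 8 dwords of bits, followed by 32 dword loop
    // constants.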
    constexpr uint32_t kBoolLoopConstantsSize = (8 + 32) * sizeof(uint32_t);
    uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
        frame_current_, kBoolLoopConstantsSize,
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_bool_loop_.address);
    if (bool_loop_constants == nullptr) {
      return false;
    }
    std::memcpy(bool_loop_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
                kBoolLoopConstantsSize);
    cbuffer_binding_bool_loop_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_bool_loop_constants);
  }
  if (!cbuffer_binding_fetch_.up_to_date) {
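    // 32 fetch constants, 6 dwords each.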
    constexpr uint32_t kFetchConstantsSize = 32 * 6 * sizeof(uint32_t);
    uint8_t* fetch_constants = constant_buffer_pool_->Request(
        frame_current_, kFetchConstantsSize,
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_fetch_.address);
    if (fetch_constants == nullptr) {
      return false;
    }
    std::memcpy(fetch_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
                kFetchConstantsSize);
    cbuffer_binding_fetch_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_fetch_constants);
  }

  //
  // Update descriptors.
  //

  // Get textures and samplers used by the vertex shader, check if the last
  // used samplers are compatible and update them.
  size_t texture_layout_uid_vertex =
      vertex_shader->GetTextureBindingLayoutUserUID();
  size_t sampler_layout_uid_vertex =
      vertex_shader->GetSamplerBindingLayoutUserUID();
  const std::vector<D3D12Shader::TextureBinding>& textures_vertex =
      vertex_shader->GetTextureBindingsAfterTranslation();
  const std::vector<D3D12Shader::SamplerBinding>& samplers_vertex =
      vertex_shader->GetSamplerBindingsAfterTranslation();
  size_t texture_count_vertex = textures_vertex.size();
  size_t sampler_count_vertex = samplers_vertex.size();
  if (sampler_count_vertex) {
    if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
      current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
      bindful_samplers_written_vertex_ = false;
    }
    current_samplers_vertex_.resize(
        std::max(current_samplers_vertex_.size(), sampler_count_vertex));
    for (size_t i = 0; i < sampler_count_vertex; ++i) {
      D3D12TextureCache::SamplerParameters parameters =
          texture_cache_->GetSamplerParameters(samplers_vertex[i]);
      if (current_samplers_vertex_[i] != parameters) {
        cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
        bindful_samplers_written_vertex_ = false;
        current_samplers_vertex_[i] = parameters;
      }
    }
  }

  // Get textures and samplers used by the pixel shader, check if the last
  // used samplers are compatible and update them.
  size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
  const std::vector<D3D12Shader::TextureBinding>* textures_pixel;
  const std::vector<D3D12Shader::SamplerBinding>* samplers_pixel;
  size_t texture_count_pixel, sampler_count_pixel;
  if (pixel_shader != nullptr) {
    texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
    sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
    textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
    texture_count_pixel = textures_pixel->size();
    samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
    sampler_count_pixel = samplers_pixel->size();
    if (sampler_count_pixel) {
      if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
        current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
        cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
        bindful_samplers_written_pixel_ = false;
      }
      current_samplers_pixel_.resize(std::max(current_samplers_pixel_.size(),
                                              size_t(sampler_count_pixel)));
      for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
        D3D12TextureCache::SamplerParameters parameters =
            texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
        if (current_samplers_pixel_[i] != parameters) {
          current_samplers_pixel_[i] = parameters;
          cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
          bindful_samplers_written_pixel_ = false;
        }
      }
    }
  } else {
    texture_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
    sampler_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
    textures_pixel = nullptr;
    texture_count_pixel = 0;
    samplers_pixel = nullptr;
    sampler_count_pixel = 0;
  }

  assert_true(sampler_count_vertex + sampler_count_pixel <= kSamplerHeapSize);

  if (bindless_resources_used_) {
    //
    // Bindless descriptors path.
    //

    // Check if need to write new descriptor indices.
    // Samplers have already been checked.
    if (texture_count_vertex &&
        cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
        (current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
             texture_count_vertex))) {
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
    }
    if (texture_count_pixel &&
        cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
        (current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
             texture_count_pixel))) {
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
    }

    // Get sampler descriptor indices, write new samplers, and handle sampler
    // heap overflow if it happens.
    if ((sampler_count_vertex &&
         !cbuffer_binding_descriptor_indices_vertex_.up_to_date) ||
        (sampler_count_pixel &&
         !cbuffer_binding_descriptor_indices_pixel_.up_to_date)) {
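      // Two attempts at most: the first with the current sampler heap, and,
      // if its kSamplerHeapSize slots run out, one more after retiring it and
      // switching to a fresh heap.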
|
|
for (uint32_t i = 0; i < 2; ++i) {
|
|
if (i) {
|
|
// Overflow happened - invalidate sampler bindings because their
|
|
// descriptor indices can't be used anymore (and even if heap creation
|
|
// fails, because current_sampler_bindless_indices_#_ are in an
|
|
// undefined state now) and switch to a new sampler heap.
|
|
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
|
|
cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
|
|
ID3D12DescriptorHeap* sampler_heap_new;
|
|
if (!sampler_bindless_heaps_overflowed_.empty() &&
|
|
sampler_bindless_heaps_overflowed_.front().second <=
|
|
submission_completed_) {
|
|
sampler_heap_new = sampler_bindless_heaps_overflowed_.front().first;
|
|
sampler_bindless_heaps_overflowed_.pop_front();
|
|
} else {
|
|
D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_new_desc;
|
|
sampler_heap_new_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
|
|
sampler_heap_new_desc.NumDescriptors = kSamplerHeapSize;
|
|
sampler_heap_new_desc.Flags =
|
|
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
|
|
sampler_heap_new_desc.NodeMask = 0;
|
|
if (FAILED(device->CreateDescriptorHeap(
|
|
&sampler_heap_new_desc, IID_PPV_ARGS(&sampler_heap_new)))) {
|
|
XELOGE(
|
|
"Failed to create a new bindless sampler descriptor heap "
|
|
"after an overflow of the previous one");
|
|
return false;
|
|
}
|
|
}
|
|
// Only change the heap if a new heap was created successfully, not to
|
|
// leave the values in an undefined state in case CreateDescriptorHeap
|
|
// has failed.
|
|
sampler_bindless_heaps_overflowed_.push_back(std::make_pair(
|
|
sampler_bindless_heap_current_, submission_current_));
|
|
sampler_bindless_heap_current_ = sampler_heap_new;
|
|
sampler_bindless_heap_cpu_start_ =
|
|
sampler_bindless_heap_current_
|
|
->GetCPUDescriptorHandleForHeapStart();
|
|
sampler_bindless_heap_gpu_start_ =
|
|
sampler_bindless_heap_current_
|
|
->GetGPUDescriptorHandleForHeapStart();
|
|
sampler_bindless_heap_allocated_ = 0;
|
|
// The only thing the heap is used for now is texture cache samplers -
|
|
// invalidate all of them.
|
|
texture_cache_bindless_sampler_map_.clear();
|
|
deferred_command_list_.SetDescriptorHeaps(
|
|
view_bindless_heap_, sampler_bindless_heap_current_);
|
|
current_graphics_root_up_to_date_ &=
|
|
~(1u << kRootParameter_Bindless_SamplerHeap);
|
|
}
|
|
bool samplers_overflowed = false;
|
|
if (sampler_count_vertex &&
|
|
!cbuffer_binding_descriptor_indices_vertex_.up_to_date) {
|
|
current_sampler_bindless_indices_vertex_.resize(
|
|
std::max(current_sampler_bindless_indices_vertex_.size(),
|
|
size_t(sampler_count_vertex)));
|
|
for (uint32_t j = 0; j < sampler_count_vertex; ++j) {
|
|
D3D12TextureCache::SamplerParameters sampler_parameters =
|
|
current_samplers_vertex_[j];
|
|
uint32_t sampler_index;
|
|
auto it = texture_cache_bindless_sampler_map_.find(
|
|
sampler_parameters.value);
|
|
if (it != texture_cache_bindless_sampler_map_.end()) {
|
|
sampler_index = it->second;
|
|
} else {
|
|
if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) {
|
|
samplers_overflowed = true;
|
|
break;
|
|
}
|
|
sampler_index = sampler_bindless_heap_allocated_++;
|
|
texture_cache_->WriteSampler(
|
|
sampler_parameters,
|
|
provider.OffsetSamplerDescriptor(
|
|
sampler_bindless_heap_cpu_start_, sampler_index));
|
|
texture_cache_bindless_sampler_map_.emplace(
|
|
sampler_parameters.value, sampler_index);
|
|
}
|
|
current_sampler_bindless_indices_vertex_[j] = sampler_index;
|
|
}
|
|
}
|
|
if (samplers_overflowed) {
|
|
continue;
|
|
}
|
|
if (sampler_count_pixel &&
|
|
!cbuffer_binding_descriptor_indices_pixel_.up_to_date) {
|
|
current_sampler_bindless_indices_pixel_.resize(
|
|
std::max(current_sampler_bindless_indices_pixel_.size(),
|
|
size_t(sampler_count_pixel)));
|
|
for (uint32_t j = 0; j < sampler_count_pixel; ++j) {
|
|
D3D12TextureCache::SamplerParameters sampler_parameters =
|
|
current_samplers_pixel_[j];
|
|
uint32_t sampler_index;
|
|
auto it = texture_cache_bindless_sampler_map_.find(
|
|
sampler_parameters.value);
|
|
if (it != texture_cache_bindless_sampler_map_.end()) {
|
|
sampler_index = it->second;
|
|
} else {
|
|
if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) {
|
|
samplers_overflowed = true;
|
|
break;
|
|
}
|
|
sampler_index = sampler_bindless_heap_allocated_++;
|
|
texture_cache_->WriteSampler(
|
|
sampler_parameters,
|
|
provider.OffsetSamplerDescriptor(
|
|
sampler_bindless_heap_cpu_start_, sampler_index));
|
|
texture_cache_bindless_sampler_map_.emplace(
|
|
sampler_parameters.value, sampler_index);
|
|
}
|
|
current_sampler_bindless_indices_pixel_[j] = sampler_index;
|
|
}
|
|
}
|
|
if (!samplers_overflowed) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
    if (!cbuffer_binding_descriptor_indices_vertex_.up_to_date) {
      uint32_t* descriptor_indices =
          reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
              frame_current_,
              std::max(texture_count_vertex + sampler_count_vertex, size_t(1)) *
                  sizeof(uint32_t),
              D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
              &cbuffer_binding_descriptor_indices_vertex_.address));
      if (!descriptor_indices) {
        return false;
      }
      for (size_t i = 0; i < texture_count_vertex; ++i) {
        const D3D12Shader::TextureBinding& texture = textures_vertex[i];
        descriptor_indices[texture.bindless_descriptor_index] =
            texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
            uint32_t(SystemBindlessView::kUnboundedSRVsStart);
      }
      current_texture_layout_uid_vertex_ = texture_layout_uid_vertex;
      if (texture_count_vertex) {
        current_texture_srv_keys_vertex_.resize(
            std::max(current_texture_srv_keys_vertex_.size(),
                     size_t(texture_count_vertex)));
        texture_cache_->WriteActiveTextureSRVKeys(
            current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
            texture_count_vertex);
      }
      // Current samplers have already been updated.
      for (size_t i = 0; i < sampler_count_vertex; ++i) {
        descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
            current_sampler_bindless_indices_vertex_[i];
      }
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << kRootParameter_Bindless_DescriptorIndicesVertex);
    }

    if (!cbuffer_binding_descriptor_indices_pixel_.up_to_date) {
      uint32_t* descriptor_indices =
          reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
              frame_current_,
              std::max(texture_count_pixel + sampler_count_pixel, size_t(1)) *
                  sizeof(uint32_t),
              D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
              &cbuffer_binding_descriptor_indices_pixel_.address));
      if (!descriptor_indices) {
        return false;
      }
      for (size_t i = 0; i < texture_count_pixel; ++i) {
        const D3D12Shader::TextureBinding& texture = (*textures_pixel)[i];
        descriptor_indices[texture.bindless_descriptor_index] =
            texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
            uint32_t(SystemBindlessView::kUnboundedSRVsStart);
      }
      current_texture_layout_uid_pixel_ = texture_layout_uid_pixel;
      if (texture_count_pixel) {
        current_texture_srv_keys_pixel_.resize(
            std::max(current_texture_srv_keys_pixel_.size(),
                     size_t(texture_count_pixel)));
        texture_cache_->WriteActiveTextureSRVKeys(
            current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
            texture_count_pixel);
      }
      // Current samplers have already been updated.
      for (size_t i = 0; i < sampler_count_pixel; ++i) {
        descriptor_indices[(*samplers_pixel)[i].bindless_descriptor_index] =
            current_sampler_bindless_indices_pixel_[i];
      }
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << kRootParameter_Bindless_DescriptorIndicesPixel);
    }
  } else {
    //
    // Bindful descriptors path.
    //

    // See what descriptors need to be updated.
    // Samplers have already been checked.
    bool write_textures_vertex =
        texture_count_vertex &&
        (!bindful_textures_written_vertex_ ||
         current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
             texture_count_vertex));
    bool write_textures_pixel =
        texture_count_pixel &&
        (!bindful_textures_written_pixel_ ||
         current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
             texture_count_pixel));
    bool write_samplers_vertex =
        sampler_count_vertex && !bindful_samplers_written_vertex_;
    bool write_samplers_pixel =
        sampler_count_pixel && !bindful_samplers_written_pixel_;
    bool edram_rov_used = render_target_cache_->GetPath() ==
                          RenderTargetCache::Path::kPixelShaderInterlock;

    // Allocate the descriptors.
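    // A partial update appends only the changed descriptors to the current
    // heap page; a full update (on a new page) also includes the shared memory
    // SRV and UAV, plus the EDRAM UAV when the ROV output path is used.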
    size_t view_count_partial_update = 0;
    if (write_textures_vertex) {
      view_count_partial_update += texture_count_vertex;
    }
    if (write_textures_pixel) {
      view_count_partial_update += texture_count_pixel;
    }
    // All the constants + shared memory SRV and UAV + textures.
    size_t view_count_full_update =
        2 + texture_count_vertex + texture_count_pixel;
    if (edram_rov_used) {
      // + EDRAM UAV.
      ++view_count_full_update;
    }
    D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle;
    D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle;
    uint32_t descriptor_size_view = provider.GetViewDescriptorSize();
    uint64_t view_heap_index = RequestViewBindfulDescriptors(
        draw_view_bindful_heap_index_, uint32_t(view_count_partial_update),
        uint32_t(view_count_full_update), view_cpu_handle, view_gpu_handle);
    if (view_heap_index ==
        ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
      XELOGE("Failed to allocate view descriptors");
      return false;
    }
    size_t sampler_count_partial_update = 0;
    if (write_samplers_vertex) {
      sampler_count_partial_update += sampler_count_vertex;
    }
    if (write_samplers_pixel) {
      sampler_count_partial_update += sampler_count_pixel;
    }
    D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {};
    D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {};
    uint32_t descriptor_size_sampler = provider.GetSamplerDescriptorSize();
    uint64_t sampler_heap_index =
        ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid;
    if (sampler_count_vertex != 0 || sampler_count_pixel != 0) {
      sampler_heap_index = RequestSamplerBindfulDescriptors(
          draw_sampler_bindful_heap_index_,
          uint32_t(sampler_count_partial_update),
          uint32_t(sampler_count_vertex + sampler_count_pixel),
          sampler_cpu_handle, sampler_gpu_handle);
      if (sampler_heap_index ==
          ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
        XELOGE("Failed to allocate sampler descriptors");
        return false;
      }
    }
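    // Descriptors written to a previous heap page are not visible through the
    // newly bound page, so switching pages forces a full rewrite.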
    if (draw_view_bindful_heap_index_ != view_heap_index) {
      // Need to update all view descriptors.
      write_textures_vertex = texture_count_vertex != 0;
      write_textures_pixel = texture_count_pixel != 0;
      bindful_textures_written_vertex_ = false;
      bindful_textures_written_pixel_ = false;
      // If updating fully, write the shared memory SRV and UAV descriptors
      // and, if needed, the EDRAM descriptor.
      gpu_handle_shared_memory_and_edram_ = view_gpu_handle;
      shared_memory_->WriteRawSRVDescriptor(view_cpu_handle);
      view_cpu_handle.ptr += descriptor_size_view;
      view_gpu_handle.ptr += descriptor_size_view;
      shared_memory_->WriteRawUAVDescriptor(view_cpu_handle);
      view_cpu_handle.ptr += descriptor_size_view;
      view_gpu_handle.ptr += descriptor_size_view;
      if (edram_rov_used) {
        render_target_cache_->WriteEdramUintPow2UAVDescriptor(view_cpu_handle,
                                                              2);
        view_cpu_handle.ptr += descriptor_size_view;
        view_gpu_handle.ptr += descriptor_size_view;
      }
      current_graphics_root_up_to_date_ &=
          ~(1u << kRootParameter_Bindful_SharedMemoryAndEdram);
    }
    if (sampler_heap_index !=
            ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid &&
        draw_sampler_bindful_heap_index_ != sampler_heap_index) {
      write_samplers_vertex = sampler_count_vertex != 0;
      write_samplers_pixel = sampler_count_pixel != 0;
      bindful_samplers_written_vertex_ = false;
      bindful_samplers_written_pixel_ = false;
    }

    // Write the descriptors.
    if (write_textures_vertex) {
      assert_true(current_graphics_root_bindful_extras_.textures_vertex !=
                  RootBindfulExtraParameterIndices::kUnavailable);
      gpu_handle_textures_vertex_ = view_gpu_handle;
      for (size_t i = 0; i < texture_count_vertex; ++i) {
        texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i],
                                                     view_cpu_handle);
        view_cpu_handle.ptr += descriptor_size_view;
        view_gpu_handle.ptr += descriptor_size_view;
      }
      current_texture_layout_uid_vertex_ = texture_layout_uid_vertex;
      current_texture_srv_keys_vertex_.resize(
          std::max(current_texture_srv_keys_vertex_.size(),
                   size_t(texture_count_vertex)));
      texture_cache_->WriteActiveTextureSRVKeys(
          current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
          texture_count_vertex);
      bindful_textures_written_vertex_ = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << current_graphics_root_bindful_extras_.textures_vertex);
    }
    if (write_textures_pixel) {
      assert_true(current_graphics_root_bindful_extras_.textures_pixel !=
                  RootBindfulExtraParameterIndices::kUnavailable);
      gpu_handle_textures_pixel_ = view_gpu_handle;
      for (size_t i = 0; i < texture_count_pixel; ++i) {
        texture_cache_->WriteActiveTextureBindfulSRV((*textures_pixel)[i],
                                                     view_cpu_handle);
        view_cpu_handle.ptr += descriptor_size_view;
        view_gpu_handle.ptr += descriptor_size_view;
      }
      current_texture_layout_uid_pixel_ = texture_layout_uid_pixel;
      current_texture_srv_keys_pixel_.resize(std::max(
          current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel)));
      texture_cache_->WriteActiveTextureSRVKeys(
          current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
          texture_count_pixel);
      bindful_textures_written_pixel_ = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << current_graphics_root_bindful_extras_.textures_pixel);
    }
    if (write_samplers_vertex) {
      assert_true(current_graphics_root_bindful_extras_.samplers_vertex !=
                  RootBindfulExtraParameterIndices::kUnavailable);
      gpu_handle_samplers_vertex_ = sampler_gpu_handle;
      for (size_t i = 0; i < sampler_count_vertex; ++i) {
        texture_cache_->WriteSampler(current_samplers_vertex_[i],
                                     sampler_cpu_handle);
        sampler_cpu_handle.ptr += descriptor_size_sampler;
        sampler_gpu_handle.ptr += descriptor_size_sampler;
      }
      // Current samplers have already been updated.
      bindful_samplers_written_vertex_ = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << current_graphics_root_bindful_extras_.samplers_vertex);
    }
    if (write_samplers_pixel) {
      assert_true(current_graphics_root_bindful_extras_.samplers_pixel !=
                  RootBindfulExtraParameterIndices::kUnavailable);
      gpu_handle_samplers_pixel_ = sampler_gpu_handle;
      for (size_t i = 0; i < sampler_count_pixel; ++i) {
        texture_cache_->WriteSampler(current_samplers_pixel_[i],
                                     sampler_cpu_handle);
        sampler_cpu_handle.ptr += descriptor_size_sampler;
        sampler_gpu_handle.ptr += descriptor_size_sampler;
      }
      // Current samplers have already been updated.
      bindful_samplers_written_pixel_ = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << current_graphics_root_bindful_extras_.samplers_pixel);
    }

    // Wrote new descriptors on the current page.
    draw_view_bindful_heap_index_ = view_heap_index;
    if (sampler_heap_index !=
        ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) {
      draw_sampler_bindful_heap_index_ = sampler_heap_index;
    }
  }

  // Update the root parameters.
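  // Only the bindings whose dirty bits have been cleared in
  // current_graphics_root_up_to_date_ are re-sent to the deferred command
  // list.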
  if (!(current_graphics_root_up_to_date_ &
        (1u << root_parameter_fetch_constants))) {
    deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
        root_parameter_fetch_constants, cbuffer_binding_fetch_.address);
    current_graphics_root_up_to_date_ |= 1u << root_parameter_fetch_constants;
  }
  if (!(current_graphics_root_up_to_date_ &
        (1u << root_parameter_float_constants_vertex))) {
    deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
        root_parameter_float_constants_vertex,
        cbuffer_binding_float_vertex_.address);
    current_graphics_root_up_to_date_ |=
        1u << root_parameter_float_constants_vertex;
  }
  if (!(current_graphics_root_up_to_date_ &
        (1u << root_parameter_float_constants_pixel))) {
    deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
        root_parameter_float_constants_pixel,
        cbuffer_binding_float_pixel_.address);
    current_graphics_root_up_to_date_ |=
        1u << root_parameter_float_constants_pixel;
  }
  if (!(current_graphics_root_up_to_date_ &
        (1u << root_parameter_system_constants))) {
    deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
        root_parameter_system_constants, cbuffer_binding_system_.address);
    current_graphics_root_up_to_date_ |= 1u << root_parameter_system_constants;
  }
  if (!(current_graphics_root_up_to_date_ &
        (1u << root_parameter_bool_loop_constants))) {
    deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
        root_parameter_bool_loop_constants, cbuffer_binding_bool_loop_.address);
    current_graphics_root_up_to_date_ |= 1u
                                         << root_parameter_bool_loop_constants;
  }
  if (bindless_resources_used_) {
    if (!(current_graphics_root_up_to_date_ &
          (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) {
      deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
          kRootParameter_Bindless_DescriptorIndicesPixel,
          cbuffer_binding_descriptor_indices_pixel_.address);
      current_graphics_root_up_to_date_ |=
          1u << kRootParameter_Bindless_DescriptorIndicesPixel;
    }
    if (!(current_graphics_root_up_to_date_ &
          (1u << kRootParameter_Bindless_DescriptorIndicesVertex))) {
      deferred_command_list_.D3DSetGraphicsRootConstantBufferView(
          kRootParameter_Bindless_DescriptorIndicesVertex,
          cbuffer_binding_descriptor_indices_vertex_.address);
      current_graphics_root_up_to_date_ |=
          1u << kRootParameter_Bindless_DescriptorIndicesVertex;
    }
    if (!(current_graphics_root_up_to_date_ &
          (1u << kRootParameter_Bindless_SamplerHeap))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          kRootParameter_Bindless_SamplerHeap,
          sampler_bindless_heap_gpu_start_);
      current_graphics_root_up_to_date_ |=
          1u << kRootParameter_Bindless_SamplerHeap;
    }
    if (!(current_graphics_root_up_to_date_ &
          (1u << kRootParameter_Bindless_ViewHeap))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          kRootParameter_Bindless_ViewHeap, view_bindless_heap_gpu_start_);
      current_graphics_root_up_to_date_ |= 1u
                                           << kRootParameter_Bindless_ViewHeap;
    }
  } else {
    if (!(current_graphics_root_up_to_date_ &
          (1u << kRootParameter_Bindful_SharedMemoryAndEdram))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          kRootParameter_Bindful_SharedMemoryAndEdram,
          gpu_handle_shared_memory_and_edram_);
      current_graphics_root_up_to_date_ |=
          1u << kRootParameter_Bindful_SharedMemoryAndEdram;
    }
    uint32_t extra_index;
    extra_index = current_graphics_root_bindful_extras_.textures_pixel;
    if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
        !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          extra_index, gpu_handle_textures_pixel_);
      current_graphics_root_up_to_date_ |= 1u << extra_index;
    }
    extra_index = current_graphics_root_bindful_extras_.samplers_pixel;
    if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
        !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          extra_index, gpu_handle_samplers_pixel_);
      current_graphics_root_up_to_date_ |= 1u << extra_index;
    }
    extra_index = current_graphics_root_bindful_extras_.textures_vertex;
    if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
        !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          extra_index, gpu_handle_textures_vertex_);
      current_graphics_root_up_to_date_ |= 1u << extra_index;
    }
    extra_index = current_graphics_root_bindful_extras_.samplers_vertex;
    if (extra_index != RootBindfulExtraParameterIndices::kUnavailable &&
        !(current_graphics_root_up_to_date_ & (1u << extra_index))) {
      deferred_command_list_.D3DSetGraphicsRootDescriptorTable(
          extra_index, gpu_handle_samplers_vertex_);
      current_graphics_root_up_to_date_ |= 1u << extra_index;
    }
  }

  return true;
}

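// Returns the size of one element exported by a shader in 32-bit words for the
// given format, or 0 if the format is not supported for memory export
// readback.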
uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
    xenos::ColorFormat format) {
  switch (format) {
    case xenos::ColorFormat::k_8_8_8_8:
    case xenos::ColorFormat::k_2_10_10_10:
    // TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
    // texture cache currently.
    // case xenos::ColorFormat::k_8_8_8_8_A:
    case xenos::ColorFormat::k_10_11_11:
    case xenos::ColorFormat::k_11_11_10:
    case xenos::ColorFormat::k_16_16:
    case xenos::ColorFormat::k_16_16_FLOAT:
    case xenos::ColorFormat::k_32_FLOAT:
    case xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16:
    case xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16:
    case xenos::ColorFormat::k_10_11_11_AS_16_16_16_16:
    case xenos::ColorFormat::k_11_11_10_AS_16_16_16_16:
      return 1;
    case xenos::ColorFormat::k_16_16_16_16:
    case xenos::ColorFormat::k_16_16_16_16_FLOAT:
    case xenos::ColorFormat::k_32_32_FLOAT:
      return 2;
    case xenos::ColorFormat::k_32_32_32_32_FLOAT:
      return 4;
    default:
      break;
  }
  return 0;
}

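// Returns a readback buffer of at least `size` bytes, growing it in
// kReadbackBufferSizeIncrement steps so one committed resource can be reused
// across requests.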
ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) {
  if (size == 0) {
    return nullptr;
  }
  size = xe::align(size, kReadbackBufferSizeIncrement);
  if (size > readback_buffer_size_) {
    const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
    ID3D12Device* device = provider.GetDevice();
    D3D12_RESOURCE_DESC buffer_desc;
    ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size,
                                            D3D12_RESOURCE_FLAG_NONE);
    ID3D12Resource* buffer;
    if (FAILED(device->CreateCommittedResource(
            &ui::d3d12::util::kHeapPropertiesReadback,
            provider.GetHeapFlagCreateNotZeroed(), &buffer_desc,
            D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&buffer)))) {
      XELOGE("Failed to create a {} MB readback buffer", size >> 20);
      return nullptr;
    }
    if (readback_buffer_ != nullptr) {
      readback_buffer_->Release();
    }
    readback_buffer_ = buffer;
    // Track the new capacity so the buffer isn't recreated on every request.
    readback_buffer_size_ = size;
  }
  return readback_buffer_;
}

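// Creates an SRV for one of the two gamma ramps stored in gamma_ramp_buffer_:
// the 256-entry 10:10:10:2 table ramp at the start of the buffer, or the PWL
// ramp (128 R16G16 base/delta entries per color channel) stored after it.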
void D3D12CommandProcessor::WriteGammaRampSRV(
    bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const {
  ID3D12Device* device = GetD3D12Provider().GetDevice();
  D3D12_SHADER_RESOURCE_VIEW_DESC desc;
  desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
  desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  desc.Buffer.StructureByteStride = 0;
  desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  if (is_pwl) {
    desc.Format = DXGI_FORMAT_R16G16_UINT;
    desc.Buffer.FirstElement = 256 * 4 / 4;
    desc.Buffer.NumElements = 128 * 3;
  } else {
    desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
    desc.Buffer.FirstElement = 0;
    desc.Buffer.NumElements = 256;
  }
  device->CreateShaderResourceView(gamma_ramp_buffer_.Get(), &desc, handle);
}

}  // namespace d3d12
}  // namespace gpu
}  // namespace xe