/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2018 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/d3d12/render_target_cache.h"

#include <algorithm>
#include <cmath>
#include <cstring>

#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/string.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/texture_util.h"
#include "xenia/ui/d3d12/d3d12_util.h"

DEFINE_bool(d3d12_16bit_rtv_full_range, true,
            "Use full -32...32 range for RG16 and RGBA16 render targets "
            "(at the expense of blending correctness) without ROV.",
            "D3D12");

namespace xe {
namespace gpu {
namespace d3d12 {

// Generated with `xb buildhlsl`.
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float24and32_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_load_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float24and32_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/edram_store_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_clear_depth_24_32_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_1x2xmsaa_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_32bpp_4xmsaa_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_1x2xmsaa_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_fast_64bpp_4xmsaa_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_128bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_16bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_2xres_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_full_8bpp_cs.h"
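
// Table of EDRAM load/store compute shaders, indexed by EdramLoadStoreMode.
// Note: "7e3" refers to the k_2_10_10_10_FLOAT color format, whose RGB
// channels are 10-bit floats with a 7-bit mantissa and a 3-bit exponent.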
const RenderTargetCache::EdramLoadStoreModeInfo
    RenderTargetCache::edram_load_store_mode_info_[size_t(
        RenderTargetCache::EdramLoadStoreMode::kCount)] = {
        {edram_load_color_32bpp_cs, sizeof(edram_load_color_32bpp_cs),
         L"EDRAM Load 32bpp Color", edram_store_color_32bpp_cs,
         sizeof(edram_store_color_32bpp_cs), L"EDRAM Store 32bpp Color"},
        {edram_load_color_64bpp_cs, sizeof(edram_load_color_64bpp_cs),
         L"EDRAM Load 64bpp Color", edram_store_color_64bpp_cs,
         sizeof(edram_store_color_64bpp_cs), L"EDRAM Store 64bpp Color"},
        {edram_load_color_7e3_cs, sizeof(edram_load_color_7e3_cs),
         L"EDRAM Load 7e3 Color", edram_store_color_7e3_cs,
         sizeof(edram_store_color_7e3_cs), L"EDRAM Store 7e3 Color"},
        {edram_load_depth_unorm_cs, sizeof(edram_load_depth_unorm_cs),
         L"EDRAM Load UNorm Depth", edram_store_depth_unorm_cs,
         sizeof(edram_store_depth_unorm_cs), L"EDRAM Store UNorm Depth"},
        {edram_load_depth_float_cs, sizeof(edram_load_depth_float_cs),
         L"EDRAM Load Float Depth", edram_store_depth_float_cs,
         sizeof(edram_store_depth_float_cs), L"EDRAM Store Float Depth"},
        {edram_load_depth_float24and32_cs,
         sizeof(edram_load_depth_float24and32_cs),
         L"EDRAM Load 24-bit & 32-bit Float Depth",
         edram_store_depth_float24and32_cs,
         sizeof(edram_store_depth_float24and32_cs),
         L"EDRAM Store 24-bit & 32-bit Float Depth"},
};

const std::pair<const uint8_t*, size_t>
    RenderTargetCache::resolve_copy_shaders_[size_t(
        draw_util::ResolveCopyShaderIndex::kCount)] = {
        {resolve_fast_32bpp_1x2xmsaa_cs,
         sizeof(resolve_fast_32bpp_1x2xmsaa_cs)},
        {resolve_fast_32bpp_4xmsaa_cs, sizeof(resolve_fast_32bpp_4xmsaa_cs)},
        {resolve_fast_32bpp_2xres_cs, sizeof(resolve_fast_32bpp_2xres_cs)},
        {resolve_fast_64bpp_1x2xmsaa_cs,
         sizeof(resolve_fast_64bpp_1x2xmsaa_cs)},
        {resolve_fast_64bpp_4xmsaa_cs, sizeof(resolve_fast_64bpp_4xmsaa_cs)},
        {resolve_fast_64bpp_2xres_cs, sizeof(resolve_fast_64bpp_2xres_cs)},
        {resolve_full_8bpp_cs, sizeof(resolve_full_8bpp_cs)},
        {resolve_full_8bpp_2xres_cs, sizeof(resolve_full_8bpp_2xres_cs)},
        {resolve_full_16bpp_cs, sizeof(resolve_full_16bpp_cs)},
        {resolve_full_16bpp_2xres_cs, sizeof(resolve_full_16bpp_2xres_cs)},
        {resolve_full_32bpp_cs, sizeof(resolve_full_32bpp_cs)},
        {resolve_full_32bpp_2xres_cs, sizeof(resolve_full_32bpp_2xres_cs)},
        {resolve_full_64bpp_cs, sizeof(resolve_full_64bpp_cs)},
        {resolve_full_64bpp_2xres_cs, sizeof(resolve_full_64bpp_2xres_cs)},
        {resolve_full_128bpp_cs, sizeof(resolve_full_128bpp_cs)},
        {resolve_full_128bpp_2xres_cs, sizeof(resolve_full_128bpp_2xres_cs)},
};

RenderTargetCache::RenderTargetCache(D3D12CommandProcessor& command_processor,
                                     const RegisterFile& register_file,
                                     TraceWriter& trace_writer,
                                     bool bindless_resources_used,
                                     bool edram_rov_used)
    : command_processor_(command_processor),
      register_file_(register_file),
      trace_writer_(trace_writer),
      bindless_resources_used_(bindless_resources_used),
      edram_rov_used_(edram_rov_used) {}

RenderTargetCache::~RenderTargetCache() { Shutdown(); }

bool RenderTargetCache::Initialize(const TextureCache& texture_cache) {
  depth_float24_conversion_ = flags::GetDepthFloat24Conversion();

  // EDRAM buffer size depends on this.
  resolution_scale_2x_ = texture_cache.IsResolutionScale2X();
  assert_false(resolution_scale_2x_ && !edram_rov_used_);

  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();

  uint32_t edram_buffer_size = GetEdramBufferSize();
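  // Note: the Xenos EDRAM is 10 MiB; with 2x resolution scaling every guest
  // sample maps to 2x2 host samples, so the host buffer is correspondingly
  // larger (which is why the size is queried here rather than hardcoded).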

  // Create the buffer for reinterpreting EDRAM contents.
  D3D12_RESOURCE_DESC edram_buffer_desc;
  ui::d3d12::util::FillBufferResourceDesc(
      edram_buffer_desc, edram_buffer_size,
      D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
  // The first operation will likely be drawing with ROV or a load without ROV.
  edram_buffer_state_ = edram_rov_used_
                            ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS
                            : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
  // Request zeroed (though no guarantee) when not using ROV so the host 32-bit
  // depth buffer will be initialized to deterministic values (because it's
  // involved in comparison with converted 24-bit values - whether the 32-bit
  // value is up to date is determined by whether it's equal to the 24-bit
  // value in the main EDRAM buffer when converted to 24-bit).
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault,
          edram_rov_used_ ? provider.GetHeapFlagCreateNotZeroed()
                          : D3D12_HEAP_FLAG_NONE,
          &edram_buffer_desc, edram_buffer_state_, nullptr,
          IID_PPV_ARGS(&edram_buffer_)))) {
    XELOGE("Failed to create the EDRAM buffer");
    Shutdown();
    return false;
  }
  edram_buffer_modified_ = false;

  // Create non-shader-visible descriptors of the EDRAM buffer for copying.
  D3D12_DESCRIPTOR_HEAP_DESC edram_buffer_descriptor_heap_desc;
  edram_buffer_descriptor_heap_desc.Type =
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
  edram_buffer_descriptor_heap_desc.NumDescriptors =
      uint32_t(EdramBufferDescriptorIndex::kCount);
  edram_buffer_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
  edram_buffer_descriptor_heap_desc.NodeMask = 0;
  if (FAILED(device->CreateDescriptorHeap(
          &edram_buffer_descriptor_heap_desc,
          IID_PPV_ARGS(&edram_buffer_descriptor_heap_)))) {
    XELOGE("Failed to create the descriptor heap for EDRAM buffer views");
    Shutdown();
    return false;
  }
  edram_buffer_descriptor_heap_start_ =
      edram_buffer_descriptor_heap_->GetCPUDescriptorHandleForHeapStart();
  ui::d3d12::util::CreateBufferRawSRV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kRawSRV)),
      edram_buffer_, edram_buffer_size);
  ui::d3d12::util::CreateBufferTypedSRV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kR32UintSRV)),
      edram_buffer_, DXGI_FORMAT_R32_UINT, edram_buffer_size >> 2);
  ui::d3d12::util::CreateBufferTypedSRV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kR32G32UintSRV)),
      edram_buffer_, DXGI_FORMAT_R32G32_UINT, edram_buffer_size >> 3);
  ui::d3d12::util::CreateBufferTypedSRV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kR32G32B32A32UintSRV)),
      edram_buffer_, DXGI_FORMAT_R32G32B32A32_UINT, edram_buffer_size >> 4);
  ui::d3d12::util::CreateBufferRawUAV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kRawUAV)),
      edram_buffer_, edram_buffer_size);
  ui::d3d12::util::CreateBufferTypedUAV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kR32UintUAV)),
      edram_buffer_, DXGI_FORMAT_R32_UINT, edram_buffer_size >> 2);
  ui::d3d12::util::CreateBufferTypedUAV(
      device,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kR32G32B32A32UintUAV)),
      edram_buffer_, DXGI_FORMAT_R32G32B32A32_UINT, edram_buffer_size >> 4);

  if (!edram_rov_used_) {
    // Create the root signature for EDRAM buffer load/store.
    D3D12_ROOT_PARAMETER load_store_root_parameters[3];
    // Parameter 0 is constants (changed for each render target binding).
    load_store_root_parameters[0].ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    load_store_root_parameters[0].Constants.ShaderRegister = 0;
    load_store_root_parameters[0].Constants.RegisterSpace = 0;
    load_store_root_parameters[0].Constants.Num32BitValues =
        sizeof(EdramLoadStoreRootConstants) / sizeof(uint32_t);
    load_store_root_parameters[0].ShaderVisibility =
        D3D12_SHADER_VISIBILITY_ALL;
    // Parameter 1 is the destination.
    D3D12_DESCRIPTOR_RANGE load_store_root_dest_range;
    load_store_root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
    load_store_root_dest_range.NumDescriptors = 1;
    load_store_root_dest_range.BaseShaderRegister = 0;
    load_store_root_dest_range.RegisterSpace = 0;
    load_store_root_dest_range.OffsetInDescriptorsFromTableStart = 0;
    load_store_root_parameters[1].ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
    load_store_root_parameters[1].DescriptorTable.pDescriptorRanges =
        &load_store_root_dest_range;
    load_store_root_parameters[1].ShaderVisibility =
        D3D12_SHADER_VISIBILITY_ALL;
    // Parameter 2 is the source.
    D3D12_DESCRIPTOR_RANGE load_store_root_source_range;
    load_store_root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
    load_store_root_source_range.NumDescriptors = 1;
    load_store_root_source_range.BaseShaderRegister = 0;
    load_store_root_source_range.RegisterSpace = 0;
    load_store_root_source_range.OffsetInDescriptorsFromTableStart = 0;
    load_store_root_parameters[2].ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    load_store_root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
    load_store_root_parameters[2].DescriptorTable.pDescriptorRanges =
        &load_store_root_source_range;
    load_store_root_parameters[2].ShaderVisibility =
        D3D12_SHADER_VISIBILITY_ALL;
    D3D12_ROOT_SIGNATURE_DESC load_store_root_desc;
    load_store_root_desc.NumParameters =
        UINT(xe::countof(load_store_root_parameters));
    load_store_root_desc.pParameters = load_store_root_parameters;
    load_store_root_desc.NumStaticSamplers = 0;
    load_store_root_desc.pStaticSamplers = nullptr;
    load_store_root_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

    edram_load_store_root_signature_ =
        ui::d3d12::util::CreateRootSignature(provider, load_store_root_desc);
    if (edram_load_store_root_signature_ == nullptr) {
      XELOGE("Failed to create the EDRAM load/store root signature");
      Shutdown();
      return false;
    }

    // Create the EDRAM load/store pipelines.
    for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
      const EdramLoadStoreModeInfo& mode_info = edram_load_store_mode_info_[i];
      edram_load_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
          device, mode_info.load_shader, mode_info.load_shader_size,
          edram_load_store_root_signature_);
      edram_store_pipelines_[i] = ui::d3d12::util::CreateComputePipeline(
          device, mode_info.store_shader, mode_info.store_shader_size,
          edram_load_store_root_signature_);
      if (edram_load_pipelines_[i] == nullptr ||
          edram_store_pipelines_[i] == nullptr) {
        XELOGE("Failed to create the EDRAM load/store pipelines for mode {}",
               i);
        Shutdown();
        return false;
      }
      edram_load_pipelines_[i]->SetName(mode_info.load_pipeline_name);
      edram_store_pipelines_[i]->SetName(mode_info.store_pipeline_name);
    }
  }

  // Create the resolve root signatures and pipelines.
  D3D12_ROOT_PARAMETER resolve_root_parameters[3];

  // Copying root signature.
  // Parameter 0 is constants.
  resolve_root_parameters[0].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
  resolve_root_parameters[0].Constants.ShaderRegister = 0;
  resolve_root_parameters[0].Constants.RegisterSpace = 0;
  // Binding all of the shared memory at 1x resolution, portions with scaled
  // resolution.
  resolve_root_parameters[0].Constants.Num32BitValues =
      (resolution_scale_2x_
           ? sizeof(draw_util::ResolveCopyShaderConstants::DestRelative)
           : sizeof(draw_util::ResolveCopyShaderConstants)) /
      sizeof(uint32_t);
  resolve_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  // Parameter 1 is the destination (shared memory for copying, EDRAM for
  // clearing).
  D3D12_DESCRIPTOR_RANGE resolve_dest_range;
  resolve_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
  resolve_dest_range.NumDescriptors = 1;
  resolve_dest_range.BaseShaderRegister = 0;
  resolve_dest_range.RegisterSpace = 0;
  resolve_dest_range.OffsetInDescriptorsFromTableStart = 0;
  resolve_root_parameters[1].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  resolve_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
  resolve_root_parameters[1].DescriptorTable.pDescriptorRanges =
      &resolve_dest_range;
  resolve_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  // Parameter 2 is the source, if it exists (EDRAM for copying).
  D3D12_DESCRIPTOR_RANGE resolve_source_range;
  resolve_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
  resolve_source_range.NumDescriptors = 1;
  resolve_source_range.BaseShaderRegister = 0;
  resolve_source_range.RegisterSpace = 0;
  resolve_source_range.OffsetInDescriptorsFromTableStart = 0;
  resolve_root_parameters[2].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  resolve_root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
  resolve_root_parameters[2].DescriptorTable.pDescriptorRanges =
      &resolve_source_range;
  resolve_root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  D3D12_ROOT_SIGNATURE_DESC resolve_root_signature_desc;
  resolve_root_signature_desc.NumParameters = 3;
  resolve_root_signature_desc.pParameters = resolve_root_parameters;
  resolve_root_signature_desc.NumStaticSamplers = 0;
  resolve_root_signature_desc.pStaticSamplers = nullptr;
  resolve_root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
  resolve_copy_root_signature_ = ui::d3d12::util::CreateRootSignature(
      provider, resolve_root_signature_desc);
  if (resolve_copy_root_signature_ == nullptr) {
    XELOGE("Failed to create the resolve copy root signature");
    Shutdown();
    return false;
  }

  // Clearing root signature.
  resolve_root_parameters[0].Constants.Num32BitValues =
      sizeof(draw_util::ResolveClearShaderConstants) / sizeof(uint32_t);
  resolve_root_signature_desc.NumParameters = 2;
  resolve_clear_root_signature_ = ui::d3d12::util::CreateRootSignature(
      provider, resolve_root_signature_desc);
  if (resolve_clear_root_signature_ == nullptr) {
    XELOGE("Failed to create the resolve clear root signature");
    Shutdown();
    return false;
  }

  // Copying pipelines.
  uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
  for (size_t i = 0; i < size_t(draw_util::ResolveCopyShaderIndex::kCount);
       ++i) {
    // A rough sanity check that resolve_copy_shaders_ is up to date.
    assert_not_null(resolve_copy_shaders_[i].first);
    const draw_util::ResolveCopyShaderInfo& resolve_copy_shader_info =
        draw_util::resolve_copy_shader_info[i];
    if (resolve_copy_shader_info.resolution_scale != resolution_scale) {
      continue;
    }
    const auto& resolve_copy_shader = resolve_copy_shaders_[i];
    ID3D12PipelineState* resolve_copy_pipeline =
        ui::d3d12::util::CreateComputePipeline(
            device, resolve_copy_shader.first, resolve_copy_shader.second,
            resolve_copy_root_signature_);
    if (resolve_copy_pipeline == nullptr) {
      XELOGE("Failed to create {} resolve copy pipeline",
             resolve_copy_shader_info.debug_name);
    } else {
      // Only set the debug name when creation succeeded to avoid
      // dereferencing a null pipeline pointer.
      resolve_copy_pipeline->SetName(reinterpret_cast<LPCWSTR>(
          xe::to_utf16(resolve_copy_shader_info.debug_name).c_str()));
    }
    resolve_copy_pipelines_[i] = resolve_copy_pipeline;
  }

  // Clearing pipelines.
  resolve_clear_32bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
      device,
      resolution_scale_2x_ ? resolve_clear_32bpp_2xres_cs
                           : resolve_clear_32bpp_cs,
      resolution_scale_2x_ ? sizeof(resolve_clear_32bpp_2xres_cs)
                           : sizeof(resolve_clear_32bpp_cs),
      resolve_clear_root_signature_);
  if (resolve_clear_32bpp_pipeline_ == nullptr) {
    XELOGE("Failed to create the 32bpp resolve clear pipeline");
    Shutdown();
    return false;
  }
  resolve_clear_32bpp_pipeline_->SetName(L"Resolve Clear 32bpp");
  resolve_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
      device,
      resolution_scale_2x_ ? resolve_clear_64bpp_2xres_cs
                           : resolve_clear_64bpp_cs,
      resolution_scale_2x_ ? sizeof(resolve_clear_64bpp_2xres_cs)
                           : sizeof(resolve_clear_64bpp_cs),
      resolve_clear_root_signature_);
  if (resolve_clear_64bpp_pipeline_ == nullptr) {
    XELOGE("Failed to create the 64bpp resolve clear pipeline");
    Shutdown();
    return false;
  }
  resolve_clear_64bpp_pipeline_->SetName(L"Resolve Clear 64bpp");
  if (!edram_rov_used_ &&
      depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
    assert_false(resolution_scale_2x_);
    resolve_clear_depth_24_32_pipeline_ =
        ui::d3d12::util::CreateComputePipeline(
            device, resolve_clear_depth_24_32_cs,
            sizeof(resolve_clear_depth_24_32_cs),
            resolve_clear_root_signature_);
    if (resolve_clear_depth_24_32_pipeline_ == nullptr) {
      XELOGE(
          "Failed to create the 24-bit and 32-bit depth resolve clear "
          "pipeline state");
      Shutdown();
      return false;
    }
    resolve_clear_depth_24_32_pipeline_->SetName(
        L"Resolve Clear 24-bit & 32-bit Depth");
  }

  ClearBindings();

  return true;
}

void RenderTargetCache::Shutdown() {
  ClearCache();

  edram_snapshot_restore_pool_.reset();
  ui::d3d12::util::ReleaseAndNull(edram_snapshot_download_buffer_);
  ui::d3d12::util::ReleaseAndNull(resolve_clear_depth_24_32_pipeline_);
  ui::d3d12::util::ReleaseAndNull(resolve_clear_64bpp_pipeline_);
  ui::d3d12::util::ReleaseAndNull(resolve_clear_32bpp_pipeline_);
  ui::d3d12::util::ReleaseAndNull(resolve_clear_root_signature_);
  for (size_t i = 0; i < xe::countof(resolve_copy_pipelines_); ++i) {
    ui::d3d12::util::ReleaseAndNull(resolve_copy_pipelines_[i]);
  }
  ui::d3d12::util::ReleaseAndNull(resolve_copy_root_signature_);
  for (uint32_t i = 0; i < uint32_t(EdramLoadStoreMode::kCount); ++i) {
    ui::d3d12::util::ReleaseAndNull(edram_store_pipelines_[i]);
    ui::d3d12::util::ReleaseAndNull(edram_load_pipelines_[i]);
  }
  ui::d3d12::util::ReleaseAndNull(edram_load_store_root_signature_);
  ui::d3d12::util::ReleaseAndNull(edram_buffer_descriptor_heap_);
  ui::d3d12::util::ReleaseAndNull(edram_buffer_);
}

void RenderTargetCache::ClearCache() {
  for (auto render_target_pair : render_targets_) {
    RenderTarget* render_target = render_target_pair.second;
    render_target->resource->Release();
    delete render_target;
  }
  render_targets_.clear();
  COUNT_profile_set("gpu/render_target_cache/render_targets", 0);

  while (descriptor_heaps_depth_ != nullptr) {
    auto heap = descriptor_heaps_depth_;
    heap->heap->Release();
    descriptor_heaps_depth_ = heap->previous;
    delete heap;
  }
  while (descriptor_heaps_color_ != nullptr) {
    auto heap = descriptor_heaps_color_;
    heap->heap->Release();
    descriptor_heaps_color_ = heap->previous;
    delete heap;
  }

#if 0
  for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
    if (heaps_[i] != nullptr) {
      heaps_[i]->Release();
      heaps_[i] = nullptr;
    }
  }
#endif

  edram_snapshot_restore_pool_.reset();
}

void RenderTargetCache::CompletedSubmissionUpdated() {
  if (edram_snapshot_restore_pool_) {
    edram_snapshot_restore_pool_->Reclaim(
        command_processor_.GetCompletedSubmission());
  }
}

void RenderTargetCache::BeginSubmission() {
  // With the ROV, a submission does not always end in a resolve (for example,
  // when memexport readback happens) or something else that would surely
  // submit the UAV barrier, so we need to preserve the `current_` variables.
  //
  // With RTVs, simply going to a different command list doesn't have to cause
  // storing the render targets to the EDRAM buffer, however, the new command
  // list doesn't have the needed RTVs/DSV bound yet.
  //
  // Just make sure they are bound to the new command list.
  apply_to_command_list_ = true;
}

void RenderTargetCache::EndFrame() {
  // May be clearing the cache after this.
  FlushAndUnbindRenderTargets();
}

bool RenderTargetCache::UpdateRenderTargets(
    uint32_t shader_writes_color_targets) {
  // There are two kinds of render target binding updates in this
  // implementation when something has changed - full and partial.
  //
  // For the RTV/DSV path, a full update involves flushing all the currently
  // bound render targets that have been modified to the EDRAM buffer,
  // allocating all the newly bound render targets in the heaps, loading them
  // from the EDRAM buffer and binding them.
  //
  // For the ROV path, a full update places a UAV barrier because across draws,
  // pixels with different SV_Positions or different sample counts (thus
  // without interlocking between each other) may access the same data now. Not
  // having the barriers causes visual glitches in many games, such as Halo 3
  // where the right side of the menu and shadow maps get corrupted (at least
  // on Nvidia).
  //
  // ("Bound" here means ever used since the last full update - and in this
  // case it's bound to the Direct3D 12 command list in the RTV/DSV path.)
  //
  // However, Banjo-Kazooie interleaves color/depth and depth-only writes every
  // draw call, and doing a full update whenever the color mask is changed is
  // too expensive. So, we shouldn't do a full update if the game only toggles
  // color writes and depth testing. Instead, we're only adding or re-enabling
  // render targets if color writes are being enabled (adding involves loading
  // the contents from the EDRAM, while re-enabling does nothing on the D3D
  // side).
  //
  // There are cases when simply toggling render targets may still require
  // EDRAM stores and thus a full update. Here's an example situation:
  // Draw 1:
  // - 32bpp RT0 0-10 MB
  // - 32bpp RT1 3-10 MB
  // - 1280x720 viewport
  // Draw 2:
  // - 32bpp RT0 0-10 MB
  // - Inactive RT1
  // - 1280x1440 viewport
  // Draw 3:
  // - 32bpp RT0 0-10 MB
  // - 32bpp RT1 3-10 MB
  // - 1280x720 viewport
  // In this case, before draw 2, RT1 must be written to the EDRAM buffer, and
  // RT0 must be loaded, and also before draw 3 RT1 must receive the changes
  // made to the lower part of RT0. So, before draws 2 and 3, full updates must
  // be done.
  //
  // Direct3D 12 also requires all render targets to have the same size, so the
  // height is calculated from the EDRAM space available to the last render
  // target available in it. However, to make toggling render targets like in
  // the Banjo-Kazooie case possible, the height may be decreased only in full
  // updates.
  // TODO(Triang3l): Check if it's safe to calculate the smallest EDRAM region
  // without aliasing and use it for the height. This won't work if games
  // actually alias active render targets for some reason.
  //
  // To summarize, a full update happens if:
  // - Starting a new frame.
  // - Drawing after resolving.
  // - Surface pitch changed.
  // - Sample count changed.
  // - Render target is disabled and another render target got more space than
  //   is currently available in the textures (RTV/DSV only).
  // - EDRAM base of a currently used RT changed.
  // - Format of a currently used RT changed (RTV/DSV) or pixel size of a
  //   currently used RT changed (ROV).
  // - Current viewport contains unsaved data from previously used render
  //   targets.
  // - New render target overlaps unsaved data from other bound render targets.
  //
  // "Previously used" and "new" in the last 2 conditions is important so if
  // the game has render targets aliased in the same draw call, there won't be
  // a full update every draw.
  //
  // A partial update happens if:
  // - New render target is added, but doesn't overlap unsaved data from other
  //   currently or previously used render targets, and it doesn't require a
  //   bigger size.

  const auto& regs = register_file_;

#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
  uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u);
  if (surface_pitch == 0) {
    // TODO(Triang3l): Do something if a memexport-only draw has 0 surface
    // pitch (never seen in any game so far, not sure if even legal).
    return false;
  }
  uint32_t msaa_samples_x =
      rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1;
  uint32_t msaa_samples_y =
      rb_surface_info.msaa_samples >= xenos::MsaaSamples::k2X ? 2 : 1;

  // Extract color/depth info in a unified way.
  bool enabled[5];
  uint32_t edram_bases[5];
  uint32_t formats[5];
  bool formats_are_64bpp[5];
  uint32_t color_mask =
      command_processor_.GetCurrentColorMask(shader_writes_color_targets);
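  // Each of the four color render targets has four write mask bits (one per
  // channel) in the color mask, hence the 0xF << (i * 4) tests below.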
  for (uint32_t i = 0; i < 4; ++i) {
    enabled[i] = (color_mask & (0xF << (i * 4))) != 0;
    auto color_info = regs.Get<reg::RB_COLOR_INFO>(
        reg::RB_COLOR_INFO::rt_register_indices[i]);
    edram_bases[i] = std::min(color_info.color_base, 2048u);
    formats[i] = uint32_t(GetBaseColorFormat(color_info.color_format));
    formats_are_64bpp[i] = xenos::IsColorRenderTargetFormat64bpp(
        xenos::ColorRenderTargetFormat(formats[i]));
  }
  auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs);
  auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
  // 0x1 = stencil test, 0x2 = depth test.
  enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable;
  edram_bases[4] = std::min(rb_depth_info.depth_base, 2048u);
  formats[4] = uint32_t(rb_depth_info.depth_format);
  formats_are_64bpp[4] = false;
  // Don't mark depth regions as dirty if not writing the depth.
  // TODO(Triang3l): Make a common function for checking if stencil writing is
  // really done?
  bool depth_readonly =
      !rb_depthcontrol.stencil_enable && !rb_depthcontrol.z_write_enable;

  bool full_update = false;

  // Check the following full update conditions:
  // - Starting a new frame.
  // - Drawing after resolving.
  // - Surface pitch changed.
  // - Sample count changed.
  // Draws are skipped if the surface pitch is 0, so a full update can be
  // forced in the beginning of the frame or after resolves by setting the
  // current pitch to 0.
  if (current_surface_pitch_ != surface_pitch ||
      current_msaa_samples_ != rb_surface_info.msaa_samples) {
    full_update = true;
  }

  // Get the maximum height of each render target in EDRAM rows to help
  // clamp the dirty region heights.
  uint32_t edram_row_tiles_32bpp = (surface_pitch * msaa_samples_x + 79) / 80;
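  // An EDRAM tile is 80x16 samples at 32bpp (40x16 at 64bpp, handled via the
  // doubling below), so, for example, a 1280-pixel surface pitch at 2x
  // horizontal MSAA is 2560 samples, or ceil(2560 / 80) = 32 tiles per row.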
  uint32_t edram_row_tiles[5];
  uint32_t edram_max_rows = UINT32_MAX;
  for (uint32_t i = 0; i < 5; ++i) {
    edram_row_tiles[i] = edram_row_tiles_32bpp * (formats_are_64bpp[i] ? 2 : 1);
    if (enabled[i]) {
      // Direct3D 12 doesn't allow render targets with different sizes, so
      // calculate the height from the render target closest to the end of
      // EDRAM.
      edram_max_rows = std::min(edram_max_rows,
                                (2048 - edram_bases[i]) / edram_row_tiles[i]);
    }
  }
  if (edram_max_rows == UINT32_MAX) {
    // No render targets needed - likely a memexport-only draw, just keep using
    // the current state (or 0 if nothing bound yet, but nothing will be bound
    // anyway so it won't matter).
    edram_max_rows = current_edram_max_rows_;
  } else {
    if (edram_max_rows == 0) {
      // Some render target is located at the very end of EDRAM - can't create
      // textures with 0 height.
      return false;
    }
  }
  // Don't create render targets larger than x2560.
  edram_max_rows = std::min(edram_max_rows, 160u * msaa_samples_y);
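  // (160 tile rows * 16 samples per tile row = 2560 sample rows; scaling by
  // the vertical sample count keeps the cap at 2560 pixel rows.)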
  // Check the following full update conditions:
  // - Render target is disabled and another render target got more space than
  //   is currently available in the textures (RTV/DSV only).
  if (!edram_rov_used_ && edram_max_rows > current_edram_max_rows_) {
    full_update = true;
  }

  // Get EDRAM usage of the current draw so dirty regions can be calculated.
  // See D3D12CommandProcessor::UpdateFixedFunctionState for more info.
  int32_t window_offset_y =
      regs.Get<reg::PA_SC_WINDOW_OFFSET>().window_y_offset;
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  float viewport_scale_y = pa_cl_vte_cntl.vport_y_scale_ena
                               ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
                               : 1280.0f;
  float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
                                : std::abs(viewport_scale_y);
  if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
    viewport_offset_y += float(window_offset_y);
  }
  uint32_t viewport_bottom = uint32_t(std::max(
      0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y))));
  uint32_t scissor_bottom = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>().br_y;
  if (!regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>().window_offset_disable) {
    scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0);
  }
  uint32_t dirty_bottom =
      std::min(std::min(viewport_bottom, scissor_bottom), 2560u);
  uint32_t edram_dirty_rows =
      std::min((dirty_bottom * msaa_samples_y + 15) >> 4, edram_max_rows);
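  // (The >> 4 converts sample rows to 16-sample-tall EDRAM tile rows, rounding
  // up.)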

  // Check the following full update conditions:
  // - EDRAM base of a currently used RT changed.
  // - Format of a currently used RT changed (RTV/DSV) or pixel size of a
  //   currently used RT changed (ROV).
  // Also build a list of render targets to attach in a partial update.
  uint32_t render_targets_to_attach = 0;
  if (!full_update) {
    for (uint32_t i = 0; i < 5; ++i) {
      if (!enabled[i]) {
        continue;
      }
      const RenderTargetBinding& binding = current_bindings_[i];
      if (binding.is_bound) {
        if (binding.edram_base != edram_bases[i]) {
          full_update = true;
          break;
        }
        if (edram_rov_used_) {
          if (i != 4) {
            full_update |= xenos::IsColorRenderTargetFormat64bpp(
                               binding.color_format) != formats_are_64bpp[i];
          }
        } else {
          full_update |= binding.format != formats[i];
        }
        if (full_update) {
          break;
        }
      } else {
        render_targets_to_attach |= 1 << i;
      }
    }
  }

  // Check the following full update conditions here:
  // - Current viewport contains unsaved data from previously used render
  //   targets.
  // - New render target overlaps unsaved data from other bound render
  //   targets.
  if (!full_update) {
    for (uint32_t i = 0; i < 5; ++i) {
      const RenderTargetBinding& binding_1 = current_bindings_[i];
      uint32_t edram_dirty_rows_1;
      if (binding_1.is_bound) {
        if (enabled[i]) {
          continue;
        }
        // Checking if now overlapping a previously used render target.
        // binding_1 is the previously used render target.
        edram_dirty_rows_1 = binding_1.edram_dirty_rows;
      } else {
        if (!(render_targets_to_attach & (1 << i))) {
          continue;
        }
        // Checking if the new render target is overlapping any bound one.
        // binding_1 is the new render target.
        edram_dirty_rows_1 = edram_dirty_rows;
      }
      for (uint32_t j = 0; j < 5; ++j) {
        const RenderTargetBinding& binding_2 = current_bindings_[j];
        if (!binding_2.is_bound) {
          continue;
        }
        uint32_t edram_dirty_rows_2;
        if (binding_1.is_bound) {
          if (!enabled[j]) {
            continue;
          }
          // Checking if now overlapping a previously used render target.
          // binding_2 is a currently used render target.
          edram_dirty_rows_2 = edram_dirty_rows;
        } else {
          // Checking if the new render target is overlapping any bound one.
          // binding_2 is another bound render target.
          edram_dirty_rows_2 = binding_2.edram_dirty_rows;
        }
        // Do a full update if there is overlap.
        if (edram_bases[i] <
                edram_bases[j] + edram_dirty_rows_2 * edram_row_tiles[j] &&
            edram_bases[j] <
                edram_bases[i] + edram_dirty_rows_1 * edram_row_tiles[i]) {
          full_update = true;
          break;
        }
      }
      if (full_update) {
        break;
      }
    }
  }

  bool sample_positions_set = false;

  // Need to change the bindings.
  if (full_update || render_targets_to_attach) {
#if 0
    uint32_t heap_usage[5] = {};
#endif
    if (full_update) {
      if (edram_rov_used_) {
        // Place a UAV barrier because across draws, pixels with different
        // SV_Positions or different sample counts (thus without interlocking
        // between each other) may access the same data now.
        CommitEdramBufferUAVWrites(false);
      } else {
        // Export the currently bound render targets before we ruin the
        // bindings.
        StoreRenderTargetsToEdram();
      }

      ClearBindings();
      current_surface_pitch_ = surface_pitch;
      current_msaa_samples_ = rb_surface_info.msaa_samples;
      if (!edram_rov_used_) {
        current_edram_max_rows_ = edram_max_rows;
      }

      // If updating fully, need to reattach all the render targets and
      // allocate from scratch.
      for (uint32_t i = 0; i < 5; ++i) {
        if (enabled[i]) {
          render_targets_to_attach |= 1 << i;
        }
      }
    } else {
#if 0
      if (!edram_rov_used_) {
        // If updating partially, only need to attach new render targets.
        for (uint32_t i = 0; i < 5; ++i) {
          const RenderTargetBinding& binding = current_bindings_[i];
          if (!binding.is_bound) {
            continue;
          }
          const RenderTarget* render_target = binding.render_target;
          if (render_target != nullptr) {
            // There are no holes between 4 MB pages in each heap.
            heap_usage[render_target->heap_page_first / kHeap4MBPages] +=
                render_target->heap_page_count;
          }
        }
      }
#endif
    }
    XELOGGPU("RT Cache: {} update - pitch {}, samples {}, RTs to attach {}",
             full_update ? "Full" : "Partial", surface_pitch,
             rb_surface_info.msaa_samples, render_targets_to_attach);

#if 0
    auto device =
        command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
#endif

    // Allocate new render targets and add them to the bindings list.
    for (uint32_t i = 0; i < 5; ++i) {
      if (!(render_targets_to_attach & (1 << i))) {
        continue;
      }
      RenderTargetBinding& binding = current_bindings_[i];
      binding.is_bound = true;
      binding.edram_base = edram_bases[i];
      binding.edram_dirty_rows = 0;
      binding.format = formats[i];
      binding.render_target = nullptr;

      if (!edram_rov_used_) {
        RenderTargetKey key;
        key.width_ss_div_80 = edram_row_tiles_32bpp;
        key.height_ss_div_16 = current_edram_max_rows_;
        key.is_depth = i == 4 ? 1 : 0;
        key.format = formats[i];

        D3D12_RESOURCE_DESC resource_desc;
        if (!GetResourceDesc(key, resource_desc)) {
          // Invalid format.
          continue;
        }

#if 0
        // Calculate the number of 4 MB pages of the heaps this RT will use.
        D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
            device->GetResourceAllocationInfo(0, 1, &resource_desc);
        if (allocation_info.SizeInBytes == 0 ||
            allocation_info.SizeInBytes > (kHeap4MBPages << 22)) {
          assert_always();
          continue;
        }
        uint32_t heap_page_count =
            (uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;

        // Find the heap page range for this render target.
        uint32_t heap_page_first = UINT32_MAX;
        for (uint32_t j = 0; j < 5; ++j) {
          if (heap_usage[j] + heap_page_count <= kHeap4MBPages) {
            heap_page_first = j * kHeap4MBPages + heap_usage[j];
            break;
          }
        }
        if (heap_page_first == UINT32_MAX) {
          assert_always();
          continue;
        }

        // Get the render target.
        binding.render_target = FindOrCreateRenderTarget(key, heap_page_first);
        if (binding.render_target == nullptr) {
          continue;
        }
        heap_usage[heap_page_first / kHeap4MBPages] += heap_page_count;

        // Inform Direct3D that we're reusing the heap for this render target.
        command_processor_.PushAliasingBarrier(
            nullptr, binding.render_target->resource);
#else
        // If multiple render targets have the same format, assign different
        // instance numbers to them.
        uint32_t instance = 0;
        if (i != 4) {
          for (uint32_t j = 0; j < i; ++j) {
            const RenderTargetBinding& other_binding = current_bindings_[j];
            if (other_binding.is_bound &&
                other_binding.render_target != nullptr &&
                other_binding.format == formats[i]) {
              ++instance;
            }
          }
        }
        binding.render_target = FindOrCreateRenderTarget(key, instance);
#endif
      }
    }

    if (!edram_rov_used_) {
      // Sample positions when loading depth must match sample positions when
      // drawing.
      command_processor_.SetSamplePositions(current_msaa_samples_);
      sample_positions_set = true;

      // Load the contents of the new render targets from the EDRAM buffer
      // (will change the state of the render targets to copy destination).
      RenderTarget* load_render_targets[5];
      uint32_t load_edram_bases[5];
      uint32_t load_render_target_count = 0;
      for (uint32_t i = 0; i < 5; ++i) {
        if (!(render_targets_to_attach & (1 << i))) {
          continue;
        }
        RenderTarget* render_target = current_bindings_[i].render_target;
        if (render_target == nullptr) {
          continue;
        }
        load_render_targets[load_render_target_count] = render_target;
        load_edram_bases[load_render_target_count] = edram_bases[i];
        ++load_render_target_count;
      }
      if (load_render_target_count != 0) {
        LoadRenderTargetsFromEdram(load_render_target_count,
                                   load_render_targets, load_edram_bases);
      }

      // Transition the render targets to the appropriate state if needed,
      // compress the list of the render targets because null RTV descriptors
      // are broken in Direct3D 12, and update the list of the render targets
      // to bind to the command list.
      uint32_t rtv_count = 0;
      for (uint32_t i = 0; i < 4; ++i) {
        const RenderTargetBinding& binding = current_bindings_[i];
        if (!binding.is_bound) {
          continue;
        }
        RenderTarget* render_target = binding.render_target;
        if (render_target == nullptr) {
          continue;
        }
        XELOGGPU("RT Color {}: base {}, format {}", i, edram_bases[i],
                 formats[i]);
        command_processor_.PushTransitionBarrier(
            render_target->resource, render_target->state,
            D3D12_RESOURCE_STATE_RENDER_TARGET);
        render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
        current_pipeline_render_targets_[rtv_count].guest_render_target = i;
        current_pipeline_render_targets_[rtv_count].format =
            GetColorDXGIFormat(xenos::ColorRenderTargetFormat(formats[i]));
        ++rtv_count;
      }
      for (uint32_t i = rtv_count; i < 4; ++i) {
        current_pipeline_render_targets_[i].guest_render_target = i;
        current_pipeline_render_targets_[i].format = DXGI_FORMAT_UNKNOWN;
      }
      const RenderTargetBinding& depth_binding = current_bindings_[4];
      RenderTarget* depth_render_target = depth_binding.render_target;
      current_pipeline_render_targets_[4].guest_render_target = 4;
      if (depth_binding.is_bound && depth_render_target != nullptr) {
        XELOGGPU("RT Depth: base {}, format {}", edram_bases[4], formats[4]);
        command_processor_.PushTransitionBarrier(
            depth_render_target->resource, depth_render_target->state,
            D3D12_RESOURCE_STATE_DEPTH_WRITE);
        depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
        current_pipeline_render_targets_[4].format =
            GetDepthDXGIFormat(xenos::DepthRenderTargetFormat(formats[4]));
      } else {
        current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
      }
      command_processor_.SubmitBarriers();
      apply_to_command_list_ = true;
    }
  }

  // Bind the render targets to the command list, either in case of an update
  // or if asked to externally.
  if (!edram_rov_used_ && apply_to_command_list_) {
    apply_to_command_list_ = false;
    if (!sample_positions_set) {
      command_processor_.SetSamplePositions(current_msaa_samples_);
    }
    D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
    uint32_t rtv_count;
    for (rtv_count = 0; rtv_count < 4; ++rtv_count) {
      const PipelineRenderTarget& pipeline_render_target =
          current_pipeline_render_targets_[rtv_count];
      if (pipeline_render_target.format == DXGI_FORMAT_UNKNOWN) {
        break;
      }
      const RenderTargetBinding& binding =
          current_bindings_[pipeline_render_target.guest_render_target];
      rtv_handles[rtv_count] = binding.render_target->handle;
    }
    const RenderTargetBinding& depth_binding = current_bindings_[4];
    const D3D12_CPU_DESCRIPTOR_HANDLE* dsv_handle =
        current_pipeline_render_targets_[4].format != DXGI_FORMAT_UNKNOWN
            ? &depth_binding.render_target->handle
            : nullptr;
    command_processor_.GetDeferredCommandList().D3DOMSetRenderTargets(
        rtv_count, rtv_handles, FALSE, dsv_handle);
  }

  // Update the dirty regions.
  for (uint32_t i = 0; i < 5; ++i) {
    if (!enabled[i] || (i == 4 && depth_readonly)) {
      continue;
    }
    RenderTargetBinding& binding = current_bindings_[i];
    if (!edram_rov_used_ && binding.render_target == nullptr) {
      // Nothing to store to the EDRAM buffer if there was an error.
      continue;
    }
    binding.edram_dirty_rows =
        std::max(binding.edram_dirty_rows, edram_dirty_rows);
  }

  if (edram_rov_used_) {
    // The buffer will be used for ROV drawing now.
    TransitionEdramBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    edram_buffer_modified_ = true;
  }

  return true;
}

bool RenderTargetCache::Resolve(const Memory& memory,
                                D3D12SharedMemory& shared_memory,
                                TextureCache& texture_cache,
                                uint32_t& written_address_out,
                                uint32_t& written_length_out) {
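  // A resolve copies a rectangle of EDRAM contents to a texture in the guest
  // memory (the shared memory buffer, or the scaled resolve buffer with 2x
  // resolution scaling) and/or clears an EDRAM region; both parts are
  // performed with compute shaders here.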
  written_address_out = 0;
  written_length_out = 0;

  uint32_t resolution_scale = resolution_scale_2x_ ? 2 : 1;
  draw_util::ResolveInfo resolve_info;
  if (!draw_util::GetResolveInfo(
          register_file_, memory, trace_writer_, resolution_scale,
          !edram_rov_used_ && !cvars::d3d12_16bit_rtv_full_range,
          resolve_info)) {
    return false;
  }

  // Nothing to copy/clear.
  if (!resolve_info.address.width_div_8 ||
      !resolve_info.address.height_div_8) {
    return true;
  }

  if (!edram_rov_used_) {
    // Save the currently bound render targets to the EDRAM buffer that will be
    // used as the resolve source, and clear the bindings so that render target
    // resources can be reused as source textures for format conversion and
    // sample resolving, so that format conversion can bind other render
    // targets, and so that new data will be loaded after a clear.
    StoreRenderTargetsToEdram();
    ClearBindings();
  }

  auto& command_list = command_processor_.GetDeferredCommandList();

  // Copying.
  bool copied = false;
  if (resolve_info.copy_dest_length) {
    draw_util::ResolveCopyShaderConstants copy_shader_constants;
    uint32_t copy_group_count_x, copy_group_count_y;
    draw_util::ResolveCopyShaderIndex copy_shader =
        resolve_info.GetCopyShader(resolution_scale, copy_shader_constants,
                                   copy_group_count_x, copy_group_count_y);
    assert_true(copy_group_count_x && copy_group_count_y);
    if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) {
      const draw_util::ResolveCopyShaderInfo& copy_shader_info =
          draw_util::resolve_copy_shader_info[size_t(copy_shader)];

      // Make sure we have the memory to write to.
      bool copy_dest_resident;
      if (resolution_scale_2x_) {
        copy_dest_resident = texture_cache.EnsureScaledResolveBufferResident(
            resolve_info.copy_dest_base, resolve_info.copy_dest_length);
      } else {
        copy_dest_resident = shared_memory.RequestRange(
            resolve_info.copy_dest_base, resolve_info.copy_dest_length);
      }
      if (copy_dest_resident) {
        // Write the descriptors and transition the resources.
        // Full shared memory without resolution scaling, range of the scaled
        // resolve buffer with scaling because only 128 R32 elements can be
        // addressed on Nvidia.
        ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest;
        ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
        ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
        if (command_processor_.RequestOneUseSingleViewDescriptors(
                bindless_resources_used_ ? (resolution_scale_2x_ ? 1 : 0) : 2,
                descriptors)) {
          if (bindless_resources_used_) {
            if (resolution_scale_2x_) {
              descriptor_dest = descriptors[0];
            } else {
              descriptor_dest =
                  command_processor_
                      .GetSharedMemoryUintPow2BindlessUAVHandlePair(
                          copy_shader_info.dest_bpe_log2);
            }
            if (copy_shader_info.source_is_raw) {
              descriptor_source =
                  command_processor_.GetSystemBindlessViewHandlePair(
                      D3D12CommandProcessor::SystemBindlessView::kEdramRawSRV);
            } else {
              descriptor_source =
                  command_processor_.GetEdramUintPow2BindlessSRVHandlePair(
                      copy_shader_info.source_bpe_log2);
            }
          } else {
            descriptor_dest = descriptors[0];
            if (!resolution_scale_2x_) {
              shared_memory.WriteUintPow2UAVDescriptor(
                  descriptor_dest.first, copy_shader_info.dest_bpe_log2);
            }
            descriptor_source = descriptors[1];
            if (copy_shader_info.source_is_raw) {
              WriteEdramRawSRVDescriptor(descriptor_source.first);
            } else {
              WriteEdramUintPow2SRVDescriptor(descriptor_source.first,
                                              copy_shader_info.source_bpe_log2);
            }
          }
          if (resolution_scale_2x_) {
            texture_cache.CreateScaledResolveBufferUintPow2UAV(
                descriptor_dest.first, resolve_info.copy_dest_base,
                resolve_info.copy_dest_length, copy_shader_info.dest_bpe_log2);
            texture_cache.UseScaledResolveBufferForWriting();
          } else {
            shared_memory.UseForWriting();
          }
          TransitionEdramBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);

          // Submit the resolve.
          command_list.D3DSetComputeRootSignature(resolve_copy_root_signature_);
          command_list.D3DSetComputeRootDescriptorTable(
              2, descriptor_source.second);
          command_list.D3DSetComputeRootDescriptorTable(1,
                                                        descriptor_dest.second);
          if (resolution_scale_2x_) {
            command_list.D3DSetComputeRoot32BitConstants(
                0,
                sizeof(copy_shader_constants.dest_relative) / sizeof(uint32_t),
                &copy_shader_constants.dest_relative, 0);
          } else {
            command_list.D3DSetComputeRoot32BitConstants(
                0, sizeof(copy_shader_constants) / sizeof(uint32_t),
                &copy_shader_constants, 0);
          }
          command_processor_.SetComputePipeline(
              resolve_copy_pipelines_[size_t(copy_shader)]);
          command_processor_.SubmitBarriers();
          command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1);

          // Order the resolve with other work using the destination as a UAV.
          if (resolution_scale_2x_) {
            texture_cache.MarkScaledResolveBufferUAVWritesCommitNeeded();
          } else {
            shared_memory.MarkUAVWritesCommitNeeded();
          }

          // Invalidate textures and mark the range as scaled if needed.
          texture_cache.MarkRangeAsResolved(resolve_info.copy_dest_base,
                                            resolve_info.copy_dest_length);
          written_address_out = resolve_info.copy_dest_base;
          written_length_out = resolve_info.copy_dest_length;
          copied = true;
        }
      } else {
        XELOGE("Failed to obtain the resolve destination memory region");
      }
    }
  } else {
    copied = true;
  }

  // Clearing.
  bool cleared = false;
  bool clear_depth = resolve_info.IsClearingDepth();
  bool clear_color = resolve_info.IsClearingColor();
  if (clear_depth || clear_color) {
    ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
    bool descriptor_edram_obtained;
    if (bindless_resources_used_) {
      descriptor_edram = command_processor_.GetSystemBindlessViewHandlePair(
          D3D12CommandProcessor::SystemBindlessView::kEdramR32G32B32A32UintUAV);
      descriptor_edram_obtained = true;
    } else {
      descriptor_edram_obtained =
          command_processor_.RequestOneUseSingleViewDescriptors(
              1, &descriptor_edram);
      if (descriptor_edram_obtained) {
        WriteEdramUintPow2UAVDescriptor(descriptor_edram.first, 4);
      }
    }
    if (descriptor_edram_obtained) {
      TransitionEdramBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
      // It should be safe to commit UAV writes only once here (even if the
      // buffer was already in the UAV state - for instance, if there was
      // nothing to copy for some reason); overlap of the depth and the color
      // clear ranges is highly unlikely.
      CommitEdramBufferUAVWrites(false);
      command_list.D3DSetComputeRootSignature(resolve_clear_root_signature_);
      command_list.D3DSetComputeRootDescriptorTable(1,
                                                    descriptor_edram.second);
      std::pair<uint32_t, uint32_t> clear_group_count =
          resolve_info.GetClearShaderGroupCount();
      assert_true(clear_group_count.first && clear_group_count.second);
      if (clear_depth) {
        // Also clear the host 32-bit floating-point depth used for loading and
        // storing 24-bit floating-point depth at full precision.
        bool clear_float32_depth = !edram_rov_used_ &&
                                   depth_float24_conversion_ ==
                                       flags::DepthFloat24Conversion::kOnCopy &&
                                   xenos::DepthRenderTargetFormat(
                                       resolve_info.depth_edram_info.format) ==
                                       xenos::DepthRenderTargetFormat::kD24FS8;
        draw_util::ResolveClearShaderConstants depth_clear_constants;
        resolve_info.GetDepthClearShaderConstants(clear_float32_depth,
                                                  depth_clear_constants);
        command_list.D3DSetComputeRoot32BitConstants(
            0, sizeof(depth_clear_constants) / sizeof(uint32_t),
            &depth_clear_constants, 0);
        command_processor_.SetComputePipeline(
            clear_float32_depth ? resolve_clear_depth_24_32_pipeline_
                                : resolve_clear_32bpp_pipeline_);
        command_processor_.SubmitBarriers();
        command_list.D3DDispatch(clear_group_count.first,
                                 clear_group_count.second, 1);
      }
      if (clear_color) {
        draw_util::ResolveClearShaderConstants color_clear_constants;
        resolve_info.GetColorClearShaderConstants(color_clear_constants);
        if (clear_depth) {
          // Non-RT-specific constants have already been set.
          command_list.D3DSetComputeRoot32BitConstants(
              0, sizeof(color_clear_constants.rt_specific) / sizeof(uint32_t),
              &color_clear_constants.rt_specific,
              offsetof(draw_util::ResolveClearShaderConstants, rt_specific) /
                  sizeof(uint32_t));
        } else {
          command_list.D3DSetComputeRoot32BitConstants(
              0, sizeof(color_clear_constants) / sizeof(uint32_t),
              &color_clear_constants, 0);
        }
        command_processor_.SetComputePipeline(
            resolve_info.color_edram_info.format_is_64bpp
                ? resolve_clear_64bpp_pipeline_
                : resolve_clear_32bpp_pipeline_);
        command_processor_.SubmitBarriers();
        command_list.D3DDispatch(clear_group_count.first,
                                 clear_group_count.second, 1);
      }
      assert_true(edram_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
      edram_buffer_modified_ = true;
      cleared = true;
    }
  } else {
    cleared = true;
  }

  return copied && cleared;
}

void RenderTargetCache::FlushAndUnbindRenderTargets() {
  if (edram_rov_used_) {
    return;
  }
  StoreRenderTargetsToEdram();
  ClearBindings();
}

void RenderTargetCache::WriteEdramRawSRVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kRawSRV)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

void RenderTargetCache::WriteEdramRawUAVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle) {
  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider.OffsetViewDescriptor(
          edram_buffer_descriptor_heap_start_,
          uint32_t(EdramBufferDescriptorIndex::kRawUAV)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

void RenderTargetCache::WriteEdramUintPow2SRVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) {
  EdramBufferDescriptorIndex descriptor_index;
  switch (element_size_bytes_pow2) {
    case 2:
      descriptor_index = EdramBufferDescriptorIndex::kR32UintSRV;
      break;
    case 3:
      descriptor_index = EdramBufferDescriptorIndex::kR32G32UintSRV;
      break;
    case 4:
      descriptor_index = EdramBufferDescriptorIndex::kR32G32B32A32UintSRV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      return;
  }
  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider.OffsetViewDescriptor(edram_buffer_descriptor_heap_start_,
                                    uint32_t(descriptor_index)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

void RenderTargetCache::WriteEdramUintPow2UAVDescriptor(
    D3D12_CPU_DESCRIPTOR_HANDLE handle, uint32_t element_size_bytes_pow2) {
  EdramBufferDescriptorIndex descriptor_index;
  switch (element_size_bytes_pow2) {
    case 2:
      descriptor_index = EdramBufferDescriptorIndex::kR32UintUAV;
      break;
    // 3 is not needed currently.
    case 4:
      descriptor_index = EdramBufferDescriptorIndex::kR32G32B32A32UintUAV;
      break;
    default:
      assert_unhandled_case(element_size_bytes_pow2);
      return;
  }
  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();
  device->CopyDescriptorsSimple(
      1, handle,
      provider.OffsetViewDescriptor(edram_buffer_descriptor_heap_start_,
                                    uint32_t(descriptor_index)),
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

xenos::ColorRenderTargetFormat RenderTargetCache::GetBaseColorFormat(
    xenos::ColorRenderTargetFormat format) {
  switch (format) {
    case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
      return xenos::ColorRenderTargetFormat::k_8_8_8_8;
    case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
      return xenos::ColorRenderTargetFormat::k_2_10_10_10;
    case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
      return xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT;
    default:
      return format;
  }
}

DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
    xenos::ColorRenderTargetFormat format) {
  switch (format) {
    case xenos::ColorRenderTargetFormat::k_8_8_8_8:
    case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
      return DXGI_FORMAT_R8G8B8A8_UNORM;
    case xenos::ColorRenderTargetFormat::k_2_10_10_10:
    case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
      return DXGI_FORMAT_R10G10B10A2_UNORM;
    case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
    case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
    case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
      return DXGI_FORMAT_R16G16B16A16_FLOAT;
    case xenos::ColorRenderTargetFormat::k_16_16:
      return DXGI_FORMAT_R16G16_SNORM;
    case xenos::ColorRenderTargetFormat::k_16_16_16_16:
      return DXGI_FORMAT_R16G16B16A16_SNORM;
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
|
return DXGI_FORMAT_R16G16_FLOAT;
|
|
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
|
return DXGI_FORMAT_R32_FLOAT;
|
|
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
|
|
return DXGI_FORMAT_R32G32_FLOAT;
|
|
default:
|
|
break;
|
|
}
|
|
return DXGI_FORMAT_UNKNOWN;
|
|
}
|
|
|
|
bool RenderTargetCache::InitializeTraceSubmitDownloads() {
|
|
if (resolution_scale_2x_) {
|
|
// No 1:1 mapping.
|
|
return false;
|
|
}
|
|
if (!edram_snapshot_download_buffer_) {
|
|
D3D12_RESOURCE_DESC edram_snapshot_download_buffer_desc;
|
|
ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc,
|
|
xenos::kEdramSizeBytes,
|
|
D3D12_RESOURCE_FLAG_NONE);
|
|
auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
|
|
auto device = provider.GetDevice();
|
|
if (FAILED(device->CreateCommittedResource(
|
|
&ui::d3d12::util::kHeapPropertiesReadback,
|
|
provider.GetHeapFlagCreateNotZeroed(),
|
|
&edram_snapshot_download_buffer_desc,
|
|
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
|
|
IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) {
|
|
XELOGE("Failed to create a EDRAM snapshot download buffer");
|
|
return false;
|
|
}
|
|
}
|
|
auto& command_list = command_processor_.GetDeferredCommandList();
|
|
TransitionEdramBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
|
|
command_processor_.SubmitBarriers();
|
|
command_list.D3DCopyBufferRegion(edram_snapshot_download_buffer_, 0,
|
|
edram_buffer_, 0, xenos::kEdramSizeBytes);
|
|
return true;
|
|
}
|
|
|
|
void RenderTargetCache::InitializeTraceCompleteDownloads() {
|
|
if (!edram_snapshot_download_buffer_) {
|
|
return;
|
|
}
|
|
void* download_mapping;
|
|
if (SUCCEEDED(edram_snapshot_download_buffer_->Map(0, nullptr,
|
|
&download_mapping))) {
|
|
trace_writer_.WriteEdramSnapshot(download_mapping);
|
|
D3D12_RANGE download_write_range = {};
|
|
edram_snapshot_download_buffer_->Unmap(0, &download_write_range);
|
|
} else {
|
|
XELOGE("Failed to map the EDRAM snapshot download buffer");
|
|
}
|
|
edram_snapshot_download_buffer_->Release();
|
|
edram_snapshot_download_buffer_ = nullptr;
|
|
}
|
|
|
|
void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) {
|
|
if (resolution_scale_2x_) {
|
|
// No 1:1 mapping.
|
|
return;
|
|
}
|
|
auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
|
|
if (!edram_snapshot_restore_pool_) {
|
|
edram_snapshot_restore_pool_ =
|
|
std::make_unique<ui::d3d12::D3D12UploadBufferPool>(
|
|
provider, xenos::kEdramSizeBytes);
|
|
}
|
|
ID3D12Resource* upload_buffer;
|
|
size_t upload_buffer_offset;
|
|
void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request(
|
|
command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, 1,
|
|
&upload_buffer, &upload_buffer_offset, nullptr);
|
|
if (!upload_buffer_mapping) {
|
|
XELOGE("Failed to get a buffer for restoring a EDRAM snapshot");
|
|
return;
|
|
}
|
|
std::memcpy(upload_buffer_mapping, snapshot, xenos::kEdramSizeBytes);
|
|
auto& command_list = command_processor_.GetDeferredCommandList();
|
|
TransitionEdramBuffer(D3D12_RESOURCE_STATE_COPY_DEST);
|
|
command_processor_.SubmitBarriers();
|
|
command_list.D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer,
|
|
UINT64(upload_buffer_offset),
|
|
xenos::kEdramSizeBytes);
|
|
if (!edram_rov_used_) {
|
|
// Clear and ignore the old 32-bit float depth - the non-ROV path is
|
|
// inaccurate anyway, and this is backend-specific, not a part of a guest
|
|
// trace.
|
|
bool edram_shader_visible_r32_uav_obtained;
|
|
ui::d3d12::util::DescriptorCPUGPUHandlePair edram_shader_visible_r32_uav;
|
|
if (bindless_resources_used_) {
|
|
edram_shader_visible_r32_uav_obtained = true;
|
|
edram_shader_visible_r32_uav =
|
|
command_processor_.GetSystemBindlessViewHandlePair(
|
|
D3D12CommandProcessor::SystemBindlessView::kEdramR32UintUAV);
|
|
} else {
|
|
edram_shader_visible_r32_uav_obtained =
|
|
command_processor_.RequestOneUseSingleViewDescriptors(
|
|
1, &edram_shader_visible_r32_uav);
|
|
if (edram_shader_visible_r32_uav_obtained) {
|
|
WriteEdramUintPow2UAVDescriptor(edram_shader_visible_r32_uav.first, 2);
|
|
}
|
|
}
|
|
if (edram_shader_visible_r32_uav_obtained) {
|
|
UINT clear_value[4] = {0, 0, 0, 0};
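      // The R32_UINT view addresses the EDRAM buffer in 4-byte elements, so
      // elements [kEdramSizeBytes >> 2, kEdramSizeBytes >> 1) are the second
      // 10 MB page - the host float32 depth copy - which is what the rect
      // below selects.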
      D3D12_RECT clear_rect;
      clear_rect.left = xenos::kEdramSizeBytes >> 2;
      clear_rect.top = 0;
      clear_rect.right = (xenos::kEdramSizeBytes >> 2) << 1;
      clear_rect.bottom = 1;
      TransitionEdramBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
      command_processor_.SubmitBarriers();
      // ClearUnorderedAccessView takes a shader-visible GPU descriptor and a
      // non-shader-visible CPU descriptor.
      command_list.D3DClearUnorderedAccessViewUint(
          edram_shader_visible_r32_uav.second,
          provider.OffsetViewDescriptor(
              edram_buffer_descriptor_heap_start_,
              uint32_t(EdramBufferDescriptorIndex::kR32UintUAV)),
          edram_buffer_, clear_value, 1, &clear_rect);
    }
  }
}

uint32_t RenderTargetCache::GetEdramBufferSize() const {
  uint32_t size = xenos::kEdramSizeBytes;
  if (!edram_rov_used_ &&
      depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy) {
    // Two 10 MB pages: one containing color and integer depth data, the other
    // holding 32-bit float depth when 20e4 depth is used, to allow multipass
    // drawing without precision loss across EDRAM stores and loads.
    size *= 2;
  }
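  // With 2x2 resolution scaling, every guest EDRAM byte is backed by four host
  // bytes (2x along each axis).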
  if (resolution_scale_2x_) {
    size *= 4;
  }
  return size;
}

void RenderTargetCache::TransitionEdramBuffer(D3D12_RESOURCE_STATES new_state) {
  command_processor_.PushTransitionBarrier(edram_buffer_, edram_buffer_state_,
                                           new_state);
  edram_buffer_state_ = new_state;
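  // Leaving the UAV state through a transition barrier already synchronizes
  // any pending UAV writes, so the modification flag doesn't need to be
  // carried over.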
  if (new_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
    edram_buffer_modified_ = false;
  }
}

void RenderTargetCache::CommitEdramBufferUAVWrites(bool force) {
  if ((edram_buffer_modified_ || force) &&
      edram_buffer_state_ == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
    command_processor_.PushUAVBarrier(edram_buffer_);
  }
  edram_buffer_modified_ = false;
}

void RenderTargetCache::ClearBindings() {
  current_surface_pitch_ = 0;
  current_msaa_samples_ = xenos::MsaaSamples::k1X;
  current_edram_max_rows_ = 0;
  std::memset(current_bindings_, 0, sizeof(current_bindings_));
  apply_to_command_list_ = true;
}

#if 0
bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) {
  if (heap_index >= 5) {
    assert_always();
    return false;
  }
  if (heaps_[heap_index] != nullptr) {
    return true;
  }
  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();
  D3D12_HEAP_DESC heap_desc = {};
  heap_desc.SizeInBytes = kHeap4MBPages << 22;
  heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
  // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
  heap_desc.Alignment = 0;
  heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES |
                    provider.GetHeapFlagCreateNotZeroed();
  if (FAILED(
          device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) {
    XELOGE("Failed to create a {} MB heap for render targets",
           kHeap4MBPages * 4);
    return false;
  }
  return true;
}
#endif

bool RenderTargetCache::EnsureRTVHeapAvailable(bool is_depth) {
  auto& heap = is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
  if (heap != nullptr &&
      heap->descriptors_used < kRenderTargetDescriptorHeapSize) {
    return true;
  }
  auto device =
      command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
  D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
  heap_desc.Type = is_depth ? D3D12_DESCRIPTOR_HEAP_TYPE_DSV
                            : D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
  heap_desc.NumDescriptors = kRenderTargetDescriptorHeapSize;
  heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
  heap_desc.NodeMask = 0;
  ID3D12DescriptorHeap* new_d3d_heap;
  if (FAILED(device->CreateDescriptorHeap(&heap_desc,
                                          IID_PPV_ARGS(&new_d3d_heap)))) {
    XELOGE("Failed to create a heap for {} {} buffer descriptors",
           kRenderTargetDescriptorHeapSize, is_depth ? "depth" : "color");
    return false;
  }
  RenderTargetDescriptorHeap* new_heap = new RenderTargetDescriptorHeap;
  new_heap->heap = new_d3d_heap;
  new_heap->start_handle = new_d3d_heap->GetCPUDescriptorHandleForHeapStart();
  new_heap->descriptors_used = 0;
  new_heap->previous = heap;
  heap = new_heap;
  return true;
}

bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
                                        D3D12_RESOURCE_DESC& desc) {
  if (key.width_ss_div_80 == 0 || key.height_ss_div_16 == 0) {
    return false;
  }
  DXGI_FORMAT dxgi_format =
      key.is_depth
          ? GetDepthDXGIFormat(xenos::DepthRenderTargetFormat(key.format))
          : GetColorDXGIFormat(xenos::ColorRenderTargetFormat(key.format));
  if (dxgi_format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }
  desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
  // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB.
  desc.Alignment = 0;
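  // Render target dimensions are in 80x16-sample EDRAM tile granularity;
  // width_ss_div_80 and height_ss_div_16 are supersampled sizes in tiles.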
  desc.Width = key.width_ss_div_80 * 80;
  desc.Height = key.height_ss_div_16 * 16;
  desc.DepthOrArraySize = 1;
  desc.MipLevels = 1;
  desc.Format = dxgi_format;
  desc.SampleDesc.Count = 1;
  desc.SampleDesc.Quality = 0;
  desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
  desc.Flags = key.is_depth ? D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL
                            : D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
  return true;
}

RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
#if 0
    RenderTargetKey key, uint32_t heap_page_first
#else
    RenderTargetKey key, uint32_t instance
#endif
) {
#if 0
  assert_true(heap_page_first < kHeap4MBPages * 5);
#endif

  // Try to find an existing render target.
  auto found_range = render_targets_.equal_range(key.value);
  for (auto iter = found_range.first; iter != found_range.second; ++iter) {
    RenderTarget* found_render_target = iter->second;
#if 0
    if (found_render_target->heap_page_first == heap_page_first) {
      return found_render_target;
    }
#else
    if (found_render_target->instance == instance) {
      return found_render_target;
    }
#endif
  }

  D3D12_RESOURCE_DESC resource_desc;
  if (!GetResourceDesc(key, resource_desc)) {
    return nullptr;
  }

  auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider();
  auto device = provider.GetDevice();

#if 0
  // Get the number of heap pages needed for the render target.
  D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
      device->GetResourceAllocationInfo(0, 1, &resource_desc);
  uint32_t heap_page_count =
      (uint32_t(allocation_info.SizeInBytes) + ((4 << 20) - 1)) >> 22;
  if (heap_page_count == 0 ||
      (heap_page_first % kHeap4MBPages) + heap_page_count > kHeap4MBPages) {
    assert_always();
    return nullptr;
  }
#endif

  // Ensure we can create a new descriptor in the render target heap.
  if (!EnsureRTVHeapAvailable(key.is_depth)) {
    return nullptr;
  }

#if 0
  // Create the memory heap if it doesn't exist yet.
  uint32_t heap_index = heap_page_first / kHeap4MBPages;
  if (!MakeHeapResident(heap_index)) {
    return nullptr;
  }
#endif

  // The first action likely to be done is EDRAM buffer load.
  D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
  ID3D12Resource* resource;
#if 0
  if (FAILED(device->CreatePlacedResource(
          heaps_[heap_index], (heap_page_first % kHeap4MBPages) << 22,
          &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
    XELOGE(
        "Failed to create a placed resource for {}x{} {} render target with "
        "format {} at heap 4 MB pages {}:{}",
        uint32_t(resource_desc.Width), resource_desc.Height,
        key.is_depth ? "depth" : "color", key.format, heap_page_first,
        heap_page_first + heap_page_count - 1);
    return nullptr;
  }
#else
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault,
          provider.GetHeapFlagCreateNotZeroed(), &resource_desc, state, nullptr,
          IID_PPV_ARGS(&resource)))) {
    XELOGE(
        "Failed to create a committed resource for {}x{} {} render target with "
        "format {}",
        uint32_t(resource_desc.Width), resource_desc.Height,
        key.is_depth ? "depth" : "color", key.format);
    return nullptr;
  }
#endif

  // Create the descriptor for the render target.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
  if (key.is_depth) {
    descriptor_handle =
        provider.OffsetDSVDescriptor(descriptor_heaps_depth_->start_handle,
                                     descriptor_heaps_depth_->descriptors_used);
    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
    dsv_desc.Format = resource_desc.Format;
    dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
    dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
    dsv_desc.Texture2D.MipSlice = 0;
    device->CreateDepthStencilView(resource, &dsv_desc, descriptor_handle);
    ++descriptor_heaps_depth_->descriptors_used;
  } else {
    descriptor_handle =
        provider.OffsetRTVDescriptor(descriptor_heaps_color_->start_handle,
                                     descriptor_heaps_color_->descriptors_used);
    D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
    rtv_desc.Format = resource_desc.Format;
    rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
    rtv_desc.Texture2D.MipSlice = 0;
    rtv_desc.Texture2D.PlaneSlice = 0;
    device->CreateRenderTargetView(resource, &rtv_desc, descriptor_handle);
    ++descriptor_heaps_color_->descriptors_used;
  }

  // Get the layout for copying to the EDRAM buffer.
  RenderTarget* render_target = new RenderTarget;
  render_target->resource = resource;
  render_target->state = state;
  render_target->handle = descriptor_handle;
  render_target->key = key;
#if 0
  render_target->heap_page_first = heap_page_first;
  render_target->heap_page_count = heap_page_count;
#else
  render_target->instance = instance;
#endif
  UINT64 copy_buffer_size;
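  // Depth-stencil formats have two planes (plane 0 is depth, plane 1 is
  // stencil), so two footprints are queried for them; color formats have one.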
  device->GetCopyableFootprints(&resource_desc, 0, key.is_depth ? 2 : 1, 0,
                                render_target->footprints, nullptr, nullptr,
                                &copy_buffer_size);
  render_target->copy_buffer_size = uint32_t(copy_buffer_size);
  render_targets_.emplace(key.value, render_target);
  COUNT_profile_set("gpu/render_target_cache/render_targets",
                    render_targets_.size());
#if 0
  XELOGGPU(
      "Created {}x{} {} render target with format {} at heap 4 MB pages {}:{}",
      uint32_t(resource_desc.Width), resource_desc.Height,
      key.is_depth ? "depth" : "color", key.format, heap_page_first,
      heap_page_first + heap_page_count - 1);
#else
  XELOGGPU("Created {}x{} {} render target with format {}",
           uint32_t(resource_desc.Width), resource_desc.Height,
           key.is_depth ? "depth" : "color", key.format);
#endif
  return render_target;
}

RenderTargetCache::EdramLoadStoreMode RenderTargetCache::GetLoadStoreMode(
    bool is_depth, uint32_t format) const {
  if (is_depth) {
    if (xenos::DepthRenderTargetFormat(format) ==
        xenos::DepthRenderTargetFormat::kD24FS8) {
      return depth_float24_conversion_ == flags::DepthFloat24Conversion::kOnCopy
                 ? EdramLoadStoreMode::kDepthFloat24And32
                 : EdramLoadStoreMode::kDepthFloat;
    }
    return EdramLoadStoreMode::kDepthUnorm;
  }
  xenos::ColorRenderTargetFormat color_format =
      xenos::ColorRenderTargetFormat(format);
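  // k_2_10_10_10_FLOAT uses the Xenos 7e3 packed float encoding (7 mantissa
  // bits, 3 exponent bits per color channel), which has no direct DXGI
  // equivalent and therefore needs its own load/store path.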
  if (color_format == xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT ||
      color_format ==
          xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16) {
    return EdramLoadStoreMode::kColor7e3;
  }
  return xenos::IsColorRenderTargetFormat64bpp(color_format)
             ? EdramLoadStoreMode::kColor64bpp
             : EdramLoadStoreMode::kColor32bpp;
}

void RenderTargetCache::StoreRenderTargetsToEdram() {
  if (edram_rov_used_) {
    return;
  }

  auto& command_list = command_processor_.GetDeferredCommandList();

  // Extract only the render targets that need to be stored, transition them to
  // copy sources and calculate copy buffer size.
  uint32_t store_bindings[5];
  uint32_t store_binding_count = 0;
  uint32_t copy_buffer_size = 0;
  for (uint32_t i = 0; i < 5; ++i) {
    const RenderTargetBinding& binding = current_bindings_[i];
    RenderTarget* render_target = binding.render_target;
    if (!binding.is_bound || render_target == nullptr ||
        binding.edram_dirty_rows < 0) {
      continue;
    }
    store_bindings[store_binding_count++] = i;
    copy_buffer_size =
        std::max(copy_buffer_size, render_target->copy_buffer_size);
  }
  if (store_binding_count == 0) {
    return;
  }

  // Allocate descriptors for the buffers.
  ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
  ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
  if (bindless_resources_used_) {
    if (!command_processor_.RequestOneUseSingleViewDescriptors(
            1, &descriptor_source)) {
      return;
    }
    descriptor_edram = command_processor_.GetSystemBindlessViewHandlePair(
        D3D12CommandProcessor::SystemBindlessView::kEdramRawUAV);
  } else {
    ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
    if (!command_processor_.RequestOneUseSingleViewDescriptors(2,
                                                               descriptors)) {
      return;
    }
    descriptor_edram = descriptors[0];
    WriteEdramRawUAVDescriptor(descriptor_edram.first);
    descriptor_source = descriptors[1];
  }

  // Get the buffer for copying.
  D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
  ID3D12Resource* copy_buffer = command_processor_.RequestScratchGPUBuffer(
      copy_buffer_size, copy_buffer_state);
  if (copy_buffer == nullptr) {
    return;
  }

  // Transition the render targets that need to be stored to copy sources and
  // the EDRAM buffer to a UAV.
  for (uint32_t i = 0; i < store_binding_count; ++i) {
    RenderTarget* render_target =
        current_bindings_[store_bindings[i]].render_target;
    command_processor_.PushTransitionBarrier(render_target->resource,
                                             render_target->state,
                                             D3D12_RESOURCE_STATE_COPY_SOURCE);
    render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
  }
  TransitionEdramBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);

  // Set up the bindings.
  auto device =
      command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
  command_list.D3DSetComputeRootSignature(edram_load_store_root_signature_);
  ui::d3d12::util::CreateBufferRawSRV(device, descriptor_source.first,
                                      copy_buffer, copy_buffer_size);
  command_list.D3DSetComputeRootDescriptorTable(2, descriptor_source.second);
  command_list.D3DSetComputeRootDescriptorTable(1, descriptor_edram.second);

  // Sort the bindings in ascending order of EDRAM base so data in the render
  // targets placed farther in EDRAM isn't lost in case of overlap.
  std::sort(store_bindings, store_bindings + store_binding_count,
            [this](uint32_t a, uint32_t b) {
              uint32_t base_a = current_bindings_[a].edram_base;
              uint32_t base_b = current_bindings_[b].edram_base;
              if (base_a == base_b) {
                // If EDRAM bases are the same (not really a valid usage, but
                // happens in Banjo-Tooie - in case color writing was enabled
                // for invalid render targets in some draw call), treat the
                // render target with the lowest index as more important (it's
                // the primary one after all, while the rest are additional).
                // The depth buffer has lower priority, otherwise the Xbox Live
                // Arcade logo disappears.
                return a > b;
              }
              return base_a < base_b;
            });

  // Calculate the dispatch width.
  uint32_t surface_pitch_ss =
      current_surface_pitch_ *
      (current_msaa_samples_ >= xenos::MsaaSamples::k4X ? 2 : 1);
  uint32_t surface_pitch_tiles = (surface_pitch_ss + 79) / 80;
  assert_true(surface_pitch_tiles != 0);

  // Store each render target.
  for (uint32_t i = 0; i < store_binding_count; ++i) {
    const RenderTargetBinding& binding = current_bindings_[store_bindings[i]];
    const RenderTarget* render_target = binding.render_target;
    bool is_64bpp = false;

    // Transition the copy buffer to copy destination.
    command_processor_.PushTransitionBarrier(copy_buffer, copy_buffer_state,
                                             D3D12_RESOURCE_STATE_COPY_DEST);
    copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
    command_processor_.SubmitBarriers();

    // Copy from the render target planes and set up the layout.
    D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
    location_source.pResource = render_target->resource;
    location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    location_source.SubresourceIndex = 0;
    location_dest.pResource = copy_buffer;
    location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    location_dest.PlacedFootprint = render_target->footprints[0];
    // TODO(Triang3l): Box for color render targets.
    command_list.CopyTexture(location_dest, location_source);
    EdramLoadStoreRootConstants root_constants;
    uint32_t rt_pitch_tiles = surface_pitch_tiles;
    if (!render_target->key.is_depth &&
        xenos::IsColorRenderTargetFormat64bpp(
            xenos::ColorRenderTargetFormat(render_target->key.format))) {
      rt_pitch_tiles *= 2;
    }
    // TODO(Triang3l): log2(sample count, resolution scale).
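    // As packed here: bits 0-14 hold the base tile index, bit 15 flags a depth
    // render target, and bits 16 and above hold the render target pitch in
    // tiles (the sample count and resolution scale bits are not set yet - see
    // the TODO above).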
    root_constants.base_samples_2x_depth_pitch =
        binding.edram_base | (rt_pitch_tiles << 16);
    root_constants.rt_color_depth_offset =
        uint32_t(location_dest.PlacedFootprint.Offset);
    root_constants.rt_color_depth_pitch =
        location_dest.PlacedFootprint.Footprint.RowPitch;
    if (render_target->key.is_depth) {
      root_constants.base_samples_2x_depth_pitch |= 1 << 15;
      location_source.SubresourceIndex = 1;
      location_dest.PlacedFootprint = render_target->footprints[1];
      command_list.CopyTexture(location_dest, location_source);
      root_constants.rt_stencil_offset =
          uint32_t(location_dest.PlacedFootprint.Offset);
      root_constants.rt_stencil_pitch =
          location_dest.PlacedFootprint.Footprint.RowPitch;
    }

    // Transition the copy buffer to SRV.
    command_processor_.PushTransitionBarrier(
        copy_buffer, copy_buffer_state,
        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
    copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
    command_processor_.SubmitBarriers();

    // Store the data.
    command_list.D3DSetComputeRoot32BitConstants(
        0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
    EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
                                               render_target->key.format);
    command_processor_.SetComputePipeline(edram_store_pipelines_[size_t(mode)]);
    // 1 group per 80x16 samples.
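    // An EDRAM tile is 80x16 samples (5120 bytes at 32bpp), so the dispatch is
    // sized in tiles: surface_pitch_tiles groups across and one row of groups
    // per dirty tile row.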
    command_list.D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);

    // Commit the UAV write.
    CommitEdramBufferUAVWrites(true);
  }

  command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
}

void RenderTargetCache::LoadRenderTargetsFromEdram(
    uint32_t render_target_count, RenderTarget* const* render_targets,
    const uint32_t* edram_bases) {
  assert_true(render_target_count <= 5);
  if (render_target_count == 0 || render_target_count > 5) {
    return;
  }

  auto& command_list = command_processor_.GetDeferredCommandList();

  // Allocate descriptors for the buffers.
  ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest, descriptor_edram;
  if (bindless_resources_used_) {
    if (!command_processor_.RequestOneUseSingleViewDescriptors(
            1, &descriptor_dest)) {
      return;
    }
    descriptor_edram = command_processor_.GetSystemBindlessViewHandlePair(
        D3D12CommandProcessor::SystemBindlessView::kEdramRawSRV);
  } else {
    ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
    if (!command_processor_.RequestOneUseSingleViewDescriptors(2,
                                                               descriptors)) {
      return;
    }
    descriptor_dest = descriptors[0];
    descriptor_edram = descriptors[1];
    WriteEdramRawSRVDescriptor(descriptor_edram.first);
  }

  // Get the buffer for copying.
  uint32_t copy_buffer_size = 0;
  for (uint32_t i = 0; i < render_target_count; ++i) {
    copy_buffer_size =
        std::max(copy_buffer_size, render_targets[i]->copy_buffer_size);
  }
  D3D12_RESOURCE_STATES copy_buffer_state =
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  ID3D12Resource* copy_buffer = command_processor_.RequestScratchGPUBuffer(
      copy_buffer_size, copy_buffer_state);
  if (copy_buffer == nullptr) {
    return;
  }

  // Transition the render targets to copy destinations and the EDRAM buffer to
  // an SRV.
  for (uint32_t i = 0; i < render_target_count; ++i) {
    RenderTarget* render_target = render_targets[i];
    command_processor_.PushTransitionBarrier(render_target->resource,
                                             render_target->state,
                                             D3D12_RESOURCE_STATE_COPY_DEST);
    render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
  }
  TransitionEdramBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);

  // Set up the bindings.
  auto device =
      command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice();
  command_list.D3DSetComputeRootSignature(edram_load_store_root_signature_);
  command_list.D3DSetComputeRootDescriptorTable(2, descriptor_edram.second);
  ui::d3d12::util::CreateBufferRawUAV(device, descriptor_dest.first,
                                      copy_buffer, copy_buffer_size);
  command_list.D3DSetComputeRootDescriptorTable(1, descriptor_dest.second);

  // Load each render target.
  for (uint32_t i = 0; i < render_target_count; ++i) {
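    // 2048 is the total number of 80x16-sample tiles in the 10 MB of EDRAM, so
    // a base at or beyond it can't address any valid tile.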
    if (edram_bases[i] >= 2048) {
      // Something is wrong with the load.
      continue;
    }
    const RenderTarget* render_target = render_targets[i];

    // Get the number of EDRAM tiles per row.
    uint32_t edram_pitch_tiles = render_target->key.width_ss_div_80;
    if (!render_target->key.is_depth &&
        xenos::IsColorRenderTargetFormat64bpp(
            xenos::ColorRenderTargetFormat(render_target->key.format))) {
      edram_pitch_tiles *= 2;
    }
    // Clamp the height if somehow requested a render target that is too large.
    uint32_t edram_rows =
        std::min(render_target->key.height_ss_div_16,
                 (2048u - edram_bases[i]) / edram_pitch_tiles);
    if (edram_rows == 0) {
      continue;
    }

    // Transition the copy buffer back to UAV if it's not the first load.
    command_processor_.PushTransitionBarrier(
        copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

    // Load the data.
    command_processor_.SubmitBarriers();
    EdramLoadStoreRootConstants root_constants;
    // TODO(Triang3l): log2(sample count, resolution scale).
    root_constants.base_samples_2x_depth_pitch =
        edram_bases[i] | (edram_pitch_tiles << 16);
    root_constants.rt_color_depth_offset =
        uint32_t(render_target->footprints[0].Offset);
    root_constants.rt_color_depth_pitch =
        render_target->footprints[0].Footprint.RowPitch;
    if (render_target->key.is_depth) {
      root_constants.base_samples_2x_depth_pitch |= 1 << 15;
      root_constants.rt_stencil_offset =
          uint32_t(render_target->footprints[1].Offset);
      root_constants.rt_stencil_pitch =
          render_target->footprints[1].Footprint.RowPitch;
    }
    command_list.D3DSetComputeRoot32BitConstants(
        0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
    EdramLoadStoreMode mode = GetLoadStoreMode(render_target->key.is_depth,
                                               render_target->key.format);
    command_processor_.SetComputePipeline(edram_load_pipelines_[size_t(mode)]);
    // 1 group per 80x16 samples.
    command_list.D3DDispatch(render_target->key.width_ss_div_80, edram_rows, 1);

    // Commit the UAV write and transition the copy buffer to copy source now.
    command_processor_.PushUAVBarrier(copy_buffer);
    command_processor_.PushTransitionBarrier(copy_buffer, copy_buffer_state,
                                             D3D12_RESOURCE_STATE_COPY_SOURCE);
    copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;

    // Copy to the render target planes.
    command_processor_.SubmitBarriers();
    D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
    location_source.pResource = copy_buffer;
    location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    location_source.PlacedFootprint = render_target->footprints[0];
    location_dest.pResource = render_target->resource;
    location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    location_dest.SubresourceIndex = 0;
    command_list.CopyTexture(location_dest, location_source);
    if (render_target->key.is_depth) {
      location_source.PlacedFootprint = render_target->footprints[1];
      location_dest.SubresourceIndex = 1;
      command_list.CopyTexture(location_dest, location_source);
    }
  }

  command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
}

}  // namespace d3d12
}  // namespace gpu
}  // namespace xe